Diffstat (limited to 'openvz-sources/022.078-r3/0100_patch-022stab078-core.patch')
-rw-r--r-- | openvz-sources/022.078-r3/0100_patch-022stab078-core.patch | 85981
1 file changed, 85981 insertions, 0 deletions
diff --git a/openvz-sources/022.078-r3/0100_patch-022stab078-core.patch b/openvz-sources/022.078-r3/0100_patch-022stab078-core.patch new file mode 100644 index 0000000..a179de7 --- /dev/null +++ b/openvz-sources/022.078-r3/0100_patch-022stab078-core.patch @@ -0,0 +1,85981 @@ +diff -uprN linux-2.6.8.1.orig/COPYING.SWsoft linux-2.6.8.1-ve022stab078/COPYING.SWsoft +--- linux-2.6.8.1.orig/COPYING.SWsoft 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/COPYING.SWsoft 2006-05-11 13:05:37.000000000 +0400 +@@ -0,0 +1,350 @@ ++ ++Nothing in this license should be construed as a grant by SWsoft of any rights ++beyond the rights specified in the GNU General Public License, and nothing in ++this license should be construed as a waiver by SWsoft of its patent, copyright ++and/or trademark rights, beyond the waiver required by the GNU General Public ++License. This license is expressly inapplicable to any product that is not ++within the scope of the GNU General Public License ++ ++---------------------------------------- ++ ++ GNU GENERAL PUBLIC LICENSE ++ Version 2, June 1991 ++ ++ Copyright (C) 1989, 1991 Free Software Foundation, Inc. ++ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ Everyone is permitted to copy and distribute verbatim copies ++ of this license document, but changing it is not allowed. ++ ++ Preamble ++ ++ The licenses for most software are designed to take away your ++freedom to share and change it. By contrast, the GNU General Public ++License is intended to guarantee your freedom to share and change free ++software--to make sure the software is free for all its users. This ++General Public License applies to most of the Free Software ++Foundation's software and to any other program whose authors commit to ++using it. (Some other Free Software Foundation software is covered by ++the GNU Library General Public License instead.) You can apply it to ++your programs, too. ++ ++ When we speak of free software, we are referring to freedom, not ++price. Our General Public Licenses are designed to make sure that you ++have the freedom to distribute copies of free software (and charge for ++this service if you wish), that you receive source code or can get it ++if you want it, that you can change the software or use pieces of it ++in new free programs; and that you know you can do these things. ++ ++ To protect your rights, we need to make restrictions that forbid ++anyone to deny you these rights or to ask you to surrender the rights. ++These restrictions translate to certain responsibilities for you if you ++distribute copies of the software, or if you modify it. ++ ++ For example, if you distribute copies of such a program, whether ++gratis or for a fee, you must give the recipients all the rights that ++you have. You must make sure that they, too, receive or can get the ++source code. And you must show them these terms so they know their ++rights. ++ ++ We protect your rights with two steps: (1) copyright the software, and ++(2) offer you this license which gives you legal permission to copy, ++distribute and/or modify the software. ++ ++ Also, for each author's protection and ours, we want to make certain ++that everyone understands that there is no warranty for this free ++software. If the software is modified by someone else and passed on, we ++want its recipients to know that what they have is not the original, so ++that any problems introduced by others will not reflect on the original ++authors' reputations. 
++ ++ Finally, any free program is threatened constantly by software ++patents. We wish to avoid the danger that redistributors of a free ++program will individually obtain patent licenses, in effect making the ++program proprietary. To prevent this, we have made it clear that any ++patent must be licensed for everyone's free use or not licensed at all. ++ ++ The precise terms and conditions for copying, distribution and ++modification follow. ++ ++ GNU GENERAL PUBLIC LICENSE ++ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION ++ ++ 0. This License applies to any program or other work which contains ++a notice placed by the copyright holder saying it may be distributed ++under the terms of this General Public License. The "Program", below, ++refers to any such program or work, and a "work based on the Program" ++means either the Program or any derivative work under copyright law: ++that is to say, a work containing the Program or a portion of it, ++either verbatim or with modifications and/or translated into another ++language. (Hereinafter, translation is included without limitation in ++the term "modification".) Each licensee is addressed as "you". ++ ++Activities other than copying, distribution and modification are not ++covered by this License; they are outside its scope. The act of ++running the Program is not restricted, and the output from the Program ++is covered only if its contents constitute a work based on the ++Program (independent of having been made by running the Program). ++Whether that is true depends on what the Program does. ++ ++ 1. You may copy and distribute verbatim copies of the Program's ++source code as you receive it, in any medium, provided that you ++conspicuously and appropriately publish on each copy an appropriate ++copyright notice and disclaimer of warranty; keep intact all the ++notices that refer to this License and to the absence of any warranty; ++and give any other recipients of the Program a copy of this License ++along with the Program. ++ ++You may charge a fee for the physical act of transferring a copy, and ++you may at your option offer warranty protection in exchange for a fee. ++ ++ 2. You may modify your copy or copies of the Program or any portion ++of it, thus forming a work based on the Program, and copy and ++distribute such modifications or work under the terms of Section 1 ++above, provided that you also meet all of these conditions: ++ ++ a) You must cause the modified files to carry prominent notices ++ stating that you changed the files and the date of any change. ++ ++ b) You must cause any work that you distribute or publish, that in ++ whole or in part contains or is derived from the Program or any ++ part thereof, to be licensed as a whole at no charge to all third ++ parties under the terms of this License. ++ ++ c) If the modified program normally reads commands interactively ++ when run, you must cause it, when started running for such ++ interactive use in the most ordinary way, to print or display an ++ announcement including an appropriate copyright notice and a ++ notice that there is no warranty (or else, saying that you provide ++ a warranty) and that users may redistribute the program under ++ these conditions, and telling the user how to view a copy of this ++ License. (Exception: if the Program itself is interactive but ++ does not normally print such an announcement, your work based on ++ the Program is not required to print an announcement.) 
++ ++These requirements apply to the modified work as a whole. If ++identifiable sections of that work are not derived from the Program, ++and can be reasonably considered independent and separate works in ++themselves, then this License, and its terms, do not apply to those ++sections when you distribute them as separate works. But when you ++distribute the same sections as part of a whole which is a work based ++on the Program, the distribution of the whole must be on the terms of ++this License, whose permissions for other licensees extend to the ++entire whole, and thus to each and every part regardless of who wrote it. ++ ++Thus, it is not the intent of this section to claim rights or contest ++your rights to work written entirely by you; rather, the intent is to ++exercise the right to control the distribution of derivative or ++collective works based on the Program. ++ ++In addition, mere aggregation of another work not based on the Program ++with the Program (or with a work based on the Program) on a volume of ++a storage or distribution medium does not bring the other work under ++the scope of this License. ++ ++ 3. You may copy and distribute the Program (or a work based on it, ++under Section 2) in object code or executable form under the terms of ++Sections 1 and 2 above provided that you also do one of the following: ++ ++ a) Accompany it with the complete corresponding machine-readable ++ source code, which must be distributed under the terms of Sections ++ 1 and 2 above on a medium customarily used for software interchange; or, ++ ++ b) Accompany it with a written offer, valid for at least three ++ years, to give any third party, for a charge no more than your ++ cost of physically performing source distribution, a complete ++ machine-readable copy of the corresponding source code, to be ++ distributed under the terms of Sections 1 and 2 above on a medium ++ customarily used for software interchange; or, ++ ++ c) Accompany it with the information you received as to the offer ++ to distribute corresponding source code. (This alternative is ++ allowed only for noncommercial distribution and only if you ++ received the program in object code or executable form with such ++ an offer, in accord with Subsection b above.) ++ ++The source code for a work means the preferred form of the work for ++making modifications to it. For an executable work, complete source ++code means all the source code for all modules it contains, plus any ++associated interface definition files, plus the scripts used to ++control compilation and installation of the executable. However, as a ++special exception, the source code distributed need not include ++anything that is normally distributed (in either source or binary ++form) with the major components (compiler, kernel, and so on) of the ++operating system on which the executable runs, unless that component ++itself accompanies the executable. ++ ++If distribution of executable or object code is made by offering ++access to copy from a designated place, then offering equivalent ++access to copy the source code from the same place counts as ++distribution of the source code, even though third parties are not ++compelled to copy the source along with the object code. ++ ++ 4. You may not copy, modify, sublicense, or distribute the Program ++except as expressly provided under this License. Any attempt ++otherwise to copy, modify, sublicense or distribute the Program is ++void, and will automatically terminate your rights under this License. 
++However, parties who have received copies, or rights, from you under ++this License will not have their licenses terminated so long as such ++parties remain in full compliance. ++ ++ 5. You are not required to accept this License, since you have not ++signed it. However, nothing else grants you permission to modify or ++distribute the Program or its derivative works. These actions are ++prohibited by law if you do not accept this License. Therefore, by ++modifying or distributing the Program (or any work based on the ++Program), you indicate your acceptance of this License to do so, and ++all its terms and conditions for copying, distributing or modifying ++the Program or works based on it. ++ ++ 6. Each time you redistribute the Program (or any work based on the ++Program), the recipient automatically receives a license from the ++original licensor to copy, distribute or modify the Program subject to ++these terms and conditions. You may not impose any further ++restrictions on the recipients' exercise of the rights granted herein. ++You are not responsible for enforcing compliance by third parties to ++this License. ++ ++ 7. If, as a consequence of a court judgment or allegation of patent ++infringement or for any other reason (not limited to patent issues), ++conditions are imposed on you (whether by court order, agreement or ++otherwise) that contradict the conditions of this License, they do not ++excuse you from the conditions of this License. If you cannot ++distribute so as to satisfy simultaneously your obligations under this ++License and any other pertinent obligations, then as a consequence you ++may not distribute the Program at all. For example, if a patent ++license would not permit royalty-free redistribution of the Program by ++all those who receive copies directly or indirectly through you, then ++the only way you could satisfy both it and this License would be to ++refrain entirely from distribution of the Program. ++ ++If any portion of this section is held invalid or unenforceable under ++any particular circumstance, the balance of the section is intended to ++apply and the section as a whole is intended to apply in other ++circumstances. ++ ++It is not the purpose of this section to induce you to infringe any ++patents or other property right claims or to contest validity of any ++such claims; this section has the sole purpose of protecting the ++integrity of the free software distribution system, which is ++implemented by public license practices. Many people have made ++generous contributions to the wide range of software distributed ++through that system in reliance on consistent application of that ++system; it is up to the author/donor to decide if he or she is willing ++to distribute software through any other system and a licensee cannot ++impose that choice. ++ ++This section is intended to make thoroughly clear what is believed to ++be a consequence of the rest of this License. ++ ++ 8. If the distribution and/or use of the Program is restricted in ++certain countries either by patents or by copyrighted interfaces, the ++original copyright holder who places the Program under this License ++may add an explicit geographical distribution limitation excluding ++those countries, so that distribution is permitted only in or among ++countries not thus excluded. In such case, this License incorporates ++the limitation as if written in the body of this License. ++ ++ 9. 
The Free Software Foundation may publish revised and/or new versions ++of the General Public License from time to time. Such new versions will ++be similar in spirit to the present version, but may differ in detail to ++address new problems or concerns. ++ ++Each version is given a distinguishing version number. If the Program ++specifies a version number of this License which applies to it and "any ++later version", you have the option of following the terms and conditions ++either of that version or of any later version published by the Free ++Software Foundation. If the Program does not specify a version number of ++this License, you may choose any version ever published by the Free Software ++Foundation. ++ ++ 10. If you wish to incorporate parts of the Program into other free ++programs whose distribution conditions are different, write to the author ++to ask for permission. For software which is copyrighted by the Free ++Software Foundation, write to the Free Software Foundation; we sometimes ++make exceptions for this. Our decision will be guided by the two goals ++of preserving the free status of all derivatives of our free software and ++of promoting the sharing and reuse of software generally. ++ ++ NO WARRANTY ++ ++ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY ++FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN ++OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES ++PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED ++OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS ++TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE ++PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, ++REPAIR OR CORRECTION. ++ ++ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING ++WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR ++REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, ++INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING ++OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED ++TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY ++YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER ++PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE ++POSSIBILITY OF SUCH DAMAGES. ++ ++ END OF TERMS AND CONDITIONS ++ ++ How to Apply These Terms to Your New Programs ++ ++ If you develop a new program, and you want it to be of the greatest ++possible use to the public, the best way to achieve this is to make it ++free software which everyone can redistribute and change under these terms. ++ ++ To do so, attach the following notices to the program. It is safest ++to attach them to the start of each source file to most effectively ++convey the exclusion of warranty; and each file should have at least ++the "copyright" line and a pointer to where the full notice is found. ++ ++ <one line to give the program's name and a brief idea of what it does.> ++ Copyright (C) <year> <name of author> ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ ++ ++Also add information on how to contact you by electronic and paper mail. ++ ++If the program is interactive, make it output a short notice like this ++when it starts in an interactive mode: ++ ++ Gnomovision version 69, Copyright (C) year name of author ++ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. ++ This is free software, and you are welcome to redistribute it ++ under certain conditions; type `show c' for details. ++ ++The hypothetical commands `show w' and `show c' should show the appropriate ++parts of the General Public License. Of course, the commands you use may ++be called something other than `show w' and `show c'; they could even be ++mouse-clicks or menu items--whatever suits your program. ++ ++You should also get your employer (if you work as a programmer) or your ++school, if any, to sign a "copyright disclaimer" for the program, if ++necessary. Here is a sample; alter the names: ++ ++ Yoyodyne, Inc., hereby disclaims all copyright interest in the program ++ `Gnomovision' (which makes passes at compilers) written by James Hacker. ++ ++ <signature of Ty Coon>, 1 April 1989 ++ Ty Coon, President of Vice ++ ++This General Public License does not permit incorporating your program into ++proprietary programs. If your program is a subroutine library, you may ++consider it more useful to permit linking proprietary applications with the ++library. If this is what you want to do, use the GNU Library General ++Public License instead of this License. +diff -uprN linux-2.6.8.1.orig/Documentation/cachetlb.txt linux-2.6.8.1-ve022stab078/Documentation/cachetlb.txt +--- linux-2.6.8.1.orig/Documentation/cachetlb.txt 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Documentation/cachetlb.txt 2006-05-11 13:05:30.000000000 +0400 +@@ -142,6 +142,11 @@ changes occur: + The ia64 sn2 platform is one example of a platform + that uses this interface. + ++8) void lazy_mmu_prot_update(pte_t pte) ++ This interface is called whenever the protection on ++ any user PTEs change. This interface provides a notification ++ to architecture specific code to take appropiate action. ++ + + Next, we have the cache flushing interfaces. 
In general, when Linux + is changing an existing virtual-->physical mapping to a new value, +diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/Locking linux-2.6.8.1-ve022stab078/Documentation/filesystems/Locking +--- linux-2.6.8.1.orig/Documentation/filesystems/Locking 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Documentation/filesystems/Locking 2006-05-11 13:05:35.000000000 +0400 +@@ -90,7 +90,7 @@ prototypes: + void (*destroy_inode)(struct inode *); + void (*read_inode) (struct inode *); + void (*dirty_inode) (struct inode *); +- void (*write_inode) (struct inode *, int); ++ int (*write_inode) (struct inode *, int); + void (*put_inode) (struct inode *); + void (*drop_inode) (struct inode *); + void (*delete_inode) (struct inode *); +diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt linux-2.6.8.1-ve022stab078/Documentation/filesystems/vfs.txt +--- linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Documentation/filesystems/vfs.txt 2006-05-11 13:05:35.000000000 +0400 +@@ -176,7 +176,7 @@ filesystem. As of kernel 2.1.99, the fol + + struct super_operations { + void (*read_inode) (struct inode *); +- void (*write_inode) (struct inode *, int); ++ int (*write_inode) (struct inode *, int); + void (*put_inode) (struct inode *); + void (*drop_inode) (struct inode *); + void (*delete_inode) (struct inode *); +diff -uprN linux-2.6.8.1.orig/Documentation/i386/zero-page.txt linux-2.6.8.1-ve022stab078/Documentation/i386/zero-page.txt +--- linux-2.6.8.1.orig/Documentation/i386/zero-page.txt 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Documentation/i386/zero-page.txt 2006-05-11 13:05:29.000000000 +0400 +@@ -28,7 +28,8 @@ Offset Type Description + + 0xa0 16 bytes System description table truncated to 16 bytes. + ( struct sys_desc_table_struct ) +- 0xb0 - 0x1c3 Free. Add more parameters here if you really need them. ++ 0xb0 - 0x13f Free. Add more parameters here if you really need them. ++ 0x140- 0x1be EDID_INFO Video mode setup + + 0x1c4 unsigned long EFI system table pointer + 0x1c8 unsigned long EFI memory descriptor size +diff -uprN linux-2.6.8.1.orig/Documentation/power/swsusp.txt linux-2.6.8.1-ve022stab078/Documentation/power/swsusp.txt +--- linux-2.6.8.1.orig/Documentation/power/swsusp.txt 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Documentation/power/swsusp.txt 2006-05-11 13:05:25.000000000 +0400 +@@ -211,8 +211,8 @@ A: All such kernel threads need to be fi + where it is safe to be frozen (no kernel semaphores should be held at + that point and it must be safe to sleep there), and add: + +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + Q: What is the difference between between "platform", "shutdown" and + "firmware" in /sys/power/disk? +diff -uprN linux-2.6.8.1.orig/Documentation/ve.txt linux-2.6.8.1-ve022stab078/Documentation/ve.txt +--- linux-2.6.8.1.orig/Documentation/ve.txt 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/Documentation/ve.txt 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,37 @@ ++ OpenVZ Overview ++ --------------- ++ (C) SWsoft, 2005, http://www.sw-soft.com, All rights reserved. ++ Licensing governed by "linux/COPYING.SWsoft" file. ++ ++OpenVZ is a virtualization technology which allows to run multiple ++isolated VPSs (Virtual Private Server) on a single operating system. 
++It uses a single instance of Linux kernel in memory which efficiently ++manages resources between VPSs. ++ ++Virtual environment (VE) notion which is used in kernel is the original ++name of more modern notion of Virtual Private Server (VPS). ++ ++From user point of view, every VPS is an isolated operating system with ++private file system, private set of users, private root superuser, ++private set of processes and so on. Every application which do not ++require direct hardware access can't feel the difference between VPS ++and real standalone server. ++ ++From kernel point of view, VPS is an isolated set of processes spawned ++from their private 'init' process. Kernel controls which resources are ++accessible inside VPS and which amount of these resources can be ++consumed/used by VPS processes. Also kernel provides isolation between ++VPSs thus ensuring that one VPS can't use private resources of another ++VPS, make DoS/hack/crash attack on it's neighbour and so on. ++ ++main Open Virtuozzo config options: ++ CONFIG_FAIRSCHED=y ++ CONFIG_SCHED_VCPU=y ++ CONFIG_VE=y ++ CONFIG_VE_CALLS=m ++ CONFIG_VE_NETDEV=m ++ CONFIG_VE_IPTABLES=y ++ ++Official product pages: ++ http://www.virtuozzo.com ++ http://openvz.org +diff -uprN linux-2.6.8.1.orig/Documentation/vsched.txt linux-2.6.8.1-ve022stab078/Documentation/vsched.txt +--- linux-2.6.8.1.orig/Documentation/vsched.txt 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/Documentation/vsched.txt 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,83 @@ ++Copyright (C) 2005 SWsoft. All rights reserved. ++Licensing governed by "linux/COPYING.SWsoft" file. ++ ++Hierarchical CPU schedulers ++~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Hierarchical CPU scheduler is a stack of CPU schedulers which allows ++to organize different policies of scheduling in the system and/or between ++groups of processes. ++ ++Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage ++CPU scheduler, where the scheduling decisions are made in 2 steps: ++1. On the first step Fair CPU scheduler selects a group of processes ++ which should get some CPU time. ++2. Then standard Linux scheduler chooses a process inside the group. ++Such scheduler efficiently allows to isolate one group of processes ++from another and still allows a group to use more than 1 CPU on SMP systems. ++ ++This document describes a new middle layer of Virtuozzo hierarchical CPU ++scheduler which makes decisions after Fair scheduler, but before Linux ++scheduler and which is called VCPU scheduler. ++ ++ ++Where VCPU scheduler comes from? ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Existing hierarchical CPU scheduler uses isolated algorithms on each stage ++of decision making, i.e. every scheduler makes its decisions without ++taking into account the details of other schedulers. This can lead to a number ++of problems described below. ++ ++On SMP systems there are possible situations when the first CPU scheduler ++in the hierarchy (e.g. Fair scheduler) wants to schedule some group of ++processes on the physical CPU, but the underlying process scheduler ++(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes ++on this physical CPU. Usually this happens due to the fact that Linux ++kernel scheduler uses per-physical CPU runqueues. ++ ++Another problem is that Linux scheduler also knows nothing about ++Fair scheduler and can't balance efficiently without taking into account ++statistics about process groups from Fair scheduler. 
Without such ++statistics Linux scheduler can concentrate all processes on one physical ++CPU, thus making CPU consuming highly inefficient. ++ ++VCPU scheduler solves these problems by adding a new layer between ++Fair schedule and Linux scheduler. ++ ++VCPU scheduler ++~~~~~~~~~~~~~~ ++ ++VCPU scheduler is a CPU scheduler which splits notion of ++physical and virtual CPUs (VCPU and PCPU). This means that tasks are ++running on virtual CPU runqueues, while VCPUs are running on PCPUs. ++ ++The Virtuozzo hierarchical fair scheduler becomes 3 stage CPU scheduler: ++1. First, Fair CPU scheduler select a group of processes. ++2. Then VCPU scheduler select a virtual CPU to run (this is actually ++ a runqueue). ++3. Standard Linux scheduler chooses a process from the runqueue. ++ ++For example on the picture below PCPU0 executes tasks from ++VCPU1 runqueue and PCPU1 is idle: ++ ++ virtual | physical | virtual ++ idle CPUs | CPUs | CPUS ++--------------------|------------------------|-------------------------- ++ | | ----------------- ++ | | | virtual sched X | ++ | | | ----------- | ++ | | | | VCPU0 | | ++ | | | ----------- | ++ ------------ | ----------- | ----------- | ++| idle VCPU0 | | | PCPU0 | <---> | | VCPU1 | | ++ ------------ | ----------- | ----------- | ++ | | ----------------- ++ | | ++ | | ----------------- ++ | | | virtual sched Y | ++ ------------ ----------- | | ----------- | ++| idle VCPU1 | <---> | PCPU1 | | | | VCPU0 | | ++ ------------ ----------- | | ----------- | ++ | | ----------------- ++ | | +diff -uprN linux-2.6.8.1.orig/Makefile linux-2.6.8.1-ve022stab078/Makefile +--- linux-2.6.8.1.orig/Makefile 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/Makefile 2006-05-11 13:05:49.000000000 +0400 +@@ -1,7 +1,10 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 8 +-EXTRAVERSION = .1 ++EXTRAVERSION-y = smp ++EXTRAVERSION- = up ++EXTRAVERSION-n = up ++EXTRAVERSION = -022stab078-$(EXTRAVERSION-$(CONFIG_SMP)) + NAME=Zonked Quokka + + # *DOCUMENTATION* +diff -uprN linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/alpha/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/alpha/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -354,7 +354,7 @@ do_sys_ptrace(long request, long pid, lo + */ + case PTRACE_KILL: + ret = 0; +- if (child->state == TASK_ZOMBIE) ++ if (child->exit_state == EXIT_ZOMBIE) + break; + child->exit_code = SIGKILL; + /* make sure single-step breakpoint is gone. */ +diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/arm/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/arm/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -677,7 +677,7 @@ static int do_ptrace(int request, struct + /* make sure single-step breakpoint is gone. 
*/ + child->ptrace &= ~PT_SINGLESTEP; + ptrace_cancel_bpt(child); +- if (child->state != TASK_ZOMBIE) { ++ if (child->exit_state != EXIT_ZOMBIE) { + child->exit_code = SIGKILL; + wake_up_process(child); + } +diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/arm/kernel/signal.c +--- linux-2.6.8.1.orig/arch/arm/kernel/signal.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/arm/kernel/signal.c 2006-05-11 13:05:25.000000000 +0400 +@@ -548,9 +548,10 @@ static int do_signal(sigset_t *oldset, s + if (!user_mode(regs)) + return 0; + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); +- goto no_signal; ++ if (unlikely(test_thread_flag(TIF_FREEZE))) { ++ refrigerator(); ++ if (!signal_pending(current)) ++ goto no_signal; + } + + if (current->ptrace & PT_SINGLESTEP) +diff -uprN linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/arm26/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/arm26/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -614,7 +614,7 @@ static int do_ptrace(int request, struct + /* make sure single-step breakpoint is gone. */ + child->ptrace &= ~PT_SINGLESTEP; + ptrace_cancel_bpt(child); +- if (child->state != TASK_ZOMBIE) { ++ if (child->exit_state != EXIT_ZOMBIE) { + child->exit_code = SIGKILL; + wake_up_process(child); + } +diff -uprN linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/cris/arch-v10/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/cris/arch-v10/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -185,7 +185,7 @@ sys_ptrace(long request, long pid, long + case PTRACE_KILL: + ret = 0; + +- if (child->state == TASK_ZOMBIE) ++ if (child->exit_state == EXIT_ZOMBIE) + break; + + child->exit_code = SIGKILL; +diff -uprN linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/h8300/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/h8300/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -199,7 +199,7 @@ asmlinkage int sys_ptrace(long request, + case PTRACE_KILL: { + + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + h8300_disable_trace(child); +diff -uprN linux-2.6.8.1.orig/arch/i386/boot/setup.S linux-2.6.8.1-ve022stab078/arch/i386/boot/setup.S +--- linux-2.6.8.1.orig/arch/i386/boot/setup.S 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/boot/setup.S 2006-05-11 13:05:38.000000000 +0400 +@@ -156,7 +156,7 @@ cmd_line_ptr: .long 0 # (Header versio + # can be located anywhere in + # low memory 0x10000 or higher. 
+ +-ramdisk_max: .long (MAXMEM-1) & 0x7fffffff ++ramdisk_max: .long (__MAXMEM-1) & 0x7fffffff + # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd +diff -uprN linux-2.6.8.1.orig/arch/i386/boot/video.S linux-2.6.8.1-ve022stab078/arch/i386/boot/video.S +--- linux-2.6.8.1.orig/arch/i386/boot/video.S 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/boot/video.S 2006-05-11 13:05:37.000000000 +0400 +@@ -123,6 +123,9 @@ video: pushw %ds # We use different seg + cmpw $ASK_VGA, %ax # Bring up the menu + jz vid2 + ++#ifndef CONFIG_FB ++ mov $VIDEO_80x25, %ax # hack to force 80x25 mode ++#endif + call mode_set # Set the mode + jc vid1 + +@@ -1901,7 +1904,7 @@ store_edid: + + movl $0x13131313, %eax # memset block with 0x13 + movw $32, %cx +- movw $0x440, %di ++ movw $0x140, %di + cld + rep + stosl +@@ -1910,7 +1913,7 @@ store_edid: + movw $0x01, %bx + movw $0x00, %cx + movw $0x01, %dx +- movw $0x440, %di ++ movw $0x140, %di + int $0x10 + + popw %di # restore all registers +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/Makefile linux-2.6.8.1-ve022stab078/arch/i386/kernel/Makefile +--- linux-2.6.8.1.orig/arch/i386/kernel/Makefile 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/Makefile 2006-05-11 13:05:38.000000000 +0400 +@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld + obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ + ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ + pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ +- doublefault.o ++ doublefault.o entry_trampoline.o + + obj-y += cpu/ + obj-y += timers/ +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/boot.c +--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/boot.c 2006-05-11 13:05:38.000000000 +0400 +@@ -484,7 +484,7 @@ acpi_scan_rsdp ( + * RSDP signature. 
+ */ + for (offset = 0; offset < length; offset += 16) { +- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) ++ if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) + continue; + return (start + offset); + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/sleep.c +--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/sleep.c 2006-05-11 13:05:38.000000000 +0400 +@@ -19,13 +19,29 @@ extern void zap_low_mappings(void); + + extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + +-static void init_low_mapping(pgd_t *pgd, int pgd_limit) ++static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end) + { +- int pgd_ofs = 0; +- +- while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { +- set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD)); +- pgd_ofs++, pgd++; ++ unsigned long vaddr; ++ pmd_t *pmd; ++ pgd_t *pgd; ++ int i, j; ++ ++ pgd = pgd_base; ++ ++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { ++ vaddr = i*PGDIR_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ pmd = pmd_offset(pgd, 0); ++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { ++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ if (vaddr < start) ++ continue; ++ set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE + ++ vaddr - start)); ++ } + } + } + +@@ -39,7 +55,9 @@ int acpi_save_state_mem (void) + { + if (!acpi_wakeup_address) + return 1; +- init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); ++ if (!cpu_has_pse) ++ return 1; ++ map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); + memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); + acpi_copy_wakeup_routine(acpi_wakeup_address); + +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/wakeup.S +--- linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/acpi/wakeup.S 2006-05-11 13:05:38.000000000 +0400 +@@ -67,6 +67,13 @@ wakeup_code: + movw $0x0e00 + 'i', %fs:(0x12) + + # need a gdt ++ #use the gdt copied in this low mem ++ lea temp_gdt_table - wakeup_code, %eax ++ xor %ebx, %ebx ++ movw %ds, %bx ++ shll $4, %ebx ++ addl %ebx, %eax ++ movl %eax, real_save_gdt + 2 - wakeup_code + lgdt real_save_gdt - wakeup_code + + movl real_save_cr0 - wakeup_code, %eax +@@ -89,6 +96,7 @@ real_save_cr4: .long 0 + real_magic: .long 0 + video_mode: .long 0 + video_flags: .long 0 ++temp_gdt_table: .fill GDT_ENTRIES, 8, 0 + + bogus_real_magic: + movw $0x0e00 + 'B', %fs:(0x12) +@@ -231,6 +239,13 @@ ENTRY(acpi_copy_wakeup_routine) + movl %edx, real_save_cr0 - wakeup_start (%eax) + sgdt real_save_gdt - wakeup_start (%eax) + ++ # gdt wont be addressable from real mode in 4g4g split ++ # copying it to the lower mem ++ xor %ecx, %ecx ++ movw saved_gdt, %cx ++ movl saved_gdt + 2, %esi ++ lea temp_gdt_table - wakeup_start (%eax), %edi ++ rep movsb + movl saved_videomode, %edx + movl %edx, video_mode - wakeup_start (%eax) + movl acpi_video_flags, %edx +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/apic.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/apic.c +--- linux-2.6.8.1.orig/arch/i386/kernel/apic.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/apic.c 2006-05-11 13:05:40.000000000 +0400 +@@ -970,9 +970,7 @@ void __init setup_boot_APIC_clock(void) + + void __init 
setup_secondary_APIC_clock(void) + { +- local_irq_disable(); /* FIXME: Do we need this? --RR */ + setup_APIC_timer(calibration_result); +- local_irq_enable(); + } + + void __init disable_APIC_timer(void) +@@ -1035,7 +1033,7 @@ int setup_profiling_timer(unsigned int m + * value into /proc/profile. + */ + +-inline void smp_local_timer_interrupt(struct pt_regs * regs) ++asmlinkage void smp_local_timer_interrupt(struct pt_regs * regs) + { + int cpu = smp_processor_id(); + +@@ -1088,11 +1086,18 @@ inline void smp_local_timer_interrupt(st + + void smp_apic_timer_interrupt(struct pt_regs regs) + { +- int cpu = smp_processor_id(); ++#ifdef CONFIG_4KSTACKS ++ union irq_ctx *curctx; ++ union irq_ctx *irqctx; ++ u32 *isp; ++#endif ++ int cpu; ++ struct ve_struct *envid; + + /* + * the NMI deadlock-detector uses this. + */ ++ cpu = smp_processor_id(); + irq_stat[cpu].apic_timer_irqs++; + + /* +@@ -1105,9 +1110,35 @@ void smp_apic_timer_interrupt(struct pt_ + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ ++ envid = set_exec_env(get_ve0()); + irq_enter(); ++#ifdef CONFIG_4KSTACKS ++ curctx = (union irq_ctx *) current_thread_info(); ++ irqctx = hardirq_ctx[cpu]; ++ if (curctx == irqctx) { ++ smp_local_timer_interrupt(®s); ++ } else { ++ /* build the stack frame on the IRQ stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack; ++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack; ++ irqctx->tinfo.previous_esp = current_stack_pointer(); ++ ++ *--isp = (u32) ®s; ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call smp_local_timer_interrupt \n" ++ " xchgl %%ebx,%%esp \n" ++ : : "b"(isp) ++ : "memory", "cc", "edx", "ecx" ++ ); ++ } ++#else + smp_local_timer_interrupt(®s); ++#endif + irq_exit(); ++ (void)set_exec_env(envid); + } + + /* +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/asm-offsets.c +--- linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/asm-offsets.c 2006-05-11 13:05:38.000000000 +0400 +@@ -61,5 +61,19 @@ void foo(void) + DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - + sizeof(struct tss_struct)); + ++ DEFINE(TI_task, offsetof (struct thread_info, task)); ++ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); ++ DEFINE(TI_flags, offsetof (struct thread_info, flags)); ++ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); ++ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); ++ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); ++ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); ++ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); ++ ++ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, ++ __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); ++ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); ++ DEFINE(task_thread_db7, ++ offsetof (struct task_struct, thread.debugreg[7])); + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/amd.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/amd.c +--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/amd.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/amd.c 2006-05-11 13:05:28.000000000 +0400 +@@ -28,6 +28,22 @@ static void __init init_amd(struct cpuin + int mbytes = 
num_physpages >> (20-PAGE_SHIFT); + int r; + ++#ifdef CONFIG_SMP ++ unsigned long long value; ++ ++ /* Disable TLB flush filter by setting HWCR.FFDIS on K8 ++ * bit 6 of msr C001_0015 ++ * ++ * Errata 63 for SH-B3 steppings ++ * Errata 122 for all steppings (F+ have it disabled by default) ++ */ ++ if (c->x86 == 15) { ++ rdmsrl(MSR_K7_HWCR, value); ++ value |= 1 << 6; ++ wrmsrl(MSR_K7_HWCR, value); ++ } ++#endif ++ + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/common.c +--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/common.c 2006-05-11 13:05:38.000000000 +0400 +@@ -196,7 +196,10 @@ int __init have_cpuid_p(void) + + /* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. +- The others are not touched to avoid unwanted side effects. */ ++ The others are not touched to avoid unwanted side effects. ++ ++ WARNING: this function is only called on the BP. Don't add code here ++ that is supposed to run on all CPUs. */ + void __init early_cpu_detect(void) + { + struct cpuinfo_x86 *c = &boot_cpu_data; +@@ -228,8 +231,6 @@ void __init early_cpu_detect(void) + if (cap0 & (1<<19)) + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + } +- +- early_intel_workaround(c); + } + + void __init generic_identify(struct cpuinfo_x86 * c) +@@ -275,6 +276,8 @@ void __init generic_identify(struct cpui + get_model_name(c); /* Default name */ + } + } ++ ++ early_intel_workaround(c); + } + + static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +@@ -554,12 +557,16 @@ void __init cpu_init (void) + set_tss_desc(cpu,t); + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + load_TR_desc(); +- load_LDT(&init_mm.context); ++ if (cpu) ++ load_LDT(&init_mm.context); + + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; + ++ if (cpu) ++ trap_init_virtual_GDT(); ++ + /* Clear %fs and %gs. */ + asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/intel.c +--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/intel.c 2006-05-11 13:05:38.000000000 +0400 +@@ -10,6 +10,7 @@ + #include <asm/processor.h> + #include <asm/msr.h> + #include <asm/uaccess.h> ++#include <asm/desc.h> + + #include "cpu.h" + +@@ -19,8 +20,6 @@ + #include <mach_apic.h> + #endif + +-extern int trap_init_f00f_bug(void); +- + #ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Alignment at which movsl is preferred for bulk memory copies. 
+@@ -97,10 +96,13 @@ static struct _cache_table cache_table[] + { 0x70, LVL_TRACE, 12 }, + { 0x71, LVL_TRACE, 16 }, + { 0x72, LVL_TRACE, 32 }, ++ { 0x78, LVL_2, 1024 }, + { 0x79, LVL_2, 128 }, + { 0x7a, LVL_2, 256 }, + { 0x7b, LVL_2, 512 }, + { 0x7c, LVL_2, 1024 }, ++ { 0x7d, LVL_2, 2048 }, ++ { 0x7f, LVL_2, 512 }, + { 0x82, LVL_2, 256 }, + { 0x83, LVL_2, 512 }, + { 0x84, LVL_2, 1024 }, +@@ -147,7 +149,7 @@ static void __init init_intel(struct cpu + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { +- trap_init_f00f_bug(); ++ trap_init_virtual_IDT(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/mtrr/if.c +--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/mtrr/if.c 2006-05-11 13:05:40.000000000 +0400 +@@ -358,7 +358,7 @@ static int __init mtrr_if_init(void) + return -ENODEV; + + proc_root_mtrr = +- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); ++ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL); + if (proc_root_mtrr) { + proc_root_mtrr->owner = THIS_MODULE; + proc_root_mtrr->proc_fops = &mtrr_fops; +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/proc.c +--- linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c 2004-08-14 14:56:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/cpu/proc.c 2006-05-11 13:05:40.000000000 +0400 +@@ -3,6 +3,8 @@ + #include <linux/string.h> + #include <asm/semaphore.h> + #include <linux/seq_file.h> ++#include <linux/vsched.h> ++#include <linux/fairsched.h> + + /* + * Get CPU information for use by the procfs. 
+@@ -58,11 +60,17 @@ static int show_cpuinfo(struct seq_file + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; ++ unsigned long vcpu_khz; + + #ifdef CONFIG_SMP +- if (!cpu_online(n)) ++ if (!vcpu_online(n)) + return 0; + #endif ++#ifdef CONFIG_VE ++ vcpu_khz = ve_scale_khz(cpu_khz); ++#else ++ vcpu_khz = cpu_khz; ++#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" +@@ -81,14 +89,14 @@ static int show_cpuinfo(struct seq_file + + if ( cpu_has(c, X86_FEATURE_TSC) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", +- cpu_khz / 1000, (cpu_khz % 1000)); ++ vcpu_khz / 1000, (vcpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + #ifdef CONFIG_X86_HT +- if (cpu_has_ht) { ++ if (smp_num_siblings > 1) { + extern int phys_proc_id[NR_CPUS]; + seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); + seq_printf(m, "siblings\t: %d\n", smp_num_siblings); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/doublefault.c +--- linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/doublefault.c 2006-05-11 13:05:38.000000000 +0400 +@@ -8,12 +8,13 @@ + #include <asm/pgtable.h> + #include <asm/processor.h> + #include <asm/desc.h> ++#include <asm/fixmap.h> + + #define DOUBLEFAULT_STACKSIZE (1024) + static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; + #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) + +-#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) ++#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) + + static void doublefault_fn(void) + { +@@ -39,8 +40,8 @@ static void doublefault_fn(void) + + printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", + t->eax, t->ebx, t->ecx, t->edx); +- printk("esi = %08lx, edi = %08lx\n", +- t->esi, t->edi); ++ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", ++ t->esi, t->edi, t->ebp); + } + } + +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry.S linux-2.6.8.1-ve022stab078/arch/i386/kernel/entry.S +--- linux-2.6.8.1.orig/arch/i386/kernel/entry.S 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/entry.S 2006-05-11 13:05:43.000000000 +0400 +@@ -43,8 +43,10 @@ + #include <linux/config.h> + #include <linux/linkage.h> + #include <asm/thread_info.h> ++#include <asm/asm_offsets.h> + #include <asm/errno.h> + #include <asm/segment.h> ++#include <asm/page.h> + #include <asm/smp.h> + #include <asm/page.h> + #include "irq_vectors.h" +@@ -81,7 +83,102 @@ VM_MASK = 0x00020000 + #define resume_kernel restore_all + #endif + +-#define SAVE_ALL \ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++/* ++ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, ++ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is ++ * left stale, so we must check whether to repeat the real stack calculation. 
++ */ ++#define repeat_if_esp_changed \ ++ xorl %esp, %ebp; \ ++ testl $-THREAD_SIZE, %ebp; \ ++ jnz 0b ++#else ++#define repeat_if_esp_changed ++#endif ++ ++/* clobbers ebx, edx and ebp */ ++ ++#define __SWITCH_KERNELSPACE \ ++ cmpl $0xff000000, %esp; \ ++ jb 1f; \ ++ \ ++ /* \ ++ * switch pagetables and load the real stack, \ ++ * keep the stack offset: \ ++ */ \ ++ \ ++ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ ++ \ ++ /* GET_THREAD_INFO(%ebp) intermixed */ \ ++0: \ ++ movl %esp, %ebp; \ ++ movl %esp, %ebx; \ ++ andl $(-THREAD_SIZE), %ebp; \ ++ andl $(THREAD_SIZE-1), %ebx; \ ++ orl TI_real_stack(%ebp), %ebx; \ ++ repeat_if_esp_changed; \ ++ \ ++ movl %edx, %cr3; \ ++ movl %ebx, %esp; \ ++1: ++ ++#endif ++ ++ ++#define __SWITCH_USERSPACE \ ++ /* interrupted any of the user return paths? */ \ ++ \ ++ movl EIP(%esp), %eax; \ ++ \ ++ cmpl $int80_ret_start_marker, %eax; \ ++ jb 33f; /* nope - continue with sysexit check */\ ++ cmpl $int80_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++33: \ ++ cmpl $sysexit_ret_start_marker, %eax; \ ++ jb 44f; /* nope - continue with user check */ \ ++ cmpl $sysexit_ret_end_marker, %eax; \ ++ jb 22f; /* yes - switch to virtual stack */ \ ++ /* return to userspace? */ \ ++44: \ ++ movl EFLAGS(%esp),%ecx; \ ++ movb CS(%esp),%cl; \ ++ testl $(VM_MASK | 3),%ecx; \ ++ jz 2f; \ ++22: \ ++ /* \ ++ * switch to the virtual stack, then switch to \ ++ * the userspace pagetables. \ ++ */ \ ++ \ ++ GET_THREAD_INFO(%ebp); \ ++ movl TI_virtual_stack(%ebp), %edx; \ ++ movl TI_user_pgd(%ebp), %ecx; \ ++ \ ++ movl %esp, %ebx; \ ++ andl $(THREAD_SIZE-1), %ebx; \ ++ orl %ebx, %edx; \ ++int80_ret_start_marker: \ ++ movl %edx, %esp; \ ++ movl %ecx, %cr3; \ ++ \ ++ __RESTORE_ALL_USER; \ ++int80_ret_end_marker: \ ++2: ++ ++#else /* !CONFIG_X86_HIGH_ENTRY */ ++ ++#define __SWITCH_KERNELSPACE ++#define __SWITCH_USERSPACE ++ ++#endif ++ ++#define __SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ +@@ -96,7 +193,7 @@ VM_MASK = 0x00020000 + movl %edx, %ds; \ + movl %edx, %es; + +-#define RESTORE_INT_REGS \ ++#define __RESTORE_INT_REGS \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ +@@ -105,29 +202,44 @@ VM_MASK = 0x00020000 + popl %ebp; \ + popl %eax + +-#define RESTORE_REGS \ +- RESTORE_INT_REGS; \ +-1: popl %ds; \ +-2: popl %es; \ +-.section .fixup,"ax"; \ +-3: movl $0,(%esp); \ +- jmp 1b; \ +-4: movl $0,(%esp); \ +- jmp 2b; \ +-.previous; \ ++#define __RESTORE_REGS \ ++ __RESTORE_INT_REGS; \ ++ popl %ds; \ ++ popl %es; ++ ++#define __RESTORE_REGS_USER \ ++ __RESTORE_INT_REGS; \ ++111: popl %ds; \ ++222: popl %es; \ ++ jmp 666f; \ ++444: movl $0,(%esp); \ ++ jmp 111b; \ ++555: movl $0,(%esp); \ ++ jmp 222b; \ ++666: \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,3b; \ +- .long 2b,4b; \ ++ .long 111b,444b;\ ++ .long 222b,555b;\ + .previous + ++#define __RESTORE_ALL_USER \ ++ __RESTORE_REGS_USER \ ++ __RESTORE_IRET ++ ++#ifdef CONFIG_X86_HIGH_ENTRY ++#define __RESTORE_ALL \ ++ __RESTORE_REGS \ ++ __RESTORE_IRET ++#else /* !CONFIG_X86_HIGH_ENTRY */ ++#define __RESTORE_ALL __RESTORE_ALL_USER ++#endif + +-#define RESTORE_ALL \ +- RESTORE_REGS \ ++#define __RESTORE_IRET \ + addl $4, %esp; \ +-1: iret; \ ++333: iret; \ + .section .fixup,"ax"; \ +-2: sti; \ ++666: sti; \ + movl $(__USER_DS), %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ +@@ -136,10 +248,18 @@ VM_MASK = 0x00020000 + .previous; \ + .section __ex_table,"a";\ + .align 4; \ +- .long 1b,2b; \ ++ .long 333b,666b;\ + .previous + ++#define SAVE_ALL \ ++ __SAVE_ALL; \ ++ __SWITCH_KERNELSPACE; 
++ ++#define RESTORE_ALL \ ++ __SWITCH_USERSPACE; \ ++ __RESTORE_ALL; + ++.section .entry.text,"ax" + + ENTRY(lcall7) + pushfl # We get a different stack layout with call +@@ -240,17 +360,9 @@ sysenter_past_esp: + pushl $(__USER_CS) + pushl $SYSENTER_RETURN + +-/* +- * Load the potential sixth argument from user stack. +- * Careful about security. +- */ +- cmpl $__PAGE_OFFSET-3,%ebp +- jae syscall_fault +-1: movl (%ebp),%ebp +-.section __ex_table,"a" +- .align 4 +- .long 1b,syscall_fault +-.previous ++ /* ++ * No six-argument syscall is ever used with sysenter. ++ */ + + pushl %eax + SAVE_ALL +@@ -266,12 +378,35 @@ sysenter_past_esp: + movl TI_flags(%ebp), %ecx + testw $_TIF_ALLWORK_MASK, %cx + jne syscall_exit_work ++ ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ ++ GET_THREAD_INFO(%ebp) ++ movl TI_virtual_stack(%ebp), %edx ++ movl TI_user_pgd(%ebp), %ecx ++ movl %esp, %ebx ++ andl $(THREAD_SIZE-1), %ebx ++ orl %ebx, %edx ++sysexit_ret_start_marker: ++ movl %edx, %esp ++ movl %ecx, %cr3 ++ /* ++ * only ebx is not restored by the userspace sysenter vsyscall ++ * code, it assumes it to be callee-saved. ++ */ ++ movl EBX(%esp), %ebx ++#endif ++ + /* if something modifies registers it must also disable sysexit */ + movl EIP(%esp), %edx + movl OLDESP(%esp), %ecx ++ xorl %ebp,%ebp + sti + sysexit +- ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++sysexit_ret_end_marker: ++ nop ++#endif + + # system call handler stub + ENTRY(system_call) +@@ -321,6 +456,22 @@ work_notifysig: # deal with pending s + # vm86-space + xorl %edx, %edx + call do_notify_resume ++ ++#if CONFIG_X86_HIGH_ENTRY ++ /* ++ * Reload db7 if necessary: ++ */ ++ movl TI_flags(%ebp), %ecx ++ testb $_TIF_DB7, %cl ++ jnz work_db7 ++ ++ jmp restore_all ++ ++work_db7: ++ movl TI_task(%ebp), %edx; ++ movl task_thread_db7(%edx), %edx; ++ movl %edx, %db7; ++#endif + jmp restore_all + + ALIGN +@@ -358,14 +509,6 @@ syscall_exit_work: + jmp resume_userspace + + ALIGN +-syscall_fault: +- pushl %eax # save orig_eax +- SAVE_ALL +- GET_THREAD_INFO(%ebp) +- movl $-EFAULT,EAX(%esp) +- jmp resume_userspace +- +- ALIGN + syscall_badsys: + movl $-ENOSYS,EAX(%esp) + jmp resume_userspace +@@ -376,7 +519,7 @@ syscall_badsys: + */ + .data + ENTRY(interrupt) +-.text ++.previous + + vector=0 + ENTRY(irq_entries_start) +@@ -386,7 +529,7 @@ ENTRY(irq_entries_start) + jmp common_interrupt + .data + .long 1b +-.text ++.previous + vector=vector+1 + .endr + +@@ -427,12 +570,17 @@ error_code: + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) +- movl %esp, %edx + pushl %esi # push the error code +- pushl %edx # push the pt_regs pointer + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es ++ ++/* clobbers edx, ebx and ebp */ ++ __SWITCH_KERNELSPACE ++ ++ leal 4(%esp), %edx # prepare pt_regs ++ pushl %edx # push pt_regs ++ + call *%edi + addl $8, %esp + jmp ret_from_exception +@@ -523,7 +671,7 @@ nmi_stack_correct: + pushl %edx + call do_nmi + addl $8, %esp +- RESTORE_ALL ++ jmp restore_all + + nmi_stack_fixup: + FIX_STACK(12,nmi_stack_correct, 1) +@@ -600,6 +748,8 @@ ENTRY(spurious_interrupt_bug) + pushl $do_spurious_interrupt_bug + jmp error_code + ++.previous ++ + .data + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ +@@ -887,4 +1037,26 @@ ENTRY(sys_call_table) + .long sys_mq_getsetattr + .long sys_ni_syscall /* reserved for kexec */ + ++ .rept 500-(.-sys_call_table)/4 ++ .long sys_ni_syscall ++ .endr ++ .long sys_fairsched_mknod /* 500 */ ++ .long 
sys_fairsched_rmnod ++ .long sys_fairsched_chwt ++ .long sys_fairsched_mvpr ++ .long sys_fairsched_rate ++ ++ .rept 510-(.-sys_call_table)/4 ++ .long sys_ni_syscall ++ .endr ++ ++ .long sys_getluid /* 510 */ ++ .long sys_setluid ++ .long sys_setublimit ++ .long sys_ubstat ++ .long sys_ni_syscall ++ .long sys_ni_syscall ++ .long sys_lchmod /* 516 */ ++ .long sys_lutime ++ + syscall_table_size=(.-sys_call_table) +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/entry_trampoline.c +--- linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/entry_trampoline.c 2006-05-11 13:05:38.000000000 +0400 +@@ -0,0 +1,75 @@ ++/* ++ * linux/arch/i386/kernel/entry_trampoline.c ++ * ++ * (C) Copyright 2003 Ingo Molnar ++ * ++ * This file contains the needed support code for 4GB userspace ++ */ ++ ++#include <linux/init.h> ++#include <linux/smp.h> ++#include <linux/mm.h> ++#include <linux/sched.h> ++#include <linux/kernel.h> ++#include <linux/string.h> ++#include <linux/highmem.h> ++#include <asm/desc.h> ++#include <asm/atomic_kmap.h> ++ ++extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; ++ ++void __init init_entry_mappings(void) ++{ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ ++ void *tramp; ++ int p; ++ ++ /* ++ * We need a high IDT and GDT for the 4G/4G split: ++ */ ++ trap_init_virtual_IDT(); ++ ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL_EXEC); ++ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL_EXEC); ++ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); ++ ++ printk("mapped 4G/4G trampoline to %p.\n", tramp); ++ BUG_ON((void *)&__start___entry_text != tramp); ++ /* ++ * Virtual kernel stack: ++ */ ++ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK_TOP) & (THREAD_SIZE-1)); ++ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); ++ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); ++ ++ /* ++ * set up the initial thread's virtual stack related ++ * fields: ++ */ ++ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) ++ current->thread_info->stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE)); ++ ++ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); ++ ++ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) { ++ __kunmap_atomic_type(KM_VSTACK_TOP-p); ++ __kmap_atomic(current->thread_info->stack_page[p], KM_VSTACK_TOP-p); ++ } ++#endif ++ current->thread_info->real_stack = (void *)current->thread_info; ++ current->thread_info->user_pgd = NULL; ++ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; ++} ++ ++ ++ ++void __init entry_trampoline_setup(void) ++{ ++ /* ++ * old IRQ entries set up by the boot code will still hang ++ * around - they are a sign of hw trouble anyway, now they'll ++ * produce a double fault message. 
++ */ ++ trap_init_virtual_GDT(); ++} +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/i386_ksyms.c +--- linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/i386_ksyms.c 2006-05-11 13:05:38.000000000 +0400 +@@ -92,7 +92,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter + EXPORT_SYMBOL_NOVERS(__down_failed_trylock); + EXPORT_SYMBOL_NOVERS(__up_wakeup); + /* Networking helper routines. */ +-EXPORT_SYMBOL(csum_partial_copy_generic); + /* Delay loops */ + EXPORT_SYMBOL(__ndelay); + EXPORT_SYMBOL(__udelay); +@@ -106,13 +105,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); + EXPORT_SYMBOL(strpbrk); + EXPORT_SYMBOL(strstr); + ++#if !defined(CONFIG_X86_UACCESS_INDIRECT) + EXPORT_SYMBOL(strncpy_from_user); +-EXPORT_SYMBOL(__strncpy_from_user); ++EXPORT_SYMBOL(__direct_strncpy_from_user); + EXPORT_SYMBOL(clear_user); + EXPORT_SYMBOL(__clear_user); + EXPORT_SYMBOL(__copy_from_user_ll); + EXPORT_SYMBOL(__copy_to_user_ll); + EXPORT_SYMBOL(strnlen_user); ++#else /* CONFIG_X86_UACCESS_INDIRECT */ ++EXPORT_SYMBOL(direct_csum_partial_copy_generic); ++#endif + + EXPORT_SYMBOL(dma_alloc_coherent); + EXPORT_SYMBOL(dma_free_coherent); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i387.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/i387.c +--- linux-2.6.8.1.orig/arch/i386/kernel/i387.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/i387.c 2006-05-11 13:05:38.000000000 +0400 +@@ -227,6 +227,7 @@ void set_fpu_twd( struct task_struct *ts + static int convert_fxsr_to_user( struct _fpstate __user *buf, + struct i387_fxsave_struct *fxsave ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; +@@ -243,23 +244,25 @@ static int convert_fxsr_to_user( struct + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + +- to = &buf->_st[0]; ++ to = tmp; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long __user *t = (unsigned long __user *)to; + unsigned long *f = (unsigned long *)from; + +- if (__put_user(*f, t) || +- __put_user(*(f + 1), t + 1) || +- __put_user(from->exponent, &to->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f+1); ++ to->exponent = from->exponent; + } ++ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) ++ return 1; + return 0; + } + + static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate __user *buf ) + { ++ struct _fpreg tmp[8]; /* 80 bytes scratch area */ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; +@@ -267,6 +270,8 @@ static int convert_fxsr_from_user( struc + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; ++ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) ++ return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); +@@ -278,15 +283,14 @@ static int convert_fxsr_from_user( struc + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; +- from = &buf->_st[0]; ++ from = tmp; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + unsigned long *t = (unsigned long *)to; + unsigned long __user *f = (unsigned long __user *)from; + +- if (__get_user(*t, f) || +- __get_user(*(t + 1), f + 1) || +- __get_user(to->exponent, &from->exponent)) +- return 1; ++ *t = *f; ++ *(t + 1) = *(f + 1); ++ to->exponent = 
from->exponent; + } + return 0; + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/init_task.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/init_task.c +--- linux-2.6.8.1.orig/arch/i386/kernel/init_task.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/init_task.c 2006-05-11 13:05:38.000000000 +0400 +@@ -27,7 +27,7 @@ EXPORT_SYMBOL(init_mm); + */ + union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = +- { INIT_THREAD_INFO(init_task) }; ++ { INIT_THREAD_INFO(init_task, init_thread_union) }; + + /* + * Initial task structure. +@@ -45,5 +45,5 @@ EXPORT_SYMBOL(init_task); + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +-struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; ++struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; + +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/io_apic.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/io_apic.c +--- linux-2.6.8.1.orig/arch/i386/kernel/io_apic.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/io_apic.c 2006-05-11 13:05:28.000000000 +0400 +@@ -635,7 +635,7 @@ failed: + return 0; + } + +-static int __init irqbalance_disable(char *str) ++int __init irqbalance_disable(char *str) + { + irqbalance_disabled = 1; + return 0; +@@ -652,7 +652,7 @@ static inline void move_irq(int irq) + } + } + +-__initcall(balanced_irq_init); ++late_initcall(balanced_irq_init); + + #else /* !CONFIG_IRQBALANCE */ + static inline void move_irq(int irq) { } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/irq.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/irq.c +--- linux-2.6.8.1.orig/arch/i386/kernel/irq.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/irq.c 2006-05-11 13:05:40.000000000 +0400 +@@ -45,6 +45,9 @@ + #include <asm/desc.h> + #include <asm/irq.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_task.h> ++ + /* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used +@@ -79,6 +82,68 @@ static void register_irq_proc (unsigned + #ifdef CONFIG_4KSTACKS + union irq_ctx *hardirq_ctx[NR_CPUS]; + union irq_ctx *softirq_ctx[NR_CPUS]; ++union irq_ctx *overflow_ctx[NR_CPUS]; ++#endif ++ ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++static void report_stack_overflow(unsigned long delta) ++{ ++ printk("Stack overflow %lu task=%s (%p)", ++ delta, current->comm, current); ++ dump_stack(); ++} ++ ++void check_stack_overflow(void) ++{ ++ /* Debugging check for stack overflow: is there less than 512KB free? 
*/ ++ long esp; ++ unsigned long flags; ++#ifdef CONFIG_4KSTACKS ++ u32 *isp; ++ union irq_ctx * curctx; ++ union irq_ctx * irqctx; ++#endif ++ ++ __asm__ __volatile__("andl %%esp,%0" : ++ "=r" (esp) : "0" (THREAD_SIZE - 1)); ++ if (likely(esp > (sizeof(struct thread_info) + STACK_WARN))) ++ return; ++ ++ local_irq_save(flags); ++#ifdef CONFIG_4KSTACKS ++ curctx = (union irq_ctx *) current_thread_info(); ++ irqctx = overflow_ctx[smp_processor_id()]; ++ ++ if (curctx == irqctx) ++ report_stack_overflow(esp); ++ else { ++ /* build the stack frame on the IRQ stack */ ++ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); ++ irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack; ++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack; ++ irqctx->tinfo.previous_esp = current_stack_pointer(); ++ ++ *--isp = (u32) esp; ++ ++ asm volatile( ++ " xchgl %%ebx,%%esp \n" ++ " call report_stack_overflow \n" ++ " xchgl %%ebx,%%esp \n" ++ : ++ : "b"(isp) ++ : "memory", "cc", "eax", "edx", "ecx" ++ ); ++ } ++#else ++ report_stack_overflow(esp); ++#endif ++ local_irq_restore(flags); ++} ++#else ++void check_stack_overflow(void) ++{ ++} + #endif + + /* +@@ -221,15 +286,19 @@ asmlinkage int handle_IRQ_event(unsigned + { + int status = 1; /* Force the "do bottom halves" bit */ + int retval = 0; ++ struct user_beancounter *ub; + + if (!(action->flags & SA_INTERRUPT)) + local_irq_enable(); + ++ ub = set_exec_ub(get_ub0()); + do { + status |= action->flags; + retval |= action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); ++ (void)set_exec_ub(ub); ++ + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + local_irq_disable(); +@@ -270,7 +339,7 @@ static void report_bad_irq(int irq, irq_ + + static int noirqdebug; + +-static int __init noirqdebug_setup(char *str) ++int __init noirqdebug_setup(char *str) + { + noirqdebug = 1; + printk("IRQ lockup detection disabled\n"); +@@ -429,23 +498,13 @@ asmlinkage unsigned int do_IRQ(struct pt + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; ++ struct ve_struct *envid; + ++ envid = set_exec_env(get_ve0()); + irq_enter(); + +-#ifdef CONFIG_DEBUG_STACKOVERFLOW +- /* Debugging check for stack overflow: is there less than 1KB free? 
*/ +- { +- long esp; ++ check_stack_overflow(); + +- __asm__ __volatile__("andl %%esp,%0" : +- "=r" (esp) : "0" (THREAD_SIZE - 1)); +- if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { +- printk("do_IRQ: stack overflow: %ld\n", +- esp - sizeof(struct thread_info)); +- dump_stack(); +- } +- } +-#endif + kstat_this_cpu.irqs[irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); +@@ -513,6 +572,8 @@ asmlinkage unsigned int do_IRQ(struct pt + /* build the stack frame on the IRQ stack */ + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; ++ irqctx->tinfo.real_stack = curctx->tinfo.real_stack; ++ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack; + irqctx->tinfo.previous_esp = current_stack_pointer(); + + *--isp = (u32) action; +@@ -541,7 +602,6 @@ asmlinkage unsigned int do_IRQ(struct pt + } + + #else +- + for (;;) { + irqreturn_t action_ret; + +@@ -568,6 +628,7 @@ out: + spin_unlock(&desc->lock); + + irq_exit(); ++ (void)set_exec_env(envid); + + return 1; + } +@@ -995,13 +1056,15 @@ static int irq_affinity_read_proc(char * + return len; + } + ++int no_irq_affinity; ++ + static int irq_affinity_write_proc(struct file *file, const char __user *buffer, + unsigned long count, void *data) + { + int irq = (long)data, full_count = count, err; + cpumask_t new_value, tmp; + +- if (!irq_desc[irq].handler->set_affinity) ++ if (!irq_desc[irq].handler->set_affinity || no_irq_affinity) + return -EIO; + + err = cpumask_parse(buffer, count, new_value); +@@ -1122,6 +1185,9 @@ void init_irq_proc (void) + */ + static char softirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE))); + static char hardirq_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE))); ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++static char overflow_stack[NR_CPUS * THREAD_SIZE] __attribute__((__aligned__(THREAD_SIZE))); ++#endif + + /* + * allocate per-cpu stacks for hardirq and for softirq processing +@@ -1151,8 +1217,19 @@ void irq_ctx_init(int cpu) + + softirq_ctx[cpu] = irqctx; + +- printk("CPU %u irqstacks, hard=%p soft=%p\n", +- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); ++#ifdef CONFIG_DEBUG_STACKOVERFLOW ++ irqctx = (union irq_ctx*) &overflow_stack[cpu*THREAD_SIZE]; ++ irqctx->tinfo.task = NULL; ++ irqctx->tinfo.exec_domain = NULL; ++ irqctx->tinfo.cpu = cpu; ++ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; ++ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); ++ ++ overflow_ctx[cpu] = irqctx; ++#endif ++ ++ printk("CPU %u irqstacks, hard=%p soft=%p overflow=%p\n", ++ cpu,hardirq_ctx[cpu],softirq_ctx[cpu],overflow_ctx[cpu]); + } + + extern asmlinkage void __do_softirq(void); +@@ -1173,6 +1250,8 @@ asmlinkage void do_softirq(void) + curctx = current_thread_info(); + irqctx = softirq_ctx[smp_processor_id()]; + irqctx->tinfo.task = curctx->task; ++ irqctx->tinfo.real_stack = curctx->real_stack; ++ irqctx->tinfo.virtual_stack = curctx->virtual_stack; + irqctx->tinfo.previous_esp = current_stack_pointer(); + + /* build the stack frame on the softirq stack */ +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ldt.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/ldt.c +--- linux-2.6.8.1.orig/arch/i386/kernel/ldt.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/ldt.c 2006-05-11 13:05:38.000000000 +0400 +@@ -2,7 +2,7 @@ + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds +- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> ++ * Copyright (C) 1999, 2003 Ingo Molnar 
<mingo@redhat.com> + */ + + #include <linux/errno.h> +@@ -18,6 +18,8 @@ + #include <asm/system.h> + #include <asm/ldt.h> + #include <asm/desc.h> ++#include <linux/highmem.h> ++#include <asm/atomic_kmap.h> + + #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ + static void flush_ldt(void *null) +@@ -29,34 +31,31 @@ static void flush_ldt(void *null) + + static int alloc_ldt(mm_context_t *pc, int mincount, int reload) + { +- void *oldldt; +- void *newldt; +- int oldsize; ++ int oldsize, newsize, i; + + if (mincount <= pc->size) + return 0; ++ /* ++ * LDT got larger - reallocate if necessary. ++ */ + oldsize = pc->size; + mincount = (mincount+511)&(~511); +- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); +- else +- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); +- +- if (!newldt) +- return -ENOMEM; +- +- if (oldsize) +- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); +- oldldt = pc->ldt; +- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); +- pc->ldt = newldt; +- wmb(); ++ newsize = mincount*LDT_ENTRY_SIZE; ++ for (i = 0; i < newsize; i += PAGE_SIZE) { ++ int nr = i/PAGE_SIZE; ++ BUG_ON(i >= 64*1024); ++ if (!pc->ldt_pages[nr]) { ++ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC); ++ if (!pc->ldt_pages[nr]) ++ return -ENOMEM; ++ clear_highpage(pc->ldt_pages[nr]); ++ } ++ } + pc->size = mincount; +- wmb(); +- + if (reload) { + #ifdef CONFIG_SMP + cpumask_t mask; ++ + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); +@@ -67,24 +66,32 @@ static int alloc_ldt(mm_context_t *pc, i + load_LDT(pc); + #endif + } +- if (oldsize) { +- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(oldldt); +- else +- kfree(oldldt); +- } + return 0; + } + + static inline int copy_ldt(mm_context_t *new, mm_context_t *old) + { +- int err = alloc_ldt(new, old->size, 0); +- if (err < 0) ++ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; ++ ++ err = alloc_ldt(new, size, 0); ++ if (err < 0) { ++ new->size = 0; + return err; +- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); ++ } ++ for (i = 0; i < nr_pages; i++) ++ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); + return 0; + } + ++static void free_ldt(mm_context_t *mc) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_LDT_PAGES; i++) ++ if (mc->ldt_pages[i]) ++ __free_page(mc->ldt_pages[i]); ++} ++ + /* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. +@@ -96,10 +103,13 @@ int init_new_context(struct task_struct + + init_MUTEX(&mm->context.sem); + mm->context.size = 0; ++ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + down(&old_mm->context.sem); + retval = copy_ldt(&mm->context, &old_mm->context); ++ if (retval < 0) ++ free_ldt(&mm->context); + up(&old_mm->context.sem); + } + return retval; +@@ -107,23 +117,21 @@ int init_new_context(struct task_struct + + /* + * No need to lock the MM as we are the last user ++ * Do not touch the ldt register, we are already ++ * in the next thread. 
+ */ + void destroy_context(struct mm_struct *mm) + { +- if (mm->context.size) { +- if (mm == current->active_mm) +- clear_LDT(); +- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(mm->context.ldt); +- else +- kfree(mm->context.ldt); +- mm->context.size = 0; +- } ++ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; ++ ++ for (i = 0; i < nr_pages; i++) ++ __free_page(mm->context.ldt_pages[i]); ++ mm->context.size = 0; + } + + static int read_ldt(void __user * ptr, unsigned long bytecount) + { +- int err; ++ int err, i; + unsigned long size; + struct mm_struct * mm = current->mm; + +@@ -138,8 +146,25 @@ static int read_ldt(void __user * ptr, u + size = bytecount; + + err = 0; +- if (copy_to_user(ptr, mm->context.ldt, size)) +- err = -EFAULT; ++ /* ++ * This is necessary just in case we got here straight from a ++ * context-switch where the ptes were set but no tlb flush ++ * was done yet. We rather avoid doing a TLB flush in the ++ * context-switch path and do it here instead. ++ */ ++ __flush_tlb_global(); ++ ++ for (i = 0; i < size; i += PAGE_SIZE) { ++ int nr = i / PAGE_SIZE, bytes; ++ char *kaddr = kmap(mm->context.ldt_pages[nr]); ++ ++ bytes = size - i; ++ if (bytes > PAGE_SIZE) ++ bytes = PAGE_SIZE; ++ if (copy_to_user(ptr + i, kaddr, bytes)) ++ err = -EFAULT; ++ kunmap(mm->context.ldt_pages[nr]); ++ } + up(&mm->context.sem); + if (err < 0) + return err; +@@ -158,7 +183,7 @@ static int read_default_ldt(void __user + + err = 0; + address = &default_ldt[0]; +- size = 5*sizeof(struct desc_struct); ++ size = 5*LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + +@@ -200,7 +225,15 @@ static int write_ldt(void __user * ptr, + goto out_unlock; + } + +- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); ++ /* ++ * No rescheduling allowed from this point to the install. ++ * ++ * We do a TLB flush for the same reason as in the read_ldt() path. ++ */ ++ preempt_disable(); ++ __flush_tlb_global(); ++ lp = (__u32 *) ((ldt_info.entry_number << 3) + ++ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { +@@ -221,6 +254,7 @@ install: + *lp = entry_1; + *(lp+1) = entry_2; + error = 0; ++ preempt_enable(); + + out_unlock: + up(&mm->context.sem); +@@ -248,3 +282,26 @@ asmlinkage int sys_modify_ldt(int func, + } + return ret; + } ++ ++/* ++ * load one particular LDT into the current CPU ++ */ ++void load_LDT_nolock(mm_context_t *pc, int cpu) ++{ ++ struct page **pages = pc->ldt_pages; ++ int count = pc->size; ++ int nr_pages, i; ++ ++ if (likely(!count)) { ++ pages = &default_ldt_page; ++ count = 5; ++ } ++ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; ++ ++ for (i = 0; i < nr_pages; i++) { ++ __kunmap_atomic_type(KM_LDT_PAGE0 - i); ++ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); ++ } ++ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); ++ load_LDT_desc(); ++} +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/mpparse.c +--- linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/mpparse.c 2006-05-11 13:05:38.000000000 +0400 +@@ -690,7 +690,7 @@ void __init get_smp_config (void) + * Read the physical hardware table. Anything here will + * override the defaults. 
+ */ +- if (!smp_read_mpc((void *)mpf->mpf_physptr)) { ++ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { + smp_found_config = 0; + printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); + printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/nmi.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/nmi.c +--- linux-2.6.8.1.orig/arch/i386/kernel/nmi.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/nmi.c 2006-05-11 13:05:49.000000000 +0400 +@@ -31,7 +31,12 @@ + #include <asm/mpspec.h> + #include <asm/nmi.h> + +-unsigned int nmi_watchdog = NMI_NONE; ++#ifdef CONFIG_NMI_WATCHDOG ++#define NMI_DEFAULT NMI_IO_APIC ++#else ++#define NMI_DEFAULT NMI_NONE ++#endif ++unsigned int nmi_watchdog = NMI_DEFAULT; + static unsigned int nmi_hz = HZ; + static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ + static unsigned int nmi_p4_cccr_val; +@@ -459,6 +464,21 @@ void touch_nmi_watchdog (void) + alert_counter[i] = 0; + } + ++static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED; ++ ++void smp_show_regs(struct pt_regs *regs, void *info) ++{ ++ if (regs == NULL) ++ return; ++ ++ bust_spinlocks(1); ++ spin_lock(&show_regs_lock); ++ printk("----------- IPI show regs -----------"); ++ show_regs(regs); ++ spin_unlock(&show_regs_lock); ++ bust_spinlocks(0); ++} ++ + void nmi_watchdog_tick (struct pt_regs * regs) + { + +@@ -486,7 +506,11 @@ void nmi_watchdog_tick (struct pt_regs * + bust_spinlocks(1); + printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); + show_registers(regs); +- printk("console shuts up ...\n"); ++ smp_nmi_call_function(smp_show_regs, NULL, 1); ++ bust_spinlocks(1); ++ /* current CPU messages should go bottom */ ++ if (!decode_call_traces) ++ smp_show_regs(regs, NULL); + console_silent(); + spin_unlock(&nmi_print_lock); + bust_spinlocks(0); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/process.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/process.c +--- linux-2.6.8.1.orig/arch/i386/kernel/process.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/process.c 2006-05-11 13:05:49.000000000 +0400 +@@ -46,6 +46,7 @@ + #include <asm/i387.h> + #include <asm/irq.h> + #include <asm/desc.h> ++#include <asm/atomic_kmap.h> + #ifdef CONFIG_MATH_EMULATION + #include <asm/math_emu.h> + #endif +@@ -219,11 +220,14 @@ __setup("idle=", idle_setup); + void show_regs(struct pt_regs * regs) + { + unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; ++ extern int die_counter; + + printk("\n"); +- printk("Pid: %d, comm: %20s\n", current->pid, current->comm); +- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); +- print_symbol("EIP is at %s\n", regs->eip); ++ printk("Pid: %d, comm: %20s, oopses: %d\n", current->pid, current->comm, die_counter); ++ printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(), ++ task_vsched_id(current), task_cpu(current)); ++ if (decode_call_traces) ++ print_symbol("EIP is at %s\n", regs->eip); + + if (regs->xcs & 3) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); +@@ -247,6 +251,8 @@ void show_regs(struct pt_regs * regs) + : "=r" (cr4): "0" (0)); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + show_trace(NULL, ®s->esp); ++ if (!decode_call_traces) ++ printk(" EIP: [<%08lx>]\n",regs->eip); + } + + /* +@@ -272,6 +278,13 @@ int kernel_thread(int (*fn)(void *), 
voi + { + struct pt_regs regs; + ++ /* Don't allow kernel_thread() inside VE */ ++ if (!ve_is_super(get_exec_env())) { ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ + memset(®s, 0, sizeof(regs)); + + regs.ebx = (unsigned long) fn; +@@ -311,6 +324,9 @@ void flush_thread(void) + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); ++#ifdef CONFIG_X86_HIGH_ENTRY ++ clear_thread_flag(TIF_DB7); ++#endif + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + /* + * Forget coprocessor state.. +@@ -324,9 +340,8 @@ void release_thread(struct task_struct * + if (dead_task->mm) { + // temporary debugging check + if (dead_task->mm->context.size) { +- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", ++ printk("WARNING: dead process %8s still has LDT? <%d>\n", + dead_task->comm, +- dead_task->mm->context.ldt, + dead_task->mm->context.size); + BUG(); + } +@@ -350,7 +365,7 @@ int copy_thread(int nr, unsigned long cl + { + struct pt_regs * childregs; + struct task_struct *tsk; +- int err; ++ int err, i; + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; + *childregs = *regs; +@@ -361,7 +376,18 @@ int copy_thread(int nr, unsigned long cl + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + ++ /* ++ * get the two stack pages, for the virtual stack. ++ * ++ * IMPORTANT: this code relies on the fact that the task ++ * structure is an THREAD_SIZE aligned piece of physical memory. ++ */ ++ for (i = 0; i < ARRAY_SIZE(p->thread_info->stack_page); i++) ++ p->thread_info->stack_page[i] = ++ virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE)); ++ + p->thread.eip = (unsigned long) ret_from_fork; ++ p->thread_info->real_stack = p->thread_info; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); +@@ -513,10 +539,42 @@ struct task_struct fastcall * __switch_t + + __unlazy_fpu(prev_p); + ++#ifdef CONFIG_X86_HIGH_ENTRY ++{ ++ int i; ++ /* ++ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is ++ * needed because otherwise NMIs could interrupt the ++ * user-return code with a virtual stack and stale TLBs.) ++ */ ++ for (i = 0; i < ARRAY_SIZE(next_p->thread_info->stack_page); i++) { ++ __kunmap_atomic_type(KM_VSTACK_TOP-i); ++ __kmap_atomic(next_p->thread_info->stack_page[i], KM_VSTACK_TOP-i); ++ } ++ /* ++ * NOTE: here we rely on the task being the stack as well ++ */ ++ next_p->thread_info->virtual_stack = ++ (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); ++} ++#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) ++ /* ++ * If next was preempted on entry from userspace to kernel, ++ * and now it's on a different cpu, we need to adjust %esp. ++ * This assumes that entry.S does not copy %esp while on the ++ * virtual stack (with interrupts enabled): which is so, ++ * except within __SWITCH_KERNELSPACE itself. ++ */ ++ if (unlikely(next->esp >= TASK_SIZE)) { ++ next->esp &= THREAD_SIZE - 1; ++ next->esp |= (unsigned long) next_p->thread_info->virtual_stack; ++ } ++#endif ++#endif + /* + * Reload esp0, LDT and the page table pointer: + */ +- load_esp0(tss, next); ++ load_virtual_esp0(tss, next_p); + + /* + * Load the per-thread Thread-Local Storage descriptor. 
+@@ -759,6 +817,8 @@ asmlinkage int sys_get_thread_area(struc + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + ++ memset(&info, 0, sizeof(info)); ++ + desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; + + info.entry_number = idx; +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/ptrace.c 2006-05-11 13:05:49.000000000 +0400 +@@ -253,7 +253,7 @@ asmlinkage int sys_ptrace(long request, + } + ret = -ESRCH; + read_lock(&tasklist_lock); +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); +@@ -388,7 +388,7 @@ asmlinkage int sys_ptrace(long request, + long tmp; + + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ +@@ -541,8 +541,10 @@ void do_syscall_trace(struct pt_regs *re + return; + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ ++ set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); ++ clear_pn_state(current); + + /* + * this isn't the same as continuing with a signal, but it will do +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/reboot.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/reboot.c +--- linux-2.6.8.1.orig/arch/i386/kernel/reboot.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/reboot.c 2006-05-11 13:05:38.000000000 +0400 +@@ -233,12 +233,11 @@ void machine_real_restart(unsigned char + CMOS_WRITE(0x00, 0x8f); + spin_unlock_irqrestore(&rtc_lock, flags); + +- /* Remap the kernel at virtual address zero, as well as offset zero +- from the kernel segment. This assumes the kernel segment starts at +- virtual address PAGE_OFFSET. */ +- +- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, +- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); ++ /* ++ * Remap the first 16 MB of RAM (which includes the kernel image) ++ * at virtual address zero: ++ */ ++ setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); + + /* + * Use `swapper_pg_dir' as our page directory. +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/setup.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/setup.c +--- linux-2.6.8.1.orig/arch/i386/kernel/setup.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/setup.c 2006-05-11 13:05:38.000000000 +0400 +@@ -39,6 +39,7 @@ + #include <linux/efi.h> + #include <linux/init.h> + #include <linux/edd.h> ++#include <linux/mmzone.h> + #include <video/edid.h> + #include <asm/e820.h> + #include <asm/mpspec.h> +@@ -1073,7 +1074,19 @@ static unsigned long __init setup_memory + INITRD_START ? 
INITRD_START + PAGE_OFFSET : 0; + initrd_end = initrd_start+INITRD_SIZE; + } +- else { ++ else if ((max_low_pfn << PAGE_SHIFT) < ++ PAGE_ALIGN(INITRD_START + INITRD_SIZE)) { ++ /* GRUB places initrd as high as possible, so when ++ VMALLOC_AREA is bigger than std Linux has, such ++ initrd is inaccessiable in normal zone (highmem) */ ++ ++ /* initrd should be totally in highmem, sorry */ ++ BUG_ON(INITRD_START < (max_low_pfn << PAGE_SHIFT)); ++ ++ initrd_copy = INITRD_SIZE; ++ printk(KERN_ERR "initrd: GRUB workaround enabled\n"); ++ /* initrd is copied from highmem in initrd_move() */ ++ } else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + INITRD_START + INITRD_SIZE, +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/signal.c +--- linux-2.6.8.1.orig/arch/i386/kernel/signal.c 2004-08-14 14:55:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/signal.c 2006-05-11 13:05:45.000000000 +0400 +@@ -42,6 +42,7 @@ sys_sigsuspend(int history0, int history + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; ++ set_sigsuspend_state(current, saveset); + siginitset(¤t->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +@@ -50,8 +51,10 @@ sys_sigsuspend(int history0, int history + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); +- if (do_signal(regs, &saveset)) ++ if (do_signal(regs, &saveset)) { ++ clear_sigsuspend_state(current); + return -EINTR; ++ } + } + } + +@@ -70,6 +73,7 @@ sys_rt_sigsuspend(struct pt_regs regs) + + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; ++ set_sigsuspend_state(current, saveset); + current->blocked = newset; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +@@ -78,8 +82,10 @@ sys_rt_sigsuspend(struct pt_regs regs) + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); +- if (do_signal(®s, &saveset)) ++ if (do_signal(®s, &saveset)) { ++ clear_sigsuspend_state(current); + return -EINTR; ++ } + } + } + +@@ -132,28 +138,29 @@ sys_sigaltstack(unsigned long ebx) + */ + + static int +-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) ++restore_sigcontext(struct pt_regs *regs, ++ struct sigcontext __user *__sc, int *peax) + { +- unsigned int err = 0; ++ struct sigcontext scratch; /* 88 bytes of scratch area */ + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + +-#define COPY(x) err |= __get_user(regs->x, &sc->x) ++ if (copy_from_user(&scratch, __sc, sizeof(scratch))) ++ return -EFAULT; ++ ++#define COPY(x) regs->x = scratch.x + + #define COPY_SEG(seg) \ +- { unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ ++ { unsigned short tmp = scratch.seg; \ + regs->x##seg = tmp; } + + #define COPY_SEG_STRICT(seg) \ +- { unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ ++ { unsigned short tmp = scratch.seg; \ + regs->x##seg = tmp|3; } + + #define GET_SEG(seg) \ +- { unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ ++ { unsigned short tmp = scratch.seg; \ + loadsegment(seg,tmp); } + + #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ +@@ -176,27 +183,29 @@ restore_sigcontext(struct pt_regs *regs, + COPY_SEG_STRICT(ss); + + { +- unsigned int tmpflags; +- err |= __get_user(tmpflags, &sc->eflags); ++ unsigned int tmpflags = scratch.eflags; + regs->eflags = (regs->eflags 
& ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { +- struct _fpstate __user * buf; +- err |= __get_user(buf, &sc->fpstate); ++ struct _fpstate * buf = scratch.fpstate; + if (buf) { + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) +- goto badframe; +- err |= restore_i387(buf); ++ return -EFAULT; ++ if (restore_i387(buf)) ++ return -EFAULT; ++ } else { ++ struct task_struct *me = current; ++ if (me->used_math) { ++ clear_fpu(me); ++ me->used_math = 0; ++ } + } + } + +- err |= __get_user(*peax, &sc->eax); +- return err; +- +-badframe: +- return 1; ++ *peax = scratch.eax; ++ return 0; + } + + asmlinkage int sys_sigreturn(unsigned long __unused) +@@ -265,46 +274,47 @@ badframe: + */ + + static int +-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, ++setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate, + struct pt_regs *regs, unsigned long mask) + { +- int tmp, err = 0; ++ struct sigcontext sc; /* 88 bytes of scratch area */ ++ int tmp; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); +- err |= __put_user(tmp, (unsigned int __user *)&sc->gs); ++ *(unsigned int *)&sc.gs = tmp; + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); +- err |= __put_user(tmp, (unsigned int __user *)&sc->fs); +- +- err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); +- err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); +- err |= __put_user(regs->edi, &sc->edi); +- err |= __put_user(regs->esi, &sc->esi); +- err |= __put_user(regs->ebp, &sc->ebp); +- err |= __put_user(regs->esp, &sc->esp); +- err |= __put_user(regs->ebx, &sc->ebx); +- err |= __put_user(regs->edx, &sc->edx); +- err |= __put_user(regs->ecx, &sc->ecx); +- err |= __put_user(regs->eax, &sc->eax); +- err |= __put_user(current->thread.trap_no, &sc->trapno); +- err |= __put_user(current->thread.error_code, &sc->err); +- err |= __put_user(regs->eip, &sc->eip); +- err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); +- err |= __put_user(regs->eflags, &sc->eflags); +- err |= __put_user(regs->esp, &sc->esp_at_signal); +- err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); ++ *(unsigned int *)&sc.fs = tmp; ++ *(unsigned int *)&sc.es = regs->xes; ++ *(unsigned int *)&sc.ds = regs->xds; ++ sc.edi = regs->edi; ++ sc.esi = regs->esi; ++ sc.ebp = regs->ebp; ++ sc.esp = regs->esp; ++ sc.ebx = regs->ebx; ++ sc.edx = regs->edx; ++ sc.ecx = regs->ecx; ++ sc.eax = regs->eax; ++ sc.trapno = current->thread.trap_no; ++ sc.err = current->thread.error_code; ++ sc.eip = regs->eip; ++ *(unsigned int *)&sc.cs = regs->xcs; ++ sc.eflags = regs->eflags; ++ sc.esp_at_signal = regs->esp; ++ *(unsigned int *)&sc.ss = regs->xss; + + tmp = save_i387(fpstate); + if (tmp < 0) +- err = 1; +- else +- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); ++ return 1; ++ sc.fpstate = tmp ? fpstate : NULL; + + /* non-iBCS2 extensions.. */ +- err |= __put_user(mask, &sc->oldmask); +- err |= __put_user(current->thread.cr2, &sc->cr2); ++ sc.oldmask = mask; ++ sc.cr2 = current->thread.cr2; + +- return err; ++ if (copy_to_user(__sc, &sc, sizeof(sc))) ++ return 1; ++ return 0; + } + + /* +@@ -443,7 +453,7 @@ static void setup_rt_frame(int sig, stru + /* Create the ucontext. 
*/ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); +- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); ++ err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->esp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); +@@ -565,9 +575,10 @@ int fastcall do_signal(struct pt_regs *r + if ((regs->xcs & 3) != 3) + return 1; + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); +- goto no_signal; ++ if (unlikely(test_thread_flag(TIF_FREEZE))) { ++ refrigerator(); ++ if (!signal_pending(current)) ++ goto no_signal; + } + + if (!oldset) +@@ -580,7 +591,9 @@ int fastcall do_signal(struct pt_regs *r + * have been cleared if the watchpoint triggered + * inside the kernel. + */ +- __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7])); ++ if (unlikely(current->thread.debugreg[7])) { ++ __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7])); ++ } + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, &info, oldset, regs); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smp.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/smp.c +--- linux-2.6.8.1.orig/arch/i386/kernel/smp.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/smp.c 2006-05-11 13:05:38.000000000 +0400 +@@ -22,6 +22,7 @@ + + #include <asm/mtrr.h> + #include <asm/tlbflush.h> ++#include <asm/nmi.h> + #include <mach_ipi.h> + #include <mach_apic.h> + +@@ -122,7 +123,7 @@ static inline int __prepare_ICR2 (unsign + return SET_APIC_DEST_FIELD(mask); + } + +-inline void __send_IPI_shortcut(unsigned int shortcut, int vector) ++void __send_IPI_shortcut(unsigned int shortcut, int vector) + { + /* + * Subtle. In the case of the 'never do double writes' workaround +@@ -157,7 +158,7 @@ void fastcall send_IPI_self(int vector) + /* + * This is only used on smaller machines. 
+ */ +-inline void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) ++void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) + { + unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long cfg; +@@ -326,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt + + if (flush_mm == cpu_tlbstate[cpu].active_mm) { + if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { ++#ifndef CONFIG_X86_SWITCH_PAGETABLES + if (flush_va == FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); ++#endif + } else + leave_mm(cpu); + } +@@ -395,21 +398,6 @@ static void flush_tlb_others(cpumask_t c + spin_unlock(&tlbstate_lock); + } + +-void flush_tlb_current_task(void) +-{ +- struct mm_struct *mm = current->mm; +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- local_flush_tlb(); +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, FLUSH_ALL); +- preempt_enable(); +-} +- + void flush_tlb_mm (struct mm_struct * mm) + { + cpumask_t cpu_mask; +@@ -441,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc + + if (current->active_mm == mm) { + if(current->mm) +- __flush_tlb_one(va); ++#ifndef CONFIG_X86_SWITCH_PAGETABLES ++ __flush_tlb_one(va) ++#endif ++ ; + else + leave_mm(smp_processor_id()); + } +@@ -547,6 +538,89 @@ int smp_call_function (void (*func) (voi + return 0; + } + ++static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED; ++static struct nmi_call_data_struct { ++ smp_nmi_function func; ++ void *info; ++ atomic_t started; ++ atomic_t finished; ++ cpumask_t cpus_called; ++ int wait; ++} *nmi_call_data; ++ ++static int smp_nmi_callback(struct pt_regs * regs, int cpu) ++{ ++ smp_nmi_function func; ++ void *info; ++ int wait; ++ ++ func = nmi_call_data->func; ++ info = nmi_call_data->info; ++ wait = nmi_call_data->wait; ++ ack_APIC_irq(); ++ /* prevent from calling func() multiple times */ ++ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called)) ++ return 0; ++ /* ++ * notify initiating CPU that I've grabbed the data and am ++ * about to execute the function ++ */ ++ mb(); ++ atomic_inc(&nmi_call_data->started); ++ /* at this point the nmi_call_data structure is out of scope */ ++ irq_enter(); ++ func(regs, info); ++ irq_exit(); ++ if (wait) ++ atomic_inc(&nmi_call_data->finished); ++ ++ return 0; ++} ++ ++/* ++ * This function tries to call func(regs, info) on each cpu. ++ * Func must be fast and non-blocking. ++ * May be called with disabled interrupts and from any context. 
++ */ ++int smp_nmi_call_function(smp_nmi_function func, void *info, int wait) ++{ ++ struct nmi_call_data_struct data; ++ int cpus; ++ ++ cpus = num_online_cpus() - 1; ++ if (!cpus) ++ return 0; ++ ++ data.func = func; ++ data.info = info; ++ data.wait = wait; ++ atomic_set(&data.started, 0); ++ atomic_set(&data.finished, 0); ++ cpus_clear(data.cpus_called); ++ /* prevent this cpu from calling func if NMI happens */ ++ cpu_set(smp_processor_id(), data.cpus_called); ++ ++ if (!spin_trylock(&nmi_call_lock)) ++ return -1; ++ ++ nmi_call_data = &data; ++ set_nmi_ipi_callback(smp_nmi_callback); ++ mb(); ++ ++ /* Send a message to all other CPUs and wait for them to respond */ ++ send_IPI_allbutself(APIC_DM_NMI); ++ while (atomic_read(&data.started) != cpus) ++ barrier(); ++ ++ unset_nmi_ipi_callback(); ++ if (wait) ++ while (atomic_read(&data.finished) != cpus) ++ barrier(); ++ spin_unlock(&nmi_call_lock); ++ ++ return 0; ++} ++ + static void stop_this_cpu (void * dummy) + { + /* +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/smpboot.c +--- linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/smpboot.c 2006-05-11 13:05:40.000000000 +0400 +@@ -309,6 +309,8 @@ static void __init synchronize_tsc_bp (v + if (!buggy) + printk("passed.\n"); + ; ++ /* TSC reset. kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; + } + + static void __init synchronize_tsc_ap (void) +@@ -334,6 +336,8 @@ static void __init synchronize_tsc_ap (v + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + } ++ /* TSC reset. kill whatever might rely on old values */ ++ VE_TASK_INFO(current)->wakeup_stamp = 0; + } + #undef NR_LOOPS + +@@ -405,8 +409,6 @@ void __init smp_callin(void) + setup_local_APIC(); + map_cpu_to_logical_apicid(); + +- local_irq_enable(); +- + /* + * Get our bogomips. + */ +@@ -419,7 +421,7 @@ void __init smp_callin(void) + smp_store_cpu_info(cpuid); + + disable_APIC_timer(); +- local_irq_disable(); ++ + /* + * Allow the master to continue. + */ +@@ -463,6 +465,10 @@ int __init start_secondary(void *unused) + */ + local_flush_tlb(); + cpu_set(smp_processor_id(), cpu_online_map); ++ ++ /* We can take interrupts now: we're officially "up". */ ++ local_irq_enable(); ++ + wmb(); + return cpu_idle(); + } +@@ -499,7 +505,7 @@ static struct task_struct * __init fork_ + * don't care about the eip and regs settings since + * we'll never reschedule the forked task. + */ +- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL, NULL); ++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL, NULL, 0); + } + + #ifdef CONFIG_NUMA +@@ -810,6 +816,9 @@ static int __init do_boot_cpu(int apicid + + idle->thread.eip = (unsigned long) start_secondary; + ++ /* Cosmetic: sleep_time won't be changed afterwards for the idle ++ * thread; keep it 0 rather than -cycles. */ ++ VE_TASK_INFO(idle)->sleep_time = 0; + unhash_process(idle); + + /* start_eip had better be page-aligned! 
*/ +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/sys_i386.c +--- linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/sys_i386.c 2006-05-11 13:05:40.000000000 +0400 +@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn + if (!name) + return -EFAULT; + down_read(&uts_sem); +- err=copy_to_user(name, &system_utsname, sizeof (*name)); ++ err=copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + return err?-EFAULT:0; + } +@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol + + down_read(&uts_sem); + +- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); ++ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN); + error |= __put_user(0,name->sysname+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN); + error |= __put_user(0,name->nodename+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN); + error |= __put_user(0,name->release+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN); + error |= __put_user(0,name->version+__OLD_UTS_LEN); +- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); ++ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN); + error |= __put_user(0,name->machine+__OLD_UTS_LEN); + + up_read(&uts_sem); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/sysenter.c +--- linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/sysenter.c 2006-05-11 13:05:38.000000000 +0400 +@@ -18,13 +18,18 @@ + #include <asm/msr.h> + #include <asm/pgtable.h> + #include <asm/unistd.h> ++#include <linux/highmem.h> + + extern asmlinkage void sysenter_entry(void); + + void enable_sep_cpu(void *info) + { + int cpu = get_cpu(); ++#ifdef CONFIG_X86_HIGH_ENTRY ++ struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; ++#else + struct tss_struct *tss = init_tss + cpu; ++#endif + + tss->ss1 = __KERNEL_CS; + tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/time.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/time.c +--- linux-2.6.8.1.orig/arch/i386/kernel/time.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/time.c 2006-05-11 13:05:29.000000000 +0400 +@@ -362,7 +362,7 @@ void __init hpet_time_init(void) + xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + wall_to_monotonic.tv_nsec = -xtime.tv_nsec; + +- if (hpet_enable() >= 0) { ++ if ((hpet_enable() >= 0) && hpet_use_timer) { + printk("Using HPET for base-timer\n"); + } + +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/time_hpet.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/time_hpet.c +--- linux-2.6.8.1.orig/arch/i386/kernel/time_hpet.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/time_hpet.c 2006-05-11 13:05:29.000000000 +0400 +@@ -26,6 +26,7 @@ + unsigned long hpet_period; /* fsecs / HPET clock */ + unsigned long hpet_tick; /* hpet clks count 
per tick */ + unsigned long hpet_address; /* hpet memory map physical address */ ++int hpet_use_timer; + + static int use_hpet; /* can be used for runtime check of hpet */ + static int boot_hpet_disable; /* boottime override for HPET timer */ +@@ -88,8 +89,7 @@ int __init hpet_enable(void) + * So, we are OK with HPET_EMULATE_RTC part too, where we need + * to have atleast 2 timers. + */ +- if (!(id & HPET_ID_NUMBER) || +- !(id & HPET_ID_LEGSUP)) ++ if (!(id & HPET_ID_NUMBER)) + return -1; + + hpet_period = hpet_readl(HPET_PERIOD); +@@ -109,6 +109,8 @@ int __init hpet_enable(void) + if (hpet_tick_rem > (hpet_period >> 1)) + hpet_tick++; /* rounding the result */ + ++ hpet_use_timer = id & HPET_ID_LEGSUP; ++ + /* + * Stop the timers and reset the main counter. + */ +@@ -118,21 +120,30 @@ int __init hpet_enable(void) + hpet_writel(0, HPET_COUNTER); + hpet_writel(0, HPET_COUNTER + 4); + +- /* +- * Set up timer 0, as periodic with first interrupt to happen at +- * hpet_tick, and period also hpet_tick. +- */ +- cfg = hpet_readl(HPET_T0_CFG); +- cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | +- HPET_TN_SETVAL | HPET_TN_32BIT; +- hpet_writel(cfg, HPET_T0_CFG); +- hpet_writel(hpet_tick, HPET_T0_CMP); ++ if (hpet_use_timer) { ++ /* ++ * Set up timer 0, as periodic with first interrupt to happen at ++ * hpet_tick, and period also hpet_tick. ++ */ ++ cfg = hpet_readl(HPET_T0_CFG); ++ cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | ++ HPET_TN_SETVAL | HPET_TN_32BIT; ++ hpet_writel(cfg, HPET_T0_CFG); ++ /* ++ * Some systems seems to need two writes to HPET_T0_CMP, ++ * to get interrupts working ++ */ ++ hpet_writel(hpet_tick, HPET_T0_CMP); ++ hpet_writel(hpet_tick, HPET_T0_CMP); ++ } + + /* + * Go! + */ + cfg = hpet_readl(HPET_CFG); +- cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; ++ if (hpet_use_timer) ++ cfg |= HPET_CFG_LEGACY; ++ cfg |= HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); + + use_hpet = 1; +@@ -181,7 +192,8 @@ int __init hpet_enable(void) + #endif + + #ifdef CONFIG_X86_LOCAL_APIC +- wait_timer_tick = wait_hpet_tick; ++ if (hpet_use_timer) ++ wait_timer_tick = wait_hpet_tick; + #endif + return 0; + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_hpet.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/timers/timer_hpet.c +--- linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_hpet.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/timers/timer_hpet.c 2006-05-11 13:05:29.000000000 +0400 +@@ -79,7 +79,7 @@ static unsigned long get_offset_hpet(voi + + eax = hpet_readl(HPET_COUNTER); + eax -= hpet_last; /* hpet delta */ +- ++ eax = min(hpet_tick, eax); + /* + * Time offset = (hpet delta) * ( usecs per HPET clock ) + * = (hpet delta) * ( usecs per tick / HPET clocks per tick) +@@ -105,9 +105,12 @@ static void mark_offset_hpet(void) + last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; + rdtsc(last_tsc_low, last_tsc_high); + +- offset = hpet_readl(HPET_T0_CMP) - hpet_tick; +- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { +- int lost_ticks = (offset - hpet_last) / hpet_tick; ++ if (hpet_use_timer) ++ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; ++ else ++ offset = hpet_readl(HPET_COUNTER); ++ if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { ++ int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; + jiffies_64 += lost_ticks; + } + hpet_last = offset; +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/timers/timer_tsc.c +--- 
linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/timers/timer_tsc.c 2006-05-11 13:05:39.000000000 +0400 +@@ -81,7 +81,7 @@ static int count2; /* counter for mark_o + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in time_init. + */ +-static unsigned long fast_gettimeoffset_quotient; ++unsigned long fast_gettimeoffset_quotient; + + static unsigned long get_offset_tsc(void) + { +@@ -474,7 +474,7 @@ static int __init init_tsc(char* overrid + if (cpu_has_tsc) { + unsigned long tsc_quotient; + #ifdef CONFIG_HPET_TIMER +- if (is_hpet_enabled()){ ++ if (is_hpet_enabled() && hpet_use_timer) { + unsigned long result, remain; + printk("Using TSC for gettimeofday\n"); + tsc_quotient = calibrate_tsc_hpet(NULL); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/traps.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/traps.c +--- linux-2.6.8.1.orig/arch/i386/kernel/traps.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/traps.c 2006-05-11 13:05:49.000000000 +0400 +@@ -54,12 +54,8 @@ + + #include "mach_traps.h" + +-asmlinkage int system_call(void); +-asmlinkage void lcall7(void); +-asmlinkage void lcall27(void); +- +-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, +- { 0, 0 }, { 0, 0 } }; ++struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; ++struct page *default_ldt_page; + + /* Do we ignore FPU interrupts ? */ + char ignore_fpu_irq = 0; +@@ -93,45 +89,41 @@ asmlinkage void machine_check(void); + + static int kstack_depth_to_print = 24; + +-static int valid_stack_ptr(struct task_struct *task, void *p) ++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) + { +- if (p <= (void *)task->thread_info) +- return 0; +- if (kstack_end(p)) +- return 0; +- return 1; ++ return p > (void *)tinfo && ++ p < (void *)tinfo + THREAD_SIZE - 3; + } + +-#ifdef CONFIG_FRAME_POINTER +-static void print_context_stack(struct task_struct *task, unsigned long *stack, +- unsigned long ebp) ++static inline unsigned long print_context_stack(struct thread_info *tinfo, ++ unsigned long *stack, unsigned long ebp) + { + unsigned long addr; + +- while (valid_stack_ptr(task, (void *)ebp)) { ++#ifdef CONFIG_FRAME_POINTER ++ while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); +- printk(" [<%08lx>] ", addr); +- print_symbol("%s", addr); +- printk("\n"); ++ printk(" [<%08lx>]", addr); ++ if (decode_call_traces) { ++ print_symbol(" %s", addr); ++ printk("\n"); ++ } + ebp = *(unsigned long *)ebp; + } +-} + #else +-static void print_context_stack(struct task_struct *task, unsigned long *stack, +- unsigned long ebp) +-{ +- unsigned long addr; +- +- while (!kstack_end(stack)) { ++ while (valid_stack_ptr(tinfo, stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) { + printk(" [<%08lx>]", addr); +- print_symbol(" %s", addr); +- printk("\n"); ++ if (decode_call_traces) { ++ print_symbol(" %s", addr); ++ printk("\n"); ++ } + } + } +-} + #endif ++ return ebp; ++} + + void show_trace(struct task_struct *task, unsigned long * stack) + { +@@ -140,11 +132,6 @@ void show_trace(struct task_struct *task + if (!task) + task = current; + +- if (!valid_stack_ptr(task, stack)) { +- printk("Stack pointer is garbage, not printing trace\n"); +- return; +- } +- + if (task == current) { + /* Grab ebp right from our regs */ + asm ("movl %%ebp, %0" : "=r" (ebp) : ); +@@ 
-157,11 +144,14 @@ void show_trace(struct task_struct *task + struct thread_info *context; + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); +- print_context_stack(task, stack, ebp); ++ ebp = print_context_stack(context, stack, ebp); + stack = (unsigned long*)context->previous_esp; + if (!stack) + break; +- printk(" =======================\n"); ++ if (decode_call_traces) ++ printk(" =======================\n"); ++ else ++ printk(" =<ctx>= "); + } + } + +@@ -185,8 +175,12 @@ void show_stack(struct task_struct *task + printk("\n "); + printk("%08lx ", *stack++); + } +- printk("\nCall Trace:\n"); ++ printk("\nCall Trace:"); ++ if (decode_call_traces) ++ printk("\n"); + show_trace(task, esp); ++ if (!decode_call_traces) ++ printk("\n"); + } + + /* +@@ -197,6 +191,8 @@ void dump_stack(void) + unsigned long stack; + + show_trace(current, &stack); ++ if (!decode_call_traces) ++ printk("\n"); + } + + EXPORT_SYMBOL(dump_stack); +@@ -216,9 +212,10 @@ void show_registers(struct pt_regs *regs + ss = regs->xss & 0xffff; + } + print_modules(); +- printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" ++ printk("CPU: %d, VCPU: %d:%d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" + " (%s) \n", +- smp_processor_id(), 0xffff & regs->xcs, regs->eip, ++ smp_processor_id(), task_vsched_id(current), task_cpu(current), ++ 0xffff & regs->xcs, regs->eip, + print_tainted(), regs->eflags, UTS_RELEASE); + print_symbol("EIP is at %s\n", regs->eip); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", +@@ -227,8 +224,10 @@ void show_registers(struct pt_regs *regs + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); +- printk("Process %s (pid: %d, threadinfo=%p task=%p)", +- current->comm, current->pid, current_thread_info(), current); ++ printk("Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)", ++ current->comm, current->pid, ++ VEID(VE_TASK_INFO(current)->owner_env), ++ current_thread_info(), current); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. 
+@@ -244,8 +243,10 @@ void show_registers(struct pt_regs *regs + + for(i=0;i<20;i++) + { +- unsigned char c; +- if(__get_user(c, &((unsigned char*)regs->eip)[i])) { ++ unsigned char c = 0; ++ if ((user_mode(regs) && get_user(c, &((unsigned char*)regs->eip)[i])) || ++ (!user_mode(regs) && __direct_get_user(c, &((unsigned char*)regs->eip)[i]))) { ++ + bad: + printk(" Bad EIP value."); + break; +@@ -269,16 +270,14 @@ static void handle_BUG(struct pt_regs *r + + eip = regs->eip; + +- if (eip < PAGE_OFFSET) +- goto no_bug; +- if (__get_user(ud2, (unsigned short *)eip)) ++ if (__direct_get_user(ud2, (unsigned short *)eip)) + goto no_bug; + if (ud2 != 0x0b0f) + goto no_bug; +- if (__get_user(line, (unsigned short *)(eip + 2))) ++ if (__direct_get_user(line, (unsigned short *)(eip + 4))) + goto bug; +- if (__get_user(file, (char **)(eip + 4)) || +- (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) ++ if (__direct_get_user(file, (char **)(eip + 7)) || ++ __direct_get_user(c, file)) + file = "<bad filename>"; + + printk("------------[ cut here ]------------\n"); +@@ -292,11 +291,18 @@ bug: + printk("Kernel BUG\n"); + } + ++static void inline check_kernel_csum_bug(void) ++{ ++ if (kernel_text_csum_broken) ++ printk("Kernel code checksum mismatch detected %d times\n", ++ kernel_text_csum_broken); ++} ++ + spinlock_t die_lock = SPIN_LOCK_UNLOCKED; ++int die_counter; + + void die(const char * str, struct pt_regs * regs, long err) + { +- static int die_counter; + int nl = 0; + + console_verbose(); +@@ -319,6 +325,7 @@ void die(const char * str, struct pt_reg + if (nl) + printk("\n"); + show_registers(regs); ++ check_kernel_csum_bug(); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + if (in_interrupt()) +@@ -531,6 +538,7 @@ static int dummy_nmi_callback(struct pt_ + } + + static nmi_callback_t nmi_callback = dummy_nmi_callback; ++static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback; + + asmlinkage void do_nmi(struct pt_regs * regs, long error_code) + { +@@ -544,9 +552,20 @@ asmlinkage void do_nmi(struct pt_regs * + if (!nmi_callback(regs, cpu)) + default_do_nmi(regs); + ++ nmi_ipi_callback(regs, cpu); + nmi_exit(); + } + ++void set_nmi_ipi_callback(nmi_callback_t callback) ++{ ++ nmi_ipi_callback = callback; ++} ++ ++void unset_nmi_ipi_callback(void) ++{ ++ nmi_ipi_callback = dummy_nmi_callback; ++} ++ + void set_nmi_callback(nmi_callback_t callback) + { + nmi_callback = callback; +@@ -591,10 +610,18 @@ asmlinkage void do_debug(struct pt_regs + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); + +- /* Mask out spurious debug traps due to lazy DR7 setting */ ++ /* ++ * Mask out spurious debug traps due to lazy DR7 setting or ++ * due to 4G/4G kernel mode: ++ */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg[7]) + goto clear_dr7; ++ if (!user_mode(regs)) { ++ // restore upon return-to-userspace: ++ set_thread_flag(TIF_DB7); ++ goto clear_dr7; ++ } + } + + if (regs->eflags & VM_MASK) +@@ -836,19 +863,52 @@ asmlinkage void math_emulate(long arg) + + #endif /* CONFIG_MATH_EMULATION */ + +-#ifdef CONFIG_X86_F00F_BUG +-void __init trap_init_f00f_bug(void) ++void __init trap_init_virtual_IDT(void) + { +- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); +- + /* +- * Update the IDT descriptor and reload the IDT so that +- * it uses the read-only mapped virtual address. ++ * "idt" is magic - it overlaps the idt_descr ++ * variable so that updating idt will automatically ++ * update the idt descriptor.. 
+ */ +- idt_descr.address = fix_to_virt(FIX_F00F_IDT); ++ __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO); ++ idt_descr.address = __fix_to_virt(FIX_IDT); ++ + __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); + } ++ ++void __init trap_init_virtual_GDT(void) ++{ ++ int cpu = smp_processor_id(); ++ struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu; ++ struct Xgt_desc_struct tmp_desc = {0, 0}; ++ struct tss_struct * t; ++ ++ __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory"); ++ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ if (!cpu) { ++ int i; ++ __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL); ++ __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL); ++ for(i = 0; i < FIX_TSS_COUNT; i++) ++ __set_fixmap(FIX_TSS_0 - i, __pa(init_tss) + i * PAGE_SIZE, PAGE_KERNEL); ++ } ++ ++ gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu; ++#else ++ gdt_desc->address = (unsigned long)cpu_gdt_table[cpu]; + #endif ++ __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc)); ++ ++#ifdef CONFIG_X86_HIGH_ENTRY ++ t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; ++#else ++ t = init_tss + cpu; ++#endif ++ set_tss_desc(cpu, t); ++ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; ++ load_TR_desc(); ++} + + #define _set_gate(gate_addr,type,dpl,addr,seg) \ + do { \ +@@ -875,17 +935,17 @@ void set_intr_gate(unsigned int n, void + _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); + } + +-static void __init set_trap_gate(unsigned int n, void *addr) ++void __init set_trap_gate(unsigned int n, void *addr) + { + _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); + } + +-static void __init set_system_gate(unsigned int n, void *addr) ++void __init set_system_gate(unsigned int n, void *addr) + { + _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); + } + +-static void __init set_call_gate(void *a, void *addr) ++void __init set_call_gate(void *a, void *addr) + { + _set_gate(a,12,3,addr,__KERNEL_CS); + } +@@ -907,6 +967,7 @@ void __init trap_init(void) + #ifdef CONFIG_X86_LOCAL_APIC + init_apic_mappings(); + #endif ++ init_entry_mappings(); + + set_trap_gate(0,÷_error); + set_intr_gate(1,&debug); +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vm86.c linux-2.6.8.1-ve022stab078/arch/i386/kernel/vm86.c +--- linux-2.6.8.1.orig/arch/i386/kernel/vm86.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/vm86.c 2006-05-11 13:05:38.000000000 +0400 +@@ -124,7 +124,7 @@ struct pt_regs * fastcall save_v86_state + tss = init_tss + get_cpu(); + current->thread.esp0 = current->thread.saved_esp0; + current->thread.sysenter_cs = __KERNEL_CS; +- load_esp0(tss, ¤t->thread); ++ load_virtual_esp0(tss, current); + current->thread.saved_esp0 = 0; + put_cpu(); + +@@ -307,7 +307,7 @@ static void do_sys_vm86(struct kernel_vm + tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; +- load_esp0(tss, &tsk->thread); ++ load_virtual_esp0(tss, tsk); + put_cpu(); + + tsk->thread.screen_bitmap = info->screen_bitmap; +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S linux-2.6.8.1-ve022stab078/arch/i386/kernel/vmlinux.lds.S +--- linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/vmlinux.lds.S 2006-05-11 13:05:38.000000000 +0400 +@@ -5,13 +5,17 @@ + #include <asm-generic/vmlinux.lds.h> + #include <asm/thread_info.h> + ++#include <linux/config.h> ++#include <asm/page.h> ++#include <asm/asm_offsets.h> ++ + 
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") + OUTPUT_ARCH(i386) + ENTRY(startup_32) + jiffies = jiffies_64; + SECTIONS + { +- . = 0xC0000000 + 0x100000; ++ . = __PAGE_OFFSET + 0x100000; + /* read-only */ + _text = .; /* Text and read-only data */ + .text : { +@@ -21,6 +25,19 @@ SECTIONS + *(.gnu.warning) + } = 0x9090 + ++#ifdef CONFIG_X86_4G ++ . = ALIGN(PAGE_SIZE_asm); ++ __entry_tramp_start = .; ++ . = FIX_ENTRY_TRAMPOLINE_0_addr; ++ __start___entry_text = .; ++ .entry.text : AT (__entry_tramp_start) { *(.entry.text) } ++ __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text); ++ . = __entry_tramp_end; ++ . = ALIGN(PAGE_SIZE_asm); ++#else ++ .entry.text : { *(.entry.text) } ++#endif ++ + _etext = .; /* End of text section */ + + . = ALIGN(16); /* Exception table */ +@@ -36,15 +53,12 @@ SECTIONS + CONSTRUCTORS + } + +- . = ALIGN(4096); ++ . = ALIGN(PAGE_SIZE_asm); + __nosave_begin = .; + .data_nosave : { *(.data.nosave) } +- . = ALIGN(4096); ++ . = ALIGN(PAGE_SIZE_asm); + __nosave_end = .; + +- . = ALIGN(4096); +- .data.page_aligned : { *(.data.idt) } +- + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + +@@ -54,7 +68,7 @@ SECTIONS + .data.init_task : { *(.data.init_task) } + + /* will be freed after init */ +- . = ALIGN(4096); /* Init code and data */ ++ . = ALIGN(PAGE_SIZE_asm); /* Init code and data */ + __init_begin = .; + .init.text : { + _sinittext = .; +@@ -93,7 +107,7 @@ SECTIONS + from .altinstructions and .eh_frame */ + .exit.text : { *(.exit.text) } + .exit.data : { *(.exit.data) } +- . = ALIGN(4096); ++ . = ALIGN(PAGE_SIZE_asm); + __initramfs_start = .; + .init.ramfs : { *(.init.ramfs) } + __initramfs_end = .; +@@ -101,10 +115,22 @@ SECTIONS + __per_cpu_start = .; + .data.percpu : { *(.data.percpu) } + __per_cpu_end = .; +- . = ALIGN(4096); ++ . = ALIGN(PAGE_SIZE_asm); + __init_end = .; + /* freed after init ends here */ +- ++ ++ . = ALIGN(PAGE_SIZE_asm); ++ .data.page_aligned_tss : { *(.data.tss) } ++ ++ . = ALIGN(PAGE_SIZE_asm); ++ .data.page_aligned_default_ldt : { *(.data.default_ldt) } ++ ++ . = ALIGN(PAGE_SIZE_asm); ++ .data.page_aligned_idt : { *(.data.idt) } ++ ++ . 
= ALIGN(PAGE_SIZE_asm); ++ .data.page_aligned_gdt : { *(.data.gdt) } ++ + __bss_start = .; /* BSS */ + .bss : { + *(.bss.page_aligned) +@@ -132,4 +158,6 @@ SECTIONS + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } ++ ++ + } +diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S linux-2.6.8.1-ve022stab078/arch/i386/kernel/vsyscall-sysenter.S +--- linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/kernel/vsyscall-sysenter.S 2006-05-11 13:05:38.000000000 +0400 +@@ -12,6 +12,11 @@ + .type __kernel_vsyscall,@function + __kernel_vsyscall: + .LSTART_vsyscall: ++ cmpl $192, %eax ++ jne 1f ++ int $0x80 ++ ret ++1: + push %ecx + .Lpush_ecx: + push %edx +@@ -84,7 +89,7 @@ SYSENTER_RETURN: + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpop_ebp-.Lenter_kernel + .byte 0x0e /* DW_CFA_def_cfa_offset */ +- .byte 0x12 /* RA at offset 12 now */ ++ .byte 0x0c /* RA at offset 12 now */ + .byte 0xc5 /* DW_CFA_restore %ebp */ + .byte 0x04 /* DW_CFA_advance_loc4 */ + .long .Lpop_edx-.Lpop_ebp +diff -uprN linux-2.6.8.1.orig/arch/i386/lib/checksum.S linux-2.6.8.1-ve022stab078/arch/i386/lib/checksum.S +--- linux-2.6.8.1.orig/arch/i386/lib/checksum.S 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/lib/checksum.S 2006-05-11 13:05:38.000000000 +0400 +@@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic ( + .previous + + .align 4 +-.globl csum_partial_copy_generic ++.globl direct_csum_partial_copy_generic + + #ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + #define ARGBASE 16 + #define FP 12 + +-csum_partial_copy_generic: ++direct_csum_partial_copy_generic: + subl $4,%esp + pushl %edi + pushl %esi +@@ -422,7 +422,7 @@ DST( movb %cl, (%edi) ) + + #define ARGBASE 12 + +-csum_partial_copy_generic: ++direct_csum_partial_copy_generic: + pushl %ebx + pushl %edi + pushl %esi +diff -uprN linux-2.6.8.1.orig/arch/i386/lib/getuser.S linux-2.6.8.1-ve022stab078/arch/i386/lib/getuser.S +--- linux-2.6.8.1.orig/arch/i386/lib/getuser.S 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/lib/getuser.S 2006-05-11 13:05:38.000000000 +0400 +@@ -9,6 +9,7 @@ + * return value. + */ + #include <asm/thread_info.h> ++#include <asm/asm_offsets.h> + + + /* +diff -uprN linux-2.6.8.1.orig/arch/i386/lib/usercopy.c linux-2.6.8.1-ve022stab078/arch/i386/lib/usercopy.c +--- linux-2.6.8.1.orig/arch/i386/lib/usercopy.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/lib/usercopy.c 2006-05-11 13:05:38.000000000 +0400 +@@ -9,7 +9,6 @@ + #include <linux/mm.h> + #include <linux/highmem.h> + #include <linux/blkdev.h> +-#include <linux/module.h> + #include <asm/uaccess.h> + #include <asm/mmx.h> + +@@ -77,7 +76,7 @@ do { \ + * and returns @count. + */ + long +-__strncpy_from_user(char *dst, const char __user *src, long count) ++__direct_strncpy_from_user(char *dst, const char __user *src, long count) + { + long res; + __do_strncpy_from_user(dst, src, count, res); +@@ -103,7 +102,7 @@ __strncpy_from_user(char *dst, const cha + * and returns @count. + */ + long +-strncpy_from_user(char *dst, const char __user *src, long count) ++direct_strncpy_from_user(char *dst, const char __user *src, long count) + { + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) +@@ -148,7 +147,7 @@ do { \ + * On success, this will be zero. 
+ */ + unsigned long +-clear_user(void __user *to, unsigned long n) ++direct_clear_user(void __user *to, unsigned long n) + { + might_sleep(); + if (access_ok(VERIFY_WRITE, to, n)) +@@ -168,7 +167,7 @@ clear_user(void __user *to, unsigned lon + * On success, this will be zero. + */ + unsigned long +-__clear_user(void __user *to, unsigned long n) ++__direct_clear_user(void __user *to, unsigned long n) + { + __do_clear_user(to, n); + return n; +@@ -185,7 +184,7 @@ __clear_user(void __user *to, unsigned l + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ +-long strnlen_user(const char __user *s, long n) ++long direct_strnlen_user(const char __user *s, long n) + { + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; +@@ -568,8 +567,7 @@ survive: + return n; + } + +-unsigned long +-__copy_from_user_ll(void *to, const void __user *from, unsigned long n) ++unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) + { + if (movsl_is_ok(to, from, n)) + __copy_user_zeroing(to, from, n); +@@ -578,53 +576,3 @@ __copy_from_user_ll(void *to, const void + return n; + } + +-/** +- * copy_to_user: - Copy a block of data into user space. +- * @to: Destination address, in user space. +- * @from: Source address, in kernel space. +- * @n: Number of bytes to copy. +- * +- * Context: User context only. This function may sleep. +- * +- * Copy data from kernel space to user space. +- * +- * Returns number of bytes that could not be copied. +- * On success, this will be zero. +- */ +-unsigned long +-copy_to_user(void __user *to, const void *from, unsigned long n) +-{ +- might_sleep(); +- if (access_ok(VERIFY_WRITE, to, n)) +- n = __copy_to_user(to, from, n); +- return n; +-} +-EXPORT_SYMBOL(copy_to_user); +- +-/** +- * copy_from_user: - Copy a block of data from user space. +- * @to: Destination address, in kernel space. +- * @from: Source address, in user space. +- * @n: Number of bytes to copy. +- * +- * Context: User context only. This function may sleep. +- * +- * Copy data from user space to kernel space. +- * +- * Returns number of bytes that could not be copied. +- * On success, this will be zero. +- * +- * If some data could not be copied, this function will pad the copied +- * data to the requested size using zero bytes. 
+- */ +-unsigned long +-copy_from_user(void *to, const void __user *from, unsigned long n) +-{ +- might_sleep(); +- if (access_ok(VERIFY_READ, from, n)) +- n = __copy_from_user(to, from, n); +- else +- memset(to, 0, n); +- return n; +-} +-EXPORT_SYMBOL(copy_from_user); +diff -uprN linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h linux-2.6.8.1-ve022stab078/arch/i386/math-emu/fpu_system.h +--- linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/math-emu/fpu_system.h 2006-05-11 13:05:38.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/sched.h> + #include <linux/kernel.h> + #include <linux/mm.h> ++#include <asm/atomic_kmap.h> + + /* This sets the pointer FPU_info to point to the argument part + of the stack frame of math_emulate() */ +@@ -22,7 +23,7 @@ + + /* s is always from a cpu register, and the cpu does bounds checking + * during register load --> no further bounds checks needed */ +-#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) ++#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3]) + #define SEG_D_SIZE(x) ((x).b & (3 << 21)) + #define SEG_G_BIT(x) ((x).b & (1 << 23)) + #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/fault.c linux-2.6.8.1-ve022stab078/arch/i386/mm/fault.c +--- linux-2.6.8.1.orig/arch/i386/mm/fault.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/fault.c 2006-05-11 13:05:38.000000000 +0400 +@@ -26,36 +26,11 @@ + #include <asm/uaccess.h> + #include <asm/hardirq.h> + #include <asm/desc.h> ++#include <asm/tlbflush.h> + + extern void die(const char *,struct pt_regs *,long); + + /* +- * Unlock any spinlocks which will prevent us from getting the +- * message out +- */ +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +-} +- +-/* + * Return EIP plus the CS segment base. The segment limit is also + * adjusted, clamped to the kernel/user address space (whichever is + * appropriate), and returned in *eip_limit. +@@ -103,8 +78,17 @@ static inline unsigned long get_segment_ + if (seg & (1<<2)) { + /* Must lock the LDT while reading it. */ + down(¤t->mm->context.sem); ++#if 1 ++ /* horrible hack for 4/4 disabled kernels. ++ I'm not quite sure what the TLB flush is good for, ++ it's mindlessly copied from the read_ldt code */ ++ __flush_tlb_global(); ++ desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]); ++ desc = (void *)desc + ((seg & ~7) % PAGE_SIZE); ++#else + desc = current->mm->context.ldt; + desc = (void *)desc + (seg & ~7); ++#endif + } else { + /* Must disable preemption while reading the GDT. 
*/ + desc = (u32 *)&cpu_gdt_table[get_cpu()]; +@@ -117,6 +101,9 @@ static inline unsigned long get_segment_ + (desc[1] & 0xff000000); + + if (seg & (1<<2)) { ++#if 1 ++ kunmap((void *)((unsigned long)desc & PAGE_MASK)); ++#endif + up(¤t->mm->context.sem); + } else + put_cpu(); +@@ -232,6 +219,8 @@ asmlinkage void do_page_fault(struct pt_ + + tsk = current; + ++ check_stack_overflow(); ++ + info.si_code = SEGV_MAPERR; + + /* +@@ -247,6 +236,17 @@ asmlinkage void do_page_fault(struct pt_ + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. + */ ++#ifdef CONFIG_X86_4G ++ /* ++ * On 4/4 all kernels faults are either bugs, vmalloc or prefetch ++ */ ++ /* If it's vm86 fall through */ ++ if (unlikely(!(regs->eflags & VM_MASK) && ((regs->xcs & 3) == 0))) { ++ if (error_code & 3) ++ goto bad_area_nosemaphore; ++ goto vmalloc_fault; ++ } ++#else + if (unlikely(address >= TASK_SIZE)) { + if (!(error_code & 5)) + goto vmalloc_fault; +@@ -256,6 +256,7 @@ asmlinkage void do_page_fault(struct pt_ + */ + goto bad_area_nosemaphore; + } ++#endif + + mm = tsk->mm; + +@@ -333,7 +334,6 @@ good_area: + goto bad_area; + } + +- survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo +@@ -472,14 +472,14 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (tsk->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; ++ if (error_code & 4) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. Den ++ */ ++ force_sig(SIGKILL, tsk); ++ return; + } +- printk("VM: killing process %s\n", tsk->comm); +- if (error_code & 4) +- do_exit(SIGKILL); + goto no_context; + + do_sigbus: +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/highmem.c linux-2.6.8.1-ve022stab078/arch/i386/mm/highmem.c +--- linux-2.6.8.1.orig/arch/i386/mm/highmem.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/highmem.c 2006-05-11 13:05:38.000000000 +0400 +@@ -41,12 +41,45 @@ void *kmap_atomic(struct page *page, enu + if (!pte_none(*(kmap_pte-idx))) + BUG(); + #endif +- set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); ++ /* ++ * If the page is not a normal RAM page, then map it ++ * uncached to be on the safe side - it could be device ++ * memory that must not be prefetched: ++ */ ++ if (PageReserved(page)) ++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot_nocache)); ++ else ++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + __flush_tlb_one(vaddr); + + return (void*) vaddr; + } + ++/* ++ * page frame number based kmaps - useful for PCI mappings. ++ * NOTE: we map the page with the same mapping as what user is using. 
++ */ ++void *kmap_atomic_pte(pte_t *pte, enum km_type type) ++{ ++ enum fixed_addresses idx; ++ unsigned long vaddr; ++ ++ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ ++ inc_preempt_count(); ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++#ifdef CONFIG_DEBUG_HIGHMEM ++ if (!pte_none(*(kmap_pte-idx))) ++ BUG(); ++#endif ++ set_pte(kmap_pte-idx, *pte); ++ __flush_tlb_one(vaddr); ++ ++ return (void*) vaddr; ++} ++ ++ + void kunmap_atomic(void *kvaddr, enum km_type type) + { + #ifdef CONFIG_DEBUG_HIGHMEM +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c linux-2.6.8.1-ve022stab078/arch/i386/mm/hugetlbpage.c +--- linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/hugetlbpage.c 2006-05-11 13:05:38.000000000 +0400 +@@ -18,6 +18,8 @@ + #include <asm/tlb.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) + { + pgd_t *pgd; +@@ -43,6 +45,7 @@ static void set_huge_pte(struct mm_struc + pte_t entry; + + mm->rss += (HPAGE_SIZE / PAGE_SIZE); ++ ub_unused_privvm_dec(mm_ub(mm), HPAGE_SIZE / PAGE_SIZE, vma); + if (write_access) { + entry = + pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); +@@ -83,6 +86,7 @@ int copy_hugetlb_page_range(struct mm_st + get_page(ptepage); + set_pte(dst_pte, entry); + dst->rss += (HPAGE_SIZE / PAGE_SIZE); ++ ub_unused_privvm_dec(mm_ub(dst), HPAGE_SIZE / PAGE_SIZE, vma); + addr += HPAGE_SIZE; + } + return 0; +@@ -219,6 +223,7 @@ void unmap_hugepage_range(struct vm_area + put_page(page); + } + mm->rss -= (end - start) >> PAGE_SHIFT; ++ ub_unused_privvm_inc(mm_ub(mm), (end - start) >> PAGE_SHIFT, vma); + flush_tlb_range(vma, start, end); + } + +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/init.c linux-2.6.8.1-ve022stab078/arch/i386/mm/init.c +--- linux-2.6.8.1.orig/arch/i386/mm/init.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/init.c 2006-05-11 13:05:38.000000000 +0400 +@@ -27,6 +27,7 @@ + #include <linux/slab.h> + #include <linux/proc_fs.h> + #include <linux/efi.h> ++#include <linux/initrd.h> + + #include <asm/processor.h> + #include <asm/system.h> +@@ -39,143 +40,14 @@ + #include <asm/tlb.h> + #include <asm/tlbflush.h> + #include <asm/sections.h> ++#include <asm/setup.h> ++#include <asm/desc.h> + + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + unsigned long highstart_pfn, highend_pfn; + + static int do_test_wp_bit(void); + +-/* +- * Creates a middle page table and puts a pointer to it in the +- * given global directory entry. This only returns the gd entry +- * in non-PAE compilation mode, since the middle layer is folded. +- */ +-static pmd_t * __init one_md_table_init(pgd_t *pgd) +-{ +- pmd_t *pmd_table; +- +-#ifdef CONFIG_X86_PAE +- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); +- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); +- if (pmd_table != pmd_offset(pgd, 0)) +- BUG(); +-#else +- pmd_table = pmd_offset(pgd, 0); +-#endif +- +- return pmd_table; +-} +- +-/* +- * Create a page table and place a pointer to it in a middle page +- * directory entry. 
+- */ +-static pte_t * __init one_page_table_init(pmd_t *pmd) +-{ +- if (pmd_none(*pmd)) { +- pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); +- set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +- if (page_table != pte_offset_kernel(pmd, 0)) +- BUG(); +- +- return page_table; +- } +- +- return pte_offset_kernel(pmd, 0); +-} +- +-/* +- * This function initializes a certain range of kernel virtual memory +- * with new bootmem page tables, everywhere page tables are missing in +- * the given range. +- */ +- +-/* +- * NOTE: The pagetables are allocated contiguous on the physical space +- * so we can cache the place of the first one and move around without +- * checking the pgd every time. +- */ +-static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +-{ +- pgd_t *pgd; +- pmd_t *pmd; +- int pgd_idx, pmd_idx; +- unsigned long vaddr; +- +- vaddr = start; +- pgd_idx = pgd_index(vaddr); +- pmd_idx = pmd_index(vaddr); +- pgd = pgd_base + pgd_idx; +- +- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { +- if (pgd_none(*pgd)) +- one_md_table_init(pgd); +- +- pmd = pmd_offset(pgd, vaddr); +- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { +- if (pmd_none(*pmd)) +- one_page_table_init(pmd); +- +- vaddr += PMD_SIZE; +- } +- pmd_idx = 0; +- } +-} +- +-static inline int is_kernel_text(unsigned long addr) +-{ +- if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end) +- return 1; +- return 0; +-} +- +-/* +- * This maps the physical memory to kernel virtual address space, a total +- * of max_low_pfn pages, by creating page tables starting from address +- * PAGE_OFFSET. +- */ +-static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +-{ +- unsigned long pfn; +- pgd_t *pgd; +- pmd_t *pmd; +- pte_t *pte; +- int pgd_idx, pmd_idx, pte_ofs; +- +- pgd_idx = pgd_index(PAGE_OFFSET); +- pgd = pgd_base + pgd_idx; +- pfn = 0; +- +- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { +- pmd = one_md_table_init(pgd); +- if (pfn >= max_low_pfn) +- continue; +- for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { +- unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; +- +- /* Map with big pages if possible, otherwise create normal page tables. 
*/ +- if (cpu_has_pse) { +- unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; +- +- if (is_kernel_text(address) || is_kernel_text(address2)) +- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); +- else +- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); +- pfn += PTRS_PER_PTE; +- } else { +- pte = one_page_table_init(pmd); +- +- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { +- if (is_kernel_text(address)) +- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); +- else +- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); +- } +- } +- } +- } +-} +- + static inline int page_kills_ppro(unsigned long pagenr) + { + if (pagenr >= 0x70000 && pagenr <= 0x7003F) +@@ -223,11 +95,8 @@ static inline int page_is_ram(unsigned l + return 0; + } + +-#ifdef CONFIG_HIGHMEM + pte_t *kmap_pte; +-pgprot_t kmap_prot; + +-EXPORT_SYMBOL(kmap_prot); + EXPORT_SYMBOL(kmap_pte); + + #define kmap_get_fixmap_pte(vaddr) \ +@@ -235,29 +104,7 @@ EXPORT_SYMBOL(kmap_pte); + + void __init kmap_init(void) + { +- unsigned long kmap_vstart; +- +- /* cache the first kmap pte */ +- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); +- kmap_pte = kmap_get_fixmap_pte(kmap_vstart); +- +- kmap_prot = PAGE_KERNEL; +-} +- +-void __init permanent_kmaps_init(pgd_t *pgd_base) +-{ +- pgd_t *pgd; +- pmd_t *pmd; +- pte_t *pte; +- unsigned long vaddr; +- +- vaddr = PKMAP_BASE; +- page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); +- +- pgd = swapper_pg_dir + pgd_index(vaddr); +- pmd = pmd_offset(pgd, vaddr); +- pte = pte_offset_kernel(pmd, vaddr); +- pkmap_page_table = pte; ++ kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); + } + + void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +@@ -272,6 +119,8 @@ void __init one_highpage_init(struct pag + SetPageReserved(page); + } + ++#ifdef CONFIG_HIGHMEM ++ + #ifndef CONFIG_DISCONTIGMEM + void __init set_highmem_pages_init(int bad_ppro) + { +@@ -283,12 +132,9 @@ void __init set_highmem_pages_init(int b + #else + extern void set_highmem_pages_init(int); + #endif /* !CONFIG_DISCONTIGMEM */ +- + #else +-#define kmap_init() do { } while (0) +-#define permanent_kmaps_init(pgd_base) do { } while (0) +-#define set_highmem_pages_init(bad_ppro) do { } while (0) +-#endif /* CONFIG_HIGHMEM */ ++# define set_highmem_pages_init(bad_ppro) do { } while (0) ++#endif + + unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; + unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; +@@ -299,31 +145,125 @@ unsigned long long __PAGE_KERNEL_EXEC = + extern void __init remap_numa_kva(void); + #endif + +-static void __init pagetable_init (void) ++static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address) ++{ ++ pgd_t *pgd; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ pgd = pgd_base + pgd_index(address); ++ pmd = pmd_offset(pgd, address); ++ if (!pmd_present(*pmd)) { ++ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); ++ } ++} ++ ++static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) ++{ ++ unsigned long vaddr; ++ ++ for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE) ++ prepare_pagetables(pgd_base, vaddr); ++} ++ ++void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) + { + unsigned long vaddr; +- pgd_t *pgd_base = swapper_pg_dir; ++ pgd_t *pgd; ++ int i, j, k; ++ pmd_t *pmd; ++ pte_t *pte, *pte_base; ++ ++ pgd = pgd_base; + ++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { ++ vaddr 
= i*PGDIR_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ pmd = pmd_offset(pgd, 0); ++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { ++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ if (vaddr < start) ++ continue; ++ if (cpu_has_pse) { ++ unsigned long __pe; ++ ++ set_in_cr4(X86_CR4_PSE); ++ boot_cpu_data.wp_works_ok = 1; ++ __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start; ++ /* Make it "global" too if supported */ ++ if (cpu_has_pge) { ++ set_in_cr4(X86_CR4_PGE); ++#if !defined(CONFIG_X86_SWITCH_PAGETABLES) ++ __pe += _PAGE_GLOBAL; ++ __PAGE_KERNEL |= _PAGE_GLOBAL; ++#endif ++ } ++ set_pmd(pmd, __pmd(__pe)); ++ continue; ++ } ++ if (!pmd_present(*pmd)) ++ pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ else ++ pte_base = pte_offset_kernel(pmd, 0); ++ pte = pte_base; ++ for (k = 0; k < PTRS_PER_PTE; pte++, k++) { ++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ if (vaddr < start) ++ continue; ++ *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL); ++ } ++ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); ++ } ++ } ++} ++ ++static void __init pagetable_init (void) ++{ ++ unsigned long vaddr, end; ++ pgd_t *pgd_base; + #ifdef CONFIG_X86_PAE + int i; +- /* Init entries of the first-level page table to the zero page */ +- for (i = 0; i < PTRS_PER_PGD; i++) +- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); + #endif + +- /* Enable PSE if available */ +- if (cpu_has_pse) { +- set_in_cr4(X86_CR4_PSE); +- } ++ /* ++ * This can be zero as well - no problem, in that case we exit ++ * the loops anyway due to the PTRS_PER_* conditions. ++ */ ++ end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); + +- /* Enable PGE if available */ +- if (cpu_has_pge) { +- set_in_cr4(X86_CR4_PGE); +- __PAGE_KERNEL |= _PAGE_GLOBAL; +- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; ++ pgd_base = swapper_pg_dir; ++#ifdef CONFIG_X86_PAE ++ /* ++ * It causes too many problems if there's no proper pmd set up ++ * for all 4 entries of the PGD - so we allocate all of them. ++ * PAE systems will not miss this extra 4-8K anyway ... ++ */ ++ for (i = 0; i < PTRS_PER_PGD; i++) { ++ pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); ++ set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1)); + } ++#endif ++ /* ++ * Set up lowmem-sized identity mappings at PAGE_OFFSET: ++ */ ++ setup_identity_mappings(pgd_base, PAGE_OFFSET, end); + +- kernel_physical_mapping_init(pgd_base); ++ /* ++ * Add flat-mode identity-mappings - SMP needs it when ++ * starting up on an AP from real-mode. (In the non-PAE ++ * case we already have these mappings through head.S.) ++ * All user-space mappings are explicitly cleared after ++ * SMP startup. ++ */ ++#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE) ++ setup_identity_mappings(pgd_base, 0, 16*1024*1024); ++#endif + remap_numa_kva(); + + /* +@@ -331,22 +271,57 @@ static void __init pagetable_init (void) + * created - mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; +- page_table_range_init(vaddr, 0, pgd_base); ++ fixrange_init(vaddr, 0, pgd_base); + +- permanent_kmaps_init(pgd_base); ++#ifdef CONFIG_HIGHMEM ++ { ++ pgd_t *pgd; ++ pmd_t *pmd; ++ pte_t *pte; + +-#ifdef CONFIG_X86_PAE +- /* +- * Add low memory identity-mappings - SMP needs it when +- * starting up on an AP from real-mode. In the non-PAE +- * case we already have these mappings through head.S. +- * All user-space mappings are explicitly cleared after +- * SMP startup. 
+- */ +- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; ++ /* ++ * Permanent kmaps: ++ */ ++ vaddr = PKMAP_BASE; ++ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); ++ ++ pgd = swapper_pg_dir + pgd_index(vaddr); ++ pmd = pmd_offset(pgd, vaddr); ++ pte = pte_offset_kernel(pmd, vaddr); ++ pkmap_page_table = pte; ++ } + #endif + } + ++/* ++ * Clear kernel pagetables in a PMD_SIZE-aligned range. ++ */ ++static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) ++{ ++ unsigned long vaddr; ++ pgd_t *pgd; ++ pmd_t *pmd; ++ int i, j; ++ ++ pgd = pgd_base; ++ ++ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { ++ vaddr = i*PGDIR_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ pmd = pmd_offset(pgd, 0); ++ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { ++ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; ++ if (end && (vaddr >= end)) ++ break; ++ if (vaddr < start) ++ continue; ++ pmd_clear(pmd); ++ } ++ } ++ flush_tlb_all(); ++} ++ + #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) + /* + * Swap suspend & friends need this for resume because things like the intel-agp +@@ -365,25 +340,16 @@ static inline void save_pg_dir(void) + } + #endif + +-void zap_low_mappings (void) +-{ +- int i; + ++void zap_low_mappings(void) ++{ + save_pg_dir(); + ++ printk("zapping low mappings.\n"); + /* + * Zap initial low-memory mappings. +- * +- * Note that "pgd_clear()" doesn't do it for +- * us, because pgd_clear() is a no-op on i386. + */ +- for (i = 0; i < USER_PTRS_PER_PGD; i++) +-#ifdef CONFIG_X86_PAE +- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); +-#else +- set_pgd(swapper_pg_dir+i, __pgd(0)); +-#endif +- flush_tlb_all(); ++ clear_mappings(swapper_pg_dir, 0, 16*1024*1024); + } + + #ifndef CONFIG_DISCONTIGMEM +@@ -454,7 +420,6 @@ static void __init set_nx(void) + } + } + } +- + /* + * Enables/disables executability of a given kernel page and + * returns the previous setting. +@@ -512,7 +477,15 @@ void __init paging_init(void) + set_in_cr4(X86_CR4_PAE); + #endif + __flush_tlb_all(); +- ++ /* ++ * Subtle. SMP is doing it's boot stuff late (because it has to ++ * fork idle threads) - but it also needs low mappings for the ++ * protected-mode entry to work. We zap these entries only after ++ * the WP-bit has been tested. ++ */ ++#ifndef CONFIG_SMP ++ zap_low_mappings(); ++#endif + kmap_init(); + zone_sizes_init(); + } +@@ -561,6 +534,37 @@ extern void set_max_mapnr_init(void); + + static struct kcore_list kcore_mem, kcore_vmalloc; + ++#ifdef CONFIG_BLK_DEV_INITRD ++/* ++ * This function move initrd from highmem to normal zone, if needed. ++ * Note, we have to do it before highmem pages are given to buddy allocator. 
++ */ ++static void initrd_move(void) ++{ ++ unsigned long i, start, off; ++ struct page *page; ++ void *addr; ++ ++ if (initrd_copy <= 0) ++ return; ++ ++ initrd_start = (unsigned long) ++ alloc_bootmem_low_pages(PAGE_ALIGN(INITRD_SIZE)); ++ initrd_end = INITRD_START + initrd_copy; ++ start = (initrd_end - initrd_copy) & PAGE_MASK; ++ off = (initrd_end - initrd_copy) & ~PAGE_MASK; ++ for (i = 0; i < initrd_copy; i += PAGE_SIZE) { ++ page = pfn_to_page((start + i) >> PAGE_SHIFT); ++ addr = kmap_atomic(page, KM_USER0); ++ memcpy((void *)initrd_start + i, ++ addr, PAGE_SIZE); ++ kunmap_atomic(addr, KM_USER0); ++ } ++ initrd_start += off; ++ initrd_end = initrd_start + initrd_copy; ++} ++#endif ++ + void __init mem_init(void) + { + extern int ppro_with_ram_bug(void); +@@ -593,6 +597,9 @@ void __init mem_init(void) + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + #endif + ++#ifdef CONFIG_BLK_DEV_INITRD ++ initrd_move(); ++#endif + /* this will put all low memory onto the freelists */ + totalram_pages += __free_all_bootmem(); + +@@ -631,38 +638,57 @@ void __init mem_init(void) + if (boot_cpu_data.wp_works_ok < 0) + test_wp_bit(); + +- /* +- * Subtle. SMP is doing it's boot stuff late (because it has to +- * fork idle threads) - but it also needs low mappings for the +- * protected-mode entry to work. We zap these entries only after +- * the WP-bit has been tested. +- */ +-#ifndef CONFIG_SMP +- zap_low_mappings(); +-#endif ++ entry_trampoline_setup(); ++ default_ldt_page = virt_to_page(default_ldt); ++ load_LDT(&init_mm.context); + } + +-kmem_cache_t *pgd_cache; +-kmem_cache_t *pmd_cache; ++kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; + + void __init pgtable_cache_init(void) + { ++ void (*ctor)(void *, kmem_cache_t *, unsigned long); ++ void (*dtor)(void *, kmem_cache_t *, unsigned long); ++ + if (PTRS_PER_PMD > 1) { + pmd_cache = kmem_cache_create("pmd", + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), +- 0, ++ SLAB_UBC, + pmd_ctor, + NULL); + if (!pmd_cache) + panic("pgtable_cache_init(): cannot create pmd cache"); ++ ++ if (TASK_SIZE > PAGE_OFFSET) { ++ kpmd_cache = kmem_cache_create("kpmd", ++ PTRS_PER_PMD*sizeof(pmd_t), ++ PTRS_PER_PMD*sizeof(pmd_t), ++ SLAB_UBC, ++ kpmd_ctor, ++ NULL); ++ if (!kpmd_cache) ++ panic("pgtable_cache_init(): " ++ "cannot create kpmd cache"); ++ } + } ++ ++ if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET) ++ ctor = pgd_ctor; ++ else ++ ctor = NULL; ++ ++ if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET) ++ dtor = pgd_dtor; ++ else ++ dtor = NULL; ++ + pgd_cache = kmem_cache_create("pgd", + PTRS_PER_PGD*sizeof(pgd_t), + PTRS_PER_PGD*sizeof(pgd_t), +- 0, +- pgd_ctor, +- PTRS_PER_PMD == 1 ? 
pgd_dtor : NULL); ++ SLAB_UBC, ++ ctor, ++ dtor); + if (!pgd_cache) + panic("pgtable_cache_init(): Cannot create pgd cache"); + } +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pageattr.c linux-2.6.8.1-ve022stab078/arch/i386/mm/pageattr.c +--- linux-2.6.8.1.orig/arch/i386/mm/pageattr.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/pageattr.c 2006-05-11 13:05:38.000000000 +0400 +@@ -67,22 +67,21 @@ static void flush_kernel_map(void *dummy + + static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) + { +- struct page *page; +- unsigned long flags; +- + set_pte_atomic(kpte, pte); /* change init_mm */ +- if (PTRS_PER_PMD > 1) +- return; +- +- spin_lock_irqsave(&pgd_lock, flags); +- for (page = pgd_list; page; page = (struct page *)page->index) { +- pgd_t *pgd; +- pmd_t *pmd; +- pgd = (pgd_t *)page_address(page) + pgd_index(address); +- pmd = pmd_offset(pgd, address); +- set_pte_atomic((pte_t *)pmd, pte); ++#ifndef CONFIG_X86_PAE ++ { ++ struct list_head *l; ++ if (TASK_SIZE > PAGE_OFFSET) ++ return; ++ spin_lock(&mmlist_lock); ++ list_for_each(l, &init_mm.mmlist) { ++ struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist); ++ pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address); ++ set_pte_atomic((pte_t *)pmd, pte); ++ } ++ spin_unlock(&mmlist_lock); + } +- spin_unlock_irqrestore(&pgd_lock, flags); ++#endif + } + + /* +diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pgtable.c linux-2.6.8.1-ve022stab078/arch/i386/mm/pgtable.c +--- linux-2.6.8.1.orig/arch/i386/mm/pgtable.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/mm/pgtable.c 2006-05-11 13:05:40.000000000 +0400 +@@ -5,8 +5,10 @@ + #include <linux/config.h> + #include <linux/sched.h> + #include <linux/kernel.h> ++#include <linux/module.h> + #include <linux/errno.h> + #include <linux/mm.h> ++#include <linux/vmalloc.h> + #include <linux/swap.h> + #include <linux/smp.h> + #include <linux/highmem.h> +@@ -21,6 +23,7 @@ + #include <asm/e820.h> + #include <asm/tlb.h> + #include <asm/tlbflush.h> ++#include <asm/atomic_kmap.h> + + void show_mem(void) + { +@@ -53,6 +56,7 @@ void show_mem(void) + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); ++ vprintstat(); + } + + /* +@@ -143,9 +147,10 @@ struct page *pte_alloc_one(struct mm_str + struct page *pte; + + #ifdef CONFIG_HIGHPTE +- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); ++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC| ++ __GFP_HIGHMEM|__GFP_REPEAT, 0); + #else +- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); ++ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT, 0); + #endif + if (pte) + clear_highpage(pte); +@@ -157,11 +162,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c + memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); + } + ++void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags) ++{ ++ pmd_t *kpmd, *pmd; ++ kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1], ++ (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE); ++ pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS); ++ ++ memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t)); ++ memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t)); ++} ++ + /* +- * List of all pgd's needed for non-PAE so it can invalidate entries +- * in both cached and uncached pgd's; not needed for PAE since the +- * kernel pmd is shared. If PAE were not to share the pmd a similar +- * tactic would be needed. 
This is essentially codepath-based locking ++ * List of all pgd's needed so it can invalidate entries in both cached ++ * and uncached pgd's. This is essentially codepath-based locking + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. +@@ -169,6 +183,12 @@ void pmd_ctor(void *pmd, kmem_cache_t *c + * checks at dup_mmap(), exec(), and other mmlist addition points + * could be used. The locking scheme was chosen on the basis of + * manfred's recommendations and having no core impact whatsoever. ++ * ++ * Lexicon for #ifdefless conditions to config options: ++ * (a) PTRS_PER_PMD == 1 means non-PAE. ++ * (b) PTRS_PER_PMD > 1 means PAE. ++ * (c) TASK_SIZE > PAGE_OFFSET means 4:4. ++ * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4. + * -- wli + */ + spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; +@@ -194,26 +214,38 @@ static inline void pgd_list_del(pgd_t *p + next->private = (unsigned long)pprev; + } + +-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) ++void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused) + { ++ pgd_t *pgd = __pgd; + unsigned long flags; + +- if (PTRS_PER_PMD == 1) +- spin_lock_irqsave(&pgd_lock, flags); ++ if (PTRS_PER_PMD == 1) { ++ if (TASK_SIZE <= PAGE_OFFSET) ++ spin_lock_irqsave(&pgd_lock, flags); ++ else ++ memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS], ++ &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS], ++ NR_SHARED_PMDS*sizeof(pgd_t)); ++ } + +- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); ++ if (TASK_SIZE <= PAGE_OFFSET) ++ memcpy(&pgd[USER_PTRS_PER_PGD], ++ &swapper_pg_dir[USER_PTRS_PER_PGD], ++ (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t)); + + if (PTRS_PER_PMD > 1) + return; + +- pgd_list_add(pgd); +- spin_unlock_irqrestore(&pgd_lock, flags); +- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); ++ if (TASK_SIZE > PAGE_OFFSET) ++ memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t)); ++ else { ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); ++ } + } + +-/* never called when PTRS_PER_PMD > 1 */ ++/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */ + void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) + { + unsigned long flags; /* can be called from interrupt context */ +@@ -231,15 +263,31 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + if (PTRS_PER_PMD == 1 || !pgd) + return pgd; + ++ /* ++ * In the 4G userspace case alias the top 16 MB virtual ++ * memory range into the user mappings as well (these ++ * include the trampoline and CPU data structures). ++ */ + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { +- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ pmd_t *pmd; ++ ++ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) ++ pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL); ++ else ++ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ + if (!pmd) + goto out_oom; + set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); + } +- return pgd; + ++ return pgd; + out_oom: ++ /* ++ * we don't have to handle the kpmd_cache here, since it's the ++ * last allocation, and has either nothing to free or when it ++ * succeeds the whole operation succeeds. 
++ */ + for (i--; i >= 0; i--) + kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + kmem_cache_free(pgd_cache, pgd); +@@ -250,10 +298,27 @@ void pgd_free(pgd_t *pgd) + { + int i; + +- /* in the PAE case user pgd entries are overwritten before usage */ +- if (PTRS_PER_PMD > 1) +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) +- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); + /* in the non-PAE case, clear_page_tables() clears user pgd entries */ ++ if (PTRS_PER_PMD == 1) ++ goto out_free; ++ ++ /* in the PAE case user pgd entries are overwritten before usage */ ++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = __va(pgd_val(pgd[i]) - 1); ++ ++ /* ++ * only userspace pmd's are cleared for us ++ * by mm/memory.c; it's a slab cache invariant ++ * that we must separate the kernel pmd slab ++ * all times, else we'll have bad pmd's. ++ */ ++ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) ++ kmem_cache_free(kpmd_cache, pmd); ++ else ++ kmem_cache_free(pmd_cache, pmd); ++ } ++out_free: + kmem_cache_free(pgd_cache, pgd); + } ++ ++EXPORT_SYMBOL(show_mem); +diff -uprN linux-2.6.8.1.orig/arch/i386/pci/fixup.c linux-2.6.8.1-ve022stab078/arch/i386/pci/fixup.c +--- linux-2.6.8.1.orig/arch/i386/pci/fixup.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/pci/fixup.c 2006-05-11 13:05:29.000000000 +0400 +@@ -210,10 +210,7 @@ static void __devinit pci_fixup_transpar + */ + static void __init pci_fixup_nforce2(struct pci_dev *dev) + { +- u32 val, fixed_val; +- u8 rev; +- +- pci_read_config_byte(dev, PCI_REVISION_ID, &rev); ++ u32 val; + + /* + * Chip Old value New value +@@ -223,17 +220,14 @@ static void __init pci_fixup_nforce2(str + * Northbridge chip version may be determined by + * reading the PCI revision ID (0xC1 or greater is C18D). + */ +- fixed_val = rev < 0xC1 ? 0x1F01FF01 : 0x9F01FF01; +- + pci_read_config_dword(dev, 0x6c, &val); + + /* +- * Apply fixup only if C1 Halt Disconnect is enabled +- * (bit28) because it is not supported on some boards. ++ * Apply fixup if needed, but don't touch disconnect state + */ +- if ((val & (1 << 28)) && val != fixed_val) { ++ if ((val & 0x00FF0000) != 0x00010000) { + printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n"); +- pci_write_config_dword(dev, 0x6c, fixed_val); ++ pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000); + } + } + +diff -uprN linux-2.6.8.1.orig/arch/i386/power/cpu.c linux-2.6.8.1-ve022stab078/arch/i386/power/cpu.c +--- linux-2.6.8.1.orig/arch/i386/power/cpu.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/i386/power/cpu.c 2006-05-11 13:05:38.000000000 +0400 +@@ -83,9 +83,7 @@ do_fpu_end(void) + static void fix_processor_context(void) + { + int cpu = smp_processor_id(); +- struct tss_struct * t = init_tss + cpu; + +- set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. 
*/ + cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; + + load_TR_desc(); /* This does ltr */ +diff -uprN linux-2.6.8.1.orig/arch/ia64/hp/common/sba_iommu.c linux-2.6.8.1-ve022stab078/arch/ia64/hp/common/sba_iommu.c +--- linux-2.6.8.1.orig/arch/ia64/hp/common/sba_iommu.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/hp/common/sba_iommu.c 2006-05-11 13:05:30.000000000 +0400 +@@ -475,7 +475,7 @@ sba_search_bitmap(struct ioc *ioc, unsig + * purges IOTLB entries in power-of-two sizes, so we also + * allocate IOVA space in power-of-two sizes. + */ +- bits_wanted = 1UL << get_iovp_order(bits_wanted << PAGE_SHIFT); ++ bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); + + if (likely(bits_wanted == 1)) { + unsigned int bitshiftcnt; +@@ -684,7 +684,7 @@ sba_free_range(struct ioc *ioc, dma_addr + unsigned long m; + + /* Round up to power-of-two size: see AR2305 note above */ +- bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << PAGE_SHIFT); ++ bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); + for (; bits_not_wanted > 0 ; res_ptr++) { + + if (unlikely(bits_not_wanted > BITS_PER_LONG)) { +@@ -757,7 +757,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, unsigne + #ifdef ENABLE_MARK_CLEAN + /** + * Since DMA is i-cache coherent, any (complete) pages that were written via +- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to ++ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ + static void +diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.8.1-ve022stab078/arch/ia64/ia32/binfmt_elf32.c +--- linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/ia32/binfmt_elf32.c 2006-05-11 13:05:38.000000000 +0400 +@@ -18,6 +18,8 @@ + #include <asm/param.h> + #include <asm/signal.h> + ++#include <ub/ub_vmpages.h> ++ + #include "ia32priv.h" + #include "elfcore32.h" + +@@ -84,7 +86,11 @@ ia64_elf32_init (struct pt_regs *regs) + vma->vm_ops = &ia32_shared_page_vm_ops; + down_write(¤t->mm->mmap_sem); + { +- insert_vm_struct(current->mm, vma); ++ if (insert_vm_struct(current->mm, vma)) { ++ kmem_cache_free(vm_area_cachep, vma); ++ up_write(¤t->mm->mmap_sem); ++ return; ++ } + } + up_write(¤t->mm->mmap_sem); + } +@@ -93,6 +99,11 @@ ia64_elf32_init (struct pt_regs *regs) + * Install LDT as anonymous memory. This gives us all-zero segment descriptors + * until a task modifies them via modify_ldt(). 
+ */ ++ if (ub_memory_charge(mm_ub(current->mm), ++ PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE), ++ VM_WRITE, NULL, UB_SOFT)) ++ return; ++ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); +@@ -103,10 +114,21 @@ ia64_elf32_init (struct pt_regs *regs) + vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE; + down_write(¤t->mm->mmap_sem); + { +- insert_vm_struct(current->mm, vma); ++ if (insert_vm_struct(current->mm, vma)) { ++ kmem_cache_free(vm_area_cachep, vma); ++ up_write(¤t->mm->mmap_sem); ++ ub_memory_uncharge(mm_ub(current->mm), ++ PAGE_ALIGN(IA32_LDT_ENTRIES * ++ IA32_LDT_ENTRY_SIZE), ++ VM_WRITE, NULL); ++ return; ++ } + } + up_write(¤t->mm->mmap_sem); +- } ++ } else ++ ub_memory_uncharge(mm_ub(current->mm), ++ PAGE_ALIGN(IA32_LDT_ENTRIES * IA32_LDT_ENTRY_SIZE), ++ VM_WRITE, NULL); + + ia64_psr(regs)->ac = 0; /* turn off alignment checking */ + regs->loadrs = 0; +@@ -148,10 +170,10 @@ ia64_elf32_init (struct pt_regs *regs) + int + ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) + { +- unsigned long stack_base; ++ unsigned long stack_base, vm_end, vm_start; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; +- int i; ++ int i, ret; + + stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; + mm->arg_start = bprm->p + stack_base; +@@ -161,23 +183,29 @@ ia32_setup_arg_pages (struct linux_binpr + bprm->loader += stack_base; + bprm->exec += stack_base; + ++ vm_end = IA32_STACK_TOP; ++ vm_start = PAGE_MASK & (unsigned long)bprm->p; ++ ++ ret = ub_memory_charge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS, ++ NULL, UB_HARD); ++ if (ret) ++ goto out; ++ ++ ret = -ENOMEM; + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) +- return -ENOMEM; ++ goto out_uncharge; + +- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) +- >> PAGE_SHIFT)) { +- kmem_cache_free(vm_area_cachep, mpnt); +- return -ENOMEM; +- } ++ if (security_vm_enough_memory((vm_end - vm_start) >> PAGE_SHIFT)) ++ goto out_free; + + memset(mpnt, 0, sizeof(*mpnt)); + + down_write(¤t->mm->mmap_sem); + { + mpnt->vm_mm = current->mm; +- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; +- mpnt->vm_end = IA32_STACK_TOP; ++ mpnt->vm_start = vm_start; ++ mpnt->vm_end = vm_end; + if (executable_stack == EXSTACK_ENABLE_X) + mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; + else if (executable_stack == EXSTACK_DISABLE_X) +@@ -186,7 +214,8 @@ ia32_setup_arg_pages (struct linux_binpr + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? 
+ PAGE_COPY_EXEC: PAGE_COPY; +- insert_vm_struct(current->mm, mpnt); ++ if ((ret = insert_vm_struct(current->mm, mpnt))) ++ goto out_up; + current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + } + +@@ -205,6 +234,16 @@ ia32_setup_arg_pages (struct linux_binpr + current->thread.ppl = ia32_init_pp_list(); + + return 0; ++ ++out_up: ++ up_write(¤t->mm->mmap_sem); ++ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT); ++out_free: ++ kmem_cache_free(vm_area_cachep, mpnt); ++out_uncharge: ++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, VM_STACK_FLAGS, NULL); ++out: ++ return ret; + } + + static void +diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32_entry.S linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32_entry.S +--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32_entry.S 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32_entry.S 2006-05-11 13:05:27.000000000 +0400 +@@ -387,7 +387,7 @@ ia32_syscall_table: + data8 sys32_rt_sigaction + data8 sys32_rt_sigprocmask /* 175 */ + data8 sys_rt_sigpending +- data8 sys32_rt_sigtimedwait ++ data8 compat_rt_sigtimedwait + data8 sys32_rt_sigqueueinfo + data8 sys32_rt_sigsuspend + data8 sys32_pread /* 180 */ +diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32_signal.c linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32_signal.c +--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32_signal.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32_signal.c 2006-05-11 13:05:34.000000000 +0400 +@@ -59,19 +59,19 @@ struct rt_sigframe_ia32 + int sig; + int pinfo; + int puc; +- siginfo_t32 info; ++ compat_siginfo_t info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + char retcode[8]; + }; + + int +-copy_siginfo_from_user32 (siginfo_t *to, siginfo_t32 *from) ++copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t *from) + { + unsigned long tmp; + int err; + +- if (!access_ok(VERIFY_READ, from, sizeof(siginfo_t32))) ++ if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + err = __get_user(to->si_signo, &from->si_signo); +@@ -110,12 +110,12 @@ copy_siginfo_from_user32 (siginfo_t *to, + } + + int +-copy_siginfo_to_user32 (siginfo_t32 *to, siginfo_t *from) ++copy_siginfo_to_user32 (compat_siginfo_t *to, siginfo_t *from) + { + unsigned int addr; + int err; + +- if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t32))) ++ if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + /* If you change siginfo_t structure, please be sure +@@ -459,7 +459,7 @@ ia32_rt_sigsuspend (compat_sigset_t *use + sigset_t oldset, set; + + scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */ +- memset(&set, 0, sizeof(&set)); ++ memset(&set, 0, sizeof(set)); + + if (sigsetsize > sizeof(sigset_t)) + return -EINVAL; +@@ -505,6 +505,7 @@ sys32_signal (int sig, unsigned int hand + + sigact_set_handler(&new_sa, handler, 0); + new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; ++ sigemptyset(&new_sa.sa.sa_mask); + + ret = do_sigaction(sig, &new_sa, &old_sa); + +@@ -574,33 +575,7 @@ sys32_rt_sigprocmask (int how, compat_si + } + + asmlinkage long +-sys32_rt_sigtimedwait (compat_sigset_t *uthese, siginfo_t32 *uinfo, +- struct compat_timespec *uts, unsigned int sigsetsize) +-{ +- extern int copy_siginfo_to_user32 (siginfo_t32 *, siginfo_t *); +- mm_segment_t old_fs = get_fs(); +- struct timespec t; +- siginfo_t info; +- sigset_t s; +- int ret; +- +- if (copy_from_user(&s.sig, uthese, sizeof(compat_sigset_t))) +- return -EFAULT; +- if (uts && 
get_compat_timespec(&t, uts)) +- return -EFAULT; +- set_fs(KERNEL_DS); +- ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL, +- sigsetsize); +- set_fs(old_fs); +- if (ret >= 0 && uinfo) { +- if (copy_siginfo_to_user32(uinfo, &info)) +- return -EFAULT; +- } +- return ret; +-} +- +-asmlinkage long +-sys32_rt_sigqueueinfo (int pid, int sig, siginfo_t32 *uinfo) ++sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t *uinfo) + { + mm_segment_t old_fs = get_fs(); + siginfo_t info; +diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/ia32priv.h linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32priv.h +--- linux-2.6.8.1.orig/arch/ia64/ia32/ia32priv.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/ia32/ia32priv.h 2006-05-11 13:05:27.000000000 +0400 +@@ -229,7 +229,7 @@ typedef union sigval32 { + + #define SIGEV_PAD_SIZE32 ((SIGEV_MAX_SIZE/sizeof(int)) - 3) + +-typedef struct siginfo32 { ++typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; +@@ -279,7 +279,7 @@ typedef struct siginfo32 { + int _fd; + } _sigpoll; + } _sifields; +-} siginfo_t32; ++} compat_siginfo_t; + + typedef struct sigevent32 { + sigval_t32 sigev_value; +diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/sys_ia32.c linux-2.6.8.1-ve022stab078/arch/ia64/ia32/sys_ia32.c +--- linux-2.6.8.1.orig/arch/ia64/ia32/sys_ia32.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/ia32/sys_ia32.c 2006-05-11 13:05:42.000000000 +0400 +@@ -770,7 +770,7 @@ emulate_mmap (struct file *file, unsigne + ia32_set_pp((unsigned int)start, (unsigned int)end, flags); + if (start > pstart) { + if (flags & MAP_SHARED) +- printk(KERN_INFO ++ ve_printk(VE_LOG, KERN_INFO + "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n", + current->comm, current->pid, start); + ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags, +@@ -783,7 +783,7 @@ emulate_mmap (struct file *file, unsigne + } + if (end < pend) { + if (flags & MAP_SHARED) +- printk(KERN_INFO ++ ve_printk(VE_LOG, KERN_INFO + "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n", + current->comm, current->pid, end); + ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags, +@@ -814,7 +814,7 @@ emulate_mmap (struct file *file, unsigne + is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0); + + if ((flags & MAP_SHARED) && !is_congruent) +- printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap " ++ ve_printk(VE_LOG, KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap " + "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off); + + DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend, +@@ -1521,7 +1521,7 @@ getreg (struct task_struct *child, int r + return __USER_DS; + case PT_CS: return __USER_CS; + default: +- printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno); ++ ve_printk(VE_LOG, KERN_ERR "ia32.getreg(): unknown register %d\n", regno); + break; + } + return 0; +@@ -1547,18 +1547,18 @@ putreg (struct task_struct *child, int r + case PT_EFL: child->thread.eflag = value; break; + case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS: + if (value != __USER_DS) +- printk(KERN_ERR ++ ve_printk(VE_LOG, KERN_ERR + "ia32.putreg: attempt to set invalid segment register %d = %x\n", + regno, value); + break; + case PT_CS: + if (value != __USER_CS) +- printk(KERN_ERR ++ ve_printk(VE_LOG, KERN_ERR + "ia32.putreg: attempt to to set invalid segment register %d = %x\n", + 
regno, value); + break; + default: +- printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno); ++ ve_printk(VE_LOG, KERN_ERR "ia32.putreg: unknown register %d\n", regno); + break; + } + } +@@ -1799,7 +1799,7 @@ sys32_ptrace (int request, pid_t pid, un + + ret = -ESRCH; + read_lock(&tasklist_lock); +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); +@@ -2419,7 +2419,7 @@ sys32_sendfile (int out_fd, int in_fd, i + ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); + set_fs(old_fs); + +- if (!ret && offset && put_user(of, offset)) ++ if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/acpi.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/acpi.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/acpi.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/acpi.c 2006-05-11 13:05:30.000000000 +0400 +@@ -430,8 +430,9 @@ acpi_numa_arch_fixup (void) + { + int i, j, node_from, node_to; + +- /* If there's no SRAT, fix the phys_id */ ++ /* If there's no SRAT, fix the phys_id and mark node 0 online */ + if (srat_num_cpus == 0) { ++ node_set_online(0); + node_cpuid[0].phys_id = hard_smp_processor_id(); + return; + } +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/asm-offsets.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/asm-offsets.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/asm-offsets.c 2006-05-11 13:05:40.000000000 +0400 +@@ -38,11 +38,21 @@ void foo(void) + DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); + DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); ++#ifdef CONFIG_VE ++ DEFINE(IA64_TASK_PID_OFFSET, offsetof ++ (struct task_struct, pids[PIDTYPE_PID].vnr)); ++#else + DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); ++#endif + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); ++#ifdef CONFIG_VE ++ DEFINE(IA64_TASK_TGID_OFFSET, offsetof ++ (struct task_struct, pids[PIDTYPE_TGID].vnr)); ++#else + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); ++#endif + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/entry.S linux-2.6.8.1-ve022stab078/arch/ia64/kernel/entry.S +--- linux-2.6.8.1.orig/arch/ia64/kernel/entry.S 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/entry.S 2006-05-11 13:05:43.000000000 +0400 +@@ -51,8 +51,11 @@ + * setup a null register window frame. 
+ */ + ENTRY(ia64_execve) +- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3) +- alloc loc1=ar.pfs,3,2,4,0 ++ /* ++ * Allocate 8 input registers since ptrace() may clobber them ++ */ ++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) ++ alloc loc1=ar.pfs,8,2,4,0 + mov loc0=rp + .body + mov out0=in0 // filename +@@ -113,8 +116,11 @@ END(ia64_execve) + * u64 tls) + */ + GLOBAL_ENTRY(sys_clone2) +- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(6) +- alloc r16=ar.pfs,6,2,6,0 ++ /* ++ * Allocate 8 input registers since ptrace() may clobber them ++ */ ++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) ++ alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp +@@ -142,8 +148,11 @@ END(sys_clone2) + * Deprecated. Use sys_clone2() instead. + */ + GLOBAL_ENTRY(sys_clone) +- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) +- alloc r16=ar.pfs,5,2,6,0 ++ /* ++ * Allocate 8 input registers since ptrace() may clobber them ++ */ ++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) ++ alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp + mov loc0=rp +@@ -1139,7 +1148,7 @@ ENTRY(notify_resume_user) + ;; + (pNonSys) mov out2=0 // out2==0 => not a syscall + .fframe 16 +- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) ++ .spillsp ar.unat, 16 + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch + .body +@@ -1165,7 +1174,7 @@ GLOBAL_ENTRY(sys_rt_sigsuspend) + adds out2=8,sp // out2=&sigscratch->ar_pfs + ;; + .fframe 16 +- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) ++ .spillsp ar.unat, 16 + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch + .body +@@ -1183,7 +1192,10 @@ END(sys_rt_sigsuspend) + + ENTRY(sys_rt_sigreturn) + PT_REGS_UNWIND_INFO(0) +- alloc r2=ar.pfs,0,0,1,0 ++ /* ++ * Allocate 8 input registers since ptrace() may clobber them ++ */ ++ alloc r2=ar.pfs,8,0,1,0 + .prologue + PT_REGS_SAVES(16) + adds sp=-16,sp +@@ -1537,5 +1549,19 @@ sys_call_table: + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall ++.rept 1500-1280 ++ data8 sys_ni_syscall // 1280 - 1499 ++.endr ++ data8 sys_fairsched_mknod // 1500 ++ data8 sys_fairsched_rmnod ++ data8 sys_fairsched_chwt ++ data8 sys_fairsched_mvpr ++ data8 sys_fairsched_rate ++ data8 sys_getluid // 1505 ++ data8 sys_setluid ++ data8 sys_setublimit ++ data8 sys_ubstat ++ data8 sys_lchmod ++ data8 sys_lutime // 1510 + + .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/entry.h linux-2.6.8.1-ve022stab078/arch/ia64/kernel/entry.h +--- linux-2.6.8.1.orig/arch/ia64/kernel/entry.h 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/entry.h 2006-05-11 13:05:30.000000000 +0400 +@@ -1,14 +1,25 @@ + #include <linux/config.h> + + /* +- * Preserved registers that are shared between code in ivt.S and entry.S. Be +- * careful not to step on these! ++ * Preserved registers that are shared between code in ivt.S and ++ * entry.S. Be careful not to step on these! + */ +-#define pLvSys p1 /* set 1 if leave from syscall; otherwise, set 0 */ +-#define pKStk p2 /* will leave_{kernel,syscall} return to kernel-stacks? 
*/ +-#define pUStk p3 /* will leave_{kernel,syscall} return to user-stacks? */ +-#define pSys p4 /* are we processing a (synchronous) system call? */ +-#define pNonSys p5 /* complement of pSys */ ++#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ ++#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ ++#define PRED_USER_STACK 3 /* returning to user-stacks? */ ++#define PRED_SYSCALL 4 /* inside a system call? */ ++#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ ++ ++#ifdef __ASSEMBLY__ ++# define PASTE2(x,y) x##y ++# define PASTE(x,y) PASTE2(x,y) ++ ++# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) ++# define pKStk PASTE(p,PRED_KERNEL_STACK) ++# define pUStk PASTE(p,PRED_USER_STACK) ++# define pSys PASTE(p,PRED_SYSCALL) ++# define pNonSys PASTE(p,PRED_NON_SYSCALL) ++#endif + + #define PT(f) (IA64_PT_REGS_##f##_OFFSET) + #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) +@@ -49,7 +60,7 @@ + .spillsp @priunat,SW(AR_UNAT)+16+(off); \ + .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ + .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ +- .spillsp pr,SW(PR)+16+(off)) ++ .spillsp pr,SW(PR)+16+(off) + + #define DO_SAVE_SWITCH_STACK \ + movl r28=1f; \ +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/fsys.S linux-2.6.8.1-ve022stab078/arch/ia64/kernel/fsys.S +--- linux-2.6.8.1.orig/arch/ia64/kernel/fsys.S 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/fsys.S 2006-05-11 13:05:40.000000000 +0400 +@@ -70,6 +70,7 @@ ENTRY(fsys_getpid) + FSYS_RETURN + END(fsys_getpid) + ++#ifndef CONFIG_VE + ENTRY(fsys_getppid) + .prologue + .altrp b6 +@@ -116,6 +117,7 @@ ENTRY(fsys_getppid) + #endif + FSYS_RETURN + END(fsys_getppid) ++#endif + + ENTRY(fsys_set_tid_address) + .prologue +@@ -445,9 +447,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 + ;; + + st8 [r2]=r14 // update current->blocked with new mask +- cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18 ++ cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 + ;; +- cmp.ne p6,p0=r17,r14 // update failed? ++ cmp.ne p6,p0=r17,r8 // update failed? 
+ (p6) br.cond.spnt.few 1b // yes -> retry + + #ifdef CONFIG_SMP +@@ -597,8 +599,9 @@ GLOBAL_ENTRY(fsys_bubble_down) + ;; + mov rp=r2 // set the real return addr + tbit.z p8,p0=r3,TIF_SYSCALL_TRACE +- +-(p8) br.call.sptk.many b6=b6 // ignore this return addr ++ ;; ++(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 ++(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.cond.sptk ia64_trace_syscall + END(fsys_bubble_down) + +@@ -626,7 +629,11 @@ fsyscall_table: + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid ++#ifdef CONFIG_VE ++ data8 0 // getppid ++#else + data8 fsys_getppid // getppid ++#endif + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/gate.S linux-2.6.8.1-ve022stab078/arch/ia64/kernel/gate.S +--- linux-2.6.8.1.orig/arch/ia64/kernel/gate.S 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/gate.S 2006-05-11 13:05:35.000000000 +0400 +@@ -81,6 +81,7 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) + LOAD_FSYSCALL_TABLE(r14) + + mov r16=IA64_KR(CURRENT) // 12 cycle read latency ++ tnat.nz p10,p9=r15 + mov r19=NR_syscalls-1 + ;; + shladd r18=r17,3,r14 +@@ -119,7 +120,8 @@ GLOBAL_ENTRY(__kernel_syscall_via_epc) + #endif + + mov r10=-1 +- mov r8=ENOSYS ++(p10) mov r8=EINVAL ++(p9) mov r8=ENOSYS + FSYS_RETURN + END(__kernel_syscall_via_epc) + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/irq.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/irq.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/irq.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/irq.c 2006-05-11 13:05:38.000000000 +0400 +@@ -56,6 +56,8 @@ + #include <asm/delay.h> + #include <asm/irq.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_task.h> + + /* + * Linux has a controller-independent x86 interrupt architecture. +@@ -256,15 +258,18 @@ int handle_IRQ_event(unsigned int irq, + { + int status = 1; /* Force the "do bottom halves" bit */ + int retval = 0; ++ struct user_beancounter *ub; + + if (!(action->flags & SA_INTERRUPT)) + local_irq_enable(); + ++ ub = set_exec_ub(get_ub0()); + do { + status |= action->flags; + retval |= action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); ++ (void)set_exec_ub(ub); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + local_irq_disable(); +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/irq_ia64.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/irq_ia64.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/irq_ia64.c 2006-05-11 13:05:40.000000000 +0400 +@@ -101,6 +101,7 @@ void + ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) + { + unsigned long saved_tpr; ++ struct ve_struct *ve; + + #if IRQ_DEBUG + { +@@ -137,6 +138,12 @@ ia64_handle_irq (ia64_vector vector, str + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ ++ ++#ifdef CONFIG_HOTPLUG_CPU ++#warning "Fix fixup_irqs & ia64_process_pending_intr to set correct env and ub!" ++#endif ++ ++ ve = set_exec_env(get_ve0()); + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); +@@ -162,6 +169,7 @@ ia64_handle_irq (ia64_vector vector, str + * come through until ia64_eoi() has been done. 
+ */ + irq_exit(); ++ (void)set_exec_env(ve); + } + + #ifdef CONFIG_HOTPLUG_CPU +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/ivt.S linux-2.6.8.1-ve022stab078/arch/ia64/kernel/ivt.S +--- linux-2.6.8.1.orig/arch/ia64/kernel/ivt.S 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/ivt.S 2006-05-11 13:05:35.000000000 +0400 +@@ -51,6 +51,7 @@ + #include <asm/system.h> + #include <asm/thread_info.h> + #include <asm/unistd.h> ++#include <asm/errno.h> + + #if 1 + # define PSR_DEFAULT_BITS psr.ac +@@ -732,10 +733,12 @@ ENTRY(break_fault) + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on ++ mov r3=NR_syscalls - 1 + ;; + (p15) ssm psr.i // restore psr.i ++ // p10==true means out registers are more than 8 or r15's Nat is true ++(p10) br.cond.spnt.many ia64_ret_from_syscall + ;; +- mov r3=NR_syscalls - 1 + movl r16=sys_call_table + + adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 +@@ -836,8 +839,11 @@ END(interrupt) + * On exit: + * - executing on bank 1 registers + * - psr.ic enabled, interrupts restored ++ * - p10: TRUE if syscall is invoked with more than 8 out ++ * registers or r15's Nat is true + * - r1: kernel's gp + * - r3: preserved (same as on entry) ++ * - r8: -EINVAL if p10 is true + * - r12: points to kernel stack + * - r13: points to current task + * - p15: TRUE if interrupts need to be re-enabled +@@ -852,7 +858,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) + add r17=PT(R11),r1 // initialize second base pointer + ;; + alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable +- st8 [r16]=r29,PT(CR_IFS)-PT(CR_IPSR) // save cr.ipsr ++ st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr + tnat.nz p8,p0=in0 + + st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 +@@ -860,31 +866,36 @@ GLOBAL_ENTRY(ia64_syscall_setup) + (pKStk) mov r18=r0 // make sure r18 isn't NaT + ;; + ++ st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs + st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip + mov r28=b0 // save b0 (2 cyc) +-(p8) mov in0=-1 + ;; + +- st8 [r16]=r0,PT(AR_PFS)-PT(CR_IFS) // clear cr.ifs + st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat +-(p9) mov in1=-1 ++ dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] ++(p8) mov in0=-1 + ;; + +- st8 [r16]=r26,PT(AR_RNAT)-PT(AR_PFS) // save ar.pfs ++ st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs ++ extr.u r11=r19,7,7 // I0 // get sol of ar.pfs ++ and r8=0x7f,r19 // A // get sof of ar.pfs ++ + st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc +- tnat.nz p10,p0=in2 ++ tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 ++(p9) mov in1=-1 ++ ;; + + (pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 +- tbit.nz p15,p0=r29,IA64_PSR_I_BIT +- tnat.nz p11,p0=in3 ++ tnat.nz p10,p0=in2 ++ add r11=8,r11 + ;; + (pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field + (pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field ++ tnat.nz p11,p0=in3 ++ ;; + (p10) mov in2=-1 +- ++ tnat.nz p12,p0=in4 // [I0] + (p11) mov in3=-1 +- tnat.nz p12,p0=in4 +- tnat.nz p13,p0=in5 + ;; + (pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat + (pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore +@@ -892,36 +903,41 @@ GLOBAL_ENTRY(ia64_syscall_setup) + ;; + st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates + st8 [r17]=r28,PT(R1)-PT(B0) // save b0 +-(p12) mov in4=-1 ++ tnat.nz p13,p0=in5 // [I0] + ;; + st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" + st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 +-(p13) mov 
in5=-1 ++(p12) mov in4=-1 + ;; + + .mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 + .mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 +- tnat.nz p14,p0=in6 ++(p13) mov in5=-1 + ;; + st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr +- st8.spill [r17]=r15 // save r15 +- tnat.nz p8,p0=in7 ++ tnat.nz p14,p0=in6 ++ cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 + ;; + stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) ++(p9) tnat.nz p10,p0=r15 + adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) +-(p14) mov in6=-1 ++ ++ st8.spill [r17]=r15 // save r15 ++ tnat.nz p8,p0=in7 ++ nop.i 0 + + mov r13=r2 // establish `current' + movl r1=__gp // establish kernel global pointer + ;; ++(p14) mov in6=-1 + (p8) mov in7=-1 +- tnat.nz p9,p0=r15 ++ nop.i 0 + + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value +-(p9) mov r15=-1 ++(p10) mov r8=-EINVAL + br.ret.sptk.many b7 + END(ia64_syscall_setup) + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/mca.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/mca.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/mca.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/mca.c 2006-05-11 13:05:40.000000000 +0400 +@@ -501,13 +501,13 @@ init_handler_platform (pal_min_state_are + #endif + { + struct task_struct *g, *t; +- do_each_thread (g, t) { ++ do_each_thread_all(g, t) { + if (t == current) + continue; + + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL); +- } while_each_thread (g, t); ++ } while_each_thread_all(g, t); + } + #ifdef CONFIG_SMP + if (!tasklist_lock.write_lock) +@@ -691,6 +691,7 @@ ia64_mca_wakeup_ipi_wait(void) + irr = ia64_getreg(_IA64_REG_CR_IRR3); + break; + } ++ cpu_relax(); + } while (!(irr & (1UL << irr_bit))) ; + } + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/perfmon.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/perfmon.c 2006-05-11 13:05:40.000000000 +0400 +@@ -2582,7 +2582,7 @@ pfm_task_incompatible(pfm_context_t *ctx + return -EINVAL; + } + +- if (task->state == TASK_ZOMBIE) { ++ if (task->exit_state == EXIT_ZOMBIE) { + DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); + return -EBUSY; + } +@@ -2619,7 +2619,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p + + read_lock(&tasklist_lock); + +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + + /* make sure task cannot go away while we operate on it */ + if (p) get_task_struct(p); +@@ -4177,12 +4177,12 @@ pfm_check_task_exist(pfm_context_t *ctx) + + read_lock(&tasklist_lock); + +- do_each_thread (g, t) { ++ do_each_thread_ve(g, t) { + if (t->thread.pfm_context == ctx) { + ret = 0; + break; + } +- } while_each_thread (g, t); ++ } while_each_thread_ve(g, t); + + read_unlock(&tasklist_lock); + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/process.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/process.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/process.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/process.c 2006-05-11 13:05:40.000000000 +0400 +@@ -185,6 +185,8 @@ default_idle (void) + while (!need_resched()) + if (pal_halt && !pmu_active) + safe_halt(); ++ else ++ cpu_relax(); + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -601,7 +603,7 @@ dump_fpu (struct pt_regs *pt, 
elf_fpregs + return 1; /* f0-f31 are always valid so we always return 1 */ + } + +-asmlinkage long ++long + sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs) + { + int error; +@@ -626,6 +628,13 @@ kernel_thread (int (*fn)(void *), void * + struct pt_regs pt; + } regs; + ++ /* Don't allow kernel_thread() inside VE */ ++ if (!ve_is_super(get_exec_env())) { ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++ } ++ + memset(&regs, 0, sizeof(regs)); + regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ + regs.pt.r1 = helper_fptr[1]; /* set GP */ +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/ptrace.c 2006-05-11 13:05:49.000000000 +0400 +@@ -1,7 +1,7 @@ + /* + * Kernel support for the ptrace() and syscall tracing interfaces. + * +- * Copyright (C) 1999-2003 Hewlett-Packard Co ++ * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from the x86 and Alpha versions. Most of the code in here +@@ -31,9 +31,6 @@ + + #include "entry.h" + +-#define p4 (1UL << 4) /* for pSys (see entry.h) */ +-#define p5 (1UL << 5) /* for pNonSys (see entry.h) */ +- + /* + * Bits in the PSR that we allow ptrace() to change: + * be, up, ac, mfl, mfh (the user mask; five bits total) +@@ -304,7 +301,6 @@ put_rnat (struct task_struct *task, stru + long num_regs, nbits; + struct pt_regs *pt; + unsigned long cfm, *urbs_kargs; +- struct unw_frame_info info; + + pt = ia64_task_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; +@@ -316,11 +312,8 @@ put_rnat (struct task_struct *task, stru + * If entered via syscall, don't allow user to set rnat bits + * for syscall args. + */ +- unw_init_from_blocked_task(&info,task); +- if (unw_unwind_to_user(&info) == 0) { +- unw_get_cfm(&info,&cfm); +- urbs_kargs = ia64_rse_skip_regs(urbs_end,-(cfm & 0x7f)); +- } ++ cfm = pt->cr_ifs; ++ urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); + } + + if (urbs_kargs >= urnat_addr) +@@ -480,27 +473,18 @@ ia64_poke (struct task_struct *child, st + unsigned long + ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, unsigned long *cfmp) + { +- unsigned long *krbs, *bspstore, cfm; +- struct unw_frame_info info; ++ unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; + long ndirty; + + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) pt->ar_bspstore; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); +- cfm = pt->cr_ifs & ~(1UL << 63); + +- if (in_syscall(pt)) { +- /* +- * If bit 63 of cr.ifs is cleared, the kernel was entered via a system +- * call and we need to recover the CFM that existed on entry to the +- * kernel by unwinding the kernel stack.
+- */ +- unw_init_from_blocked_task(&info, child); +- if (unw_unwind_to_user(&info) == 0) { +- unw_get_cfm(&info, &cfm); +- ndirty += (cfm & 0x7f); +- } +- } ++ if (in_syscall(pt)) ++ ndirty += (cfm & 0x7f); ++ else ++ cfm &= ~(1UL << 63); /* clear valid bit */ ++ + if (cfmp) + *cfmp = cfm; + return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); +@@ -591,7 +575,7 @@ find_thread_for_addr (struct task_struct + goto out; + } while ((p = next_thread(p)) != child); + +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + if (child->mm != mm) + continue; + +@@ -599,7 +583,7 @@ find_thread_for_addr (struct task_struct + child = p; + goto out; + } +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + out: + mmput(mm); + return child; +@@ -682,8 +666,8 @@ convert_to_non_syscall (struct task_stru + } + + unw_get_pr(&prev_info, &pr); +- pr &= ~pSys; +- pr |= pNonSys; ++ pr &= ~(1UL << PRED_SYSCALL); ++ pr |= (1UL << PRED_NON_SYSCALL); + unw_set_pr(&prev_info, pr); + + pt->cr_ifs = (1UL << 63) | cfm; +@@ -854,6 +838,13 @@ access_uarea (struct task_struct *child, + *data = (pt->cr_ipsr & IPSR_READ_MASK); + return 0; + ++ case PT_AR_RSC: ++ if (write_access) ++ pt->ar_rsc = *data | (3 << 2); /* force PL3 */ ++ else ++ *data = pt->ar_rsc; ++ return 0; ++ + case PT_AR_RNAT: + urbs_end = ia64_get_user_rbs_end(child, pt, NULL); + rnat_addr = (long) ia64_rse_rnat_addr((long *) urbs_end); +@@ -909,9 +900,6 @@ access_uarea (struct task_struct *child, + ptr = (unsigned long *) + ((long) pt + offsetof(struct pt_regs, ar_bspstore)); + break; +- case PT_AR_RSC: +- ptr = (unsigned long *) ((long) pt + offsetof(struct pt_regs, ar_rsc)); +- break; + case PT_AR_UNAT: + ptr = (unsigned long *) ((long) pt + offsetof(struct pt_regs, ar_unat)); + break; +@@ -997,12 +985,14 @@ access_uarea (struct task_struct *child, + } + + static long +-ptrace_getregs (struct task_struct *child, struct pt_all_user_regs *ppr) ++ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) + { ++ unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; ++ struct unw_frame_info info; ++ struct ia64_fpreg fpval; + struct switch_stack *sw; + struct pt_regs *pt; + long ret, retval; +- struct unw_frame_info info; + char nat = 0; + int i; + +@@ -1023,12 +1013,21 @@ ptrace_getregs (struct task_struct *chil + return -EIO; + } + ++ if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0 ++ || access_uarea(child, PT_AR_EC, &ec, 0) < 0 ++ || access_uarea(child, PT_AR_LC, &lc, 0) < 0 ++ || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0 ++ || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0 ++ || access_uarea(child, PT_CFM, &cfm, 0) ++ || access_uarea(child, PT_NAT_BITS, &nat_bits, 0)) ++ return -EIO; ++ + retval = 0; + + /* control regs */ + + retval |= __put_user(pt->cr_iip, &ppr->cr_iip); +- retval |= access_uarea(child, PT_CR_IPSR, &ppr->cr_ipsr, 0); ++ retval |= __put_user(psr, &ppr->cr_ipsr); + + /* app regs */ + +@@ -1039,11 +1038,11 @@ ptrace_getregs (struct task_struct *chil + retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + +- retval |= access_uarea(child, PT_AR_EC, &ppr->ar[PT_AUR_EC], 0); +- retval |= access_uarea(child, PT_AR_LC, &ppr->ar[PT_AUR_LC], 0); +- retval |= access_uarea(child, PT_AR_RNAT, &ppr->ar[PT_AUR_RNAT], 0); +- retval |= access_uarea(child, PT_AR_BSP, &ppr->ar[PT_AUR_BSP], 0); +- retval |= access_uarea(child, PT_CFM, &ppr->cfm, 0); ++ retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); ++ retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); 
++ retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); ++ retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); ++ retval |= __put_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + +@@ -1053,7 +1052,9 @@ ptrace_getregs (struct task_struct *chil + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { +- retval |= unw_access_gr(&info, i, &ppr->gr[i], &nat, 0); ++ if (unw_access_gr(&info, i, &val, &nat, 0) < 0) ++ return -EIO; ++ retval |= __put_user(val, &ppr->gr[i]); + } + + /* gr8-gr11 */ +@@ -1077,7 +1078,9 @@ ptrace_getregs (struct task_struct *chil + /* b1-b5 */ + + for (i = 1; i < 6; i++) { +- retval |= unw_access_br(&info, i, &ppr->br[i], 0); ++ if (unw_access_br(&info, i, &val, 0) < 0) ++ return -EIO; ++ __put_user(val, &ppr->br[i]); + } + + /* b6-b7 */ +@@ -1088,8 +1091,9 @@ ptrace_getregs (struct task_struct *chil + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { +- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 0); +- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 0); ++ if (unw_get_fr(&info, i, &fpval) < 0) ++ return -EIO; ++ retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fr6-fr11 */ +@@ -1103,8 +1107,9 @@ ptrace_getregs (struct task_struct *chil + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { +- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 0); +- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 0); ++ if (unw_get_fr(&info, i, &fpval) < 0) ++ return -EIO; ++ retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fph */ +@@ -1118,22 +1123,25 @@ ptrace_getregs (struct task_struct *chil + + /* nat bits */ + +- retval |= access_uarea(child, PT_NAT_BITS, &ppr->nat, 0); ++ retval |= __put_user(nat_bits, &ppr->nat); + + ret = retval ? -EIO : 0; + return ret; + } + + static long +-ptrace_setregs (struct task_struct *child, struct pt_all_user_regs *ppr) ++ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) + { ++ unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; ++ struct unw_frame_info info; + struct switch_stack *sw; ++ struct ia64_fpreg fpval; + struct pt_regs *pt; + long ret, retval; +- struct unw_frame_info info; +- char nat = 0; + int i; + ++ memset(&fpval, 0, sizeof(fpval)); ++ + retval = verify_area(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs)); + if (retval != 0) { + return -EIO; +@@ -1156,22 +1164,22 @@ ptrace_setregs (struct task_struct *chil + /* control regs */ + + retval |= __get_user(pt->cr_iip, &ppr->cr_iip); +- retval |= access_uarea(child, PT_CR_IPSR, &ppr->cr_ipsr, 1); ++ retval |= __get_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); +- retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); ++ retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + +- retval |= access_uarea(child, PT_AR_EC, &ppr->ar[PT_AUR_EC], 1); +- retval |= access_uarea(child, PT_AR_LC, &ppr->ar[PT_AUR_LC], 1); +- retval |= access_uarea(child, PT_AR_RNAT, &ppr->ar[PT_AUR_RNAT], 1); +- retval |= access_uarea(child, PT_AR_BSP, &ppr->ar[PT_AUR_BSP], 1); +- retval |= access_uarea(child, PT_CFM, &ppr->cfm, 1); ++ retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); ++ retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); ++ retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); ++ 
retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); ++ retval |= __get_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + +@@ -1181,11 +1189,9 @@ ptrace_setregs (struct task_struct *chil + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { +- long ret = unw_get_gr(&info, i, &ppr->gr[i], &nat); +- if (ret < 0) { +- return ret; +- } +- retval |= unw_access_gr(&info, i, &ppr->gr[i], &nat, 1); ++ retval |= __get_user(val, &ppr->gr[i]); ++ if (unw_set_gr(&info, i, val, 0) < 0) /* NaT bit will be set via PT_NAT_BITS */ ++ return -EIO; + } + + /* gr8-gr11 */ +@@ -1209,7 +1215,8 @@ ptrace_setregs (struct task_struct *chil + /* b1-b5 */ + + for (i = 1; i < 6; i++) { +- retval |= unw_access_br(&info, i, &ppr->br[i], 1); ++ retval |= __get_user(val, &ppr->br[i]); ++ unw_set_br(&info, i, val); + } + + /* b6-b7 */ +@@ -1220,8 +1227,9 @@ ptrace_setregs (struct task_struct *chil + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { +- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 1); +- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 1); ++ retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); ++ if (unw_set_fr(&info, i, fpval) < 0) ++ return -EIO; + } + + /* fr6-fr11 */ +@@ -1235,8 +1243,9 @@ ptrace_setregs (struct task_struct *chil + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { +- retval |= access_fr(&info, i, 0, (unsigned long *) &ppr->fr[i], 1); +- retval |= access_fr(&info, i, 1, (unsigned long *) &ppr->fr[i] + 1, 1); ++ retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); ++ if (unw_set_fr(&info, i, fpval) < 0) ++ return -EIO; + } + + /* fph */ +@@ -1250,7 +1259,16 @@ ptrace_setregs (struct task_struct *chil + + /* nat bits */ + +- retval |= access_uarea(child, PT_NAT_BITS, &ppr->nat, 1); ++ retval |= __get_user(nat_bits, &ppr->nat); ++ ++ retval |= access_uarea(child, PT_CR_IPSR, &psr, 1); ++ retval |= access_uarea(child, PT_AR_RSC, &rsc, 1); ++ retval |= access_uarea(child, PT_AR_EC, &ec, 1); ++ retval |= access_uarea(child, PT_AR_LC, &lc, 1); ++ retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1); ++ retval |= access_uarea(child, PT_AR_BSP, &bsp, 1); ++ retval |= access_uarea(child, PT_CFM, &cfm, 1); ++ retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1); + + ret = retval ? -EIO : 0; + return ret; +@@ -1300,7 +1318,7 @@ sys_ptrace (long request, pid_t pid, uns + ret = -ESRCH; + read_lock(&tasklist_lock); + { +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) { + if (peek_or_poke) + child = find_thread_for_addr(child, addr); +@@ -1393,7 +1411,7 @@ sys_ptrace (long request, pid_t pid, uns + * sigkill. Perhaps it should be put in the status + * that it wants to exit. + */ +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + goto out_tsk; + child->exit_code = SIGKILL; + +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/salinfo.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/salinfo.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/salinfo.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/salinfo.c 2006-05-11 13:05:30.000000000 +0400 +@@ -417,7 +417,12 @@ retry: + + if (!data->saved_num) + call_on_cpu(cpu, salinfo_log_read_cpu, data); +- data->state = data->log_size ? 
STATE_LOG_RECORD : STATE_NO_DATA; ++ if (!data->log_size) { ++ data->state = STATE_NO_DATA; ++ clear_bit(cpu, &data->cpu_event); ++ } else { ++ data->state = STATE_LOG_RECORD; ++ } + } + + static ssize_t +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/signal.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/signal.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/signal.c 2006-05-11 13:05:40.000000000 +0400 +@@ -95,7 +95,7 @@ sys_sigaltstack (const stack_t *uss, sta + static long + restore_sigcontext (struct sigcontext *sc, struct sigscratch *scr) + { +- unsigned long ip, flags, nat, um, cfm; ++ unsigned long ip, flags, nat, um, cfm, rsc; + long err; + + /* Always make any pending restarted system calls return -EINTR */ +@@ -107,7 +107,7 @@ restore_sigcontext (struct sigcontext *s + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(cfm, &sc->sc_cfm); + err |= __get_user(um, &sc->sc_um); /* user mask */ +- err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); ++ err |= __get_user(rsc, &sc->sc_ar_rsc); + err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); + err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); +@@ -120,6 +120,7 @@ restore_sigcontext (struct sigcontext *s + err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ + + scr->pt.cr_ifs = cfm | (1UL << 63); ++ scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ + + /* establish new instruction pointer: */ + scr->pt.cr_iip = ip & ~0x3UL; +@@ -267,7 +268,7 @@ ia64_rt_sigreturn (struct sigscratch *sc + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; +- si.si_pid = current->pid; ++ si.si_pid = virt_pid(current); + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); +@@ -290,12 +291,10 @@ setup_sigcontext (struct sigcontext *sc, + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; +- if ((ifs & (1UL << 63)) == 0) { +- /* if cr_ifs isn't valid, we got here through a syscall */ ++ if ((ifs & (1UL << 63)) == 0) ++ /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; +- cfm = scr->ar_pfs & ((1UL << 38) - 1); +- } else +- cfm = ifs & ((1UL << 38) - 1); ++ cfm = ifs & ((1UL << 38) - 1); + ia64_flush_fph(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; +@@ -429,7 +428,7 @@ setup_frame (int sig, struct k_sigaction + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; +- si.si_pid = current->pid; ++ si.si_pid = virt_pid(current); + si.si_uid = current->uid; + si.si_addr = frame; + force_sig_info(SIGSEGV, &si, current); +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/smp.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/smp.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/smp.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/smp.c 2006-05-11 13:05:30.000000000 +0400 +@@ -290,11 +290,11 @@ smp_call_function_single (int cpuid, voi + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) +- barrier(); ++ cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) +- barrier(); ++ cpu_relax(); + call_data = NULL; + + spin_unlock_bh(&call_lock); +@@ -349,11 +349,11 @@ smp_call_function (void (*func) (void *i + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) +- barrier(); ++ cpu_relax(); + + if (wait) + 
while (atomic_read(&data.finished) != cpus) +- barrier(); ++ cpu_relax(); + call_data = NULL; + + spin_unlock(&call_lock); +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/smpboot.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/smpboot.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/smpboot.c 2004-08-14 14:54:52.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/smpboot.c 2006-05-11 13:05:40.000000000 +0400 +@@ -363,7 +363,7 @@ fork_by_hand (void) + * Don't care about the IP and regs settings since we'll never reschedule the + * forked task. + */ +- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL, NULL); ++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL, NULL, 0); + } + + struct create_idle { +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/time.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/time.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/time.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/time.c 2006-05-11 13:05:40.000000000 +0400 +@@ -36,6 +36,9 @@ u64 jiffies_64 = INITIAL_JIFFIES; + + EXPORT_SYMBOL(jiffies_64); + ++unsigned int cpu_khz; /* TSC clocks / usec, not used here */ ++EXPORT_SYMBOL(cpu_khz); ++ + #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ + + #ifdef CONFIG_IA64_DEBUG_IRQ +@@ -389,6 +392,8 @@ ia64_init_itm (void) + register_time_interpolator(&itc_interpolator); + } + ++ cpu_khz = local_cpu_data->proc_freq / 1000; ++ + /* Setup the CPU local timer tick */ + ia64_cpu_local_tick(); + } +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/traps.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/traps.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/traps.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/traps.c 2006-05-11 13:05:24.000000000 +0400 +@@ -35,34 +35,6 @@ trap_init (void) + fpswa_interface = __va(ia64_boot_param->fpswa); + } + +-/* +- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock +- * is acquired through the console unblank code) +- */ +-void +-bust_spinlocks (int yes) +-{ +- int loglevel_save = console_loglevel; +- +- if (yes) { +- oops_in_progress = 1; +- return; +- } +- +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() without +- * oops_in_progress set so that printk will give klogd a poke. Hold onto +- * your hats... 
+- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +-} +- + void + die (const char *str, struct pt_regs *regs, long err) + { +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/unaligned.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/unaligned.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/unaligned.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/unaligned.c 2006-05-11 13:05:40.000000000 +0400 +@@ -24,7 +24,7 @@ + #include <asm/uaccess.h> + #include <asm/unaligned.h> + +-extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); ++extern void die_if_kernel(char *str, struct pt_regs *regs, long err); + + #undef DEBUG_UNALIGNED_TRAP + +@@ -1281,7 +1281,7 @@ within_logging_rate_limit (void) + { + static unsigned long count, last_time; + +- if (jiffies - last_time > 5*HZ) ++ if (jiffies - last_time > 60*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; +@@ -1339,7 +1339,7 @@ ia64_handle_unaligned (unsigned long ifa + if (user_mode(regs)) + tty_write_message(current->signal->tty, buf); + buf[len-1] = '\0'; /* drop '\r' */ +- printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ ++ ve_printk(VE_LOG, KERN_WARNING "%s", buf); /* watch for command names containing %s */ + } + } else { + if (within_logging_rate_limit()) +diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/unwind.c linux-2.6.8.1-ve022stab078/arch/ia64/kernel/unwind.c +--- linux-2.6.8.1.orig/arch/ia64/kernel/unwind.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/kernel/unwind.c 2006-05-11 13:05:30.000000000 +0400 +@@ -48,7 +48,6 @@ + #include "unwind_i.h" + + #define MIN(a,b) ((a) < (b) ? (a) : (b)) +-#define p5 5 + + #define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ + #define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) +@@ -365,7 +364,7 @@ unw_access_gr (struct unw_frame_info *in + if (info->pri_unat_loc) + nat_addr = info->pri_unat_loc; + else +- nat_addr = &info->sw->ar_unat; ++ nat_addr = &info->sw->caller_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { +@@ -527,7 +526,7 @@ unw_access_ar (struct unw_frame_info *in + case UNW_AR_UNAT: + addr = info->unat_loc; + if (!addr) +- addr = &info->sw->ar_unat; ++ addr = &info->sw->caller_unat; + break; + + case UNW_AR_LC: +@@ -1787,7 +1786,7 @@ run_script (struct unw_script *script, s + + case UNW_INSN_SETNAT_MEMSTK: + if (!state->pri_unat_loc) +- state->pri_unat_loc = &state->sw->ar_unat; ++ state->pri_unat_loc = &state->sw->caller_unat; + /* register off. 
is a multiple of 8, so the least 3 bits (type) are 0 */ + s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; + break; +@@ -1905,7 +1904,7 @@ unw_unwind (struct unw_frame_info *info) + num_regs = 0; + if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { + info->pt = info->sp + 16; +- if ((pr & (1UL << pNonSys)) != 0) ++ if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) + num_regs = *info->cfm_loc & 0x7f; /* size of frame */ + info->pfs_loc = + (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); +@@ -1951,20 +1950,30 @@ EXPORT_SYMBOL(unw_unwind); + int + unw_unwind_to_user (struct unw_frame_info *info) + { +- unsigned long ip; ++ unsigned long ip, sp, pr = 0; + + while (unw_unwind(info) >= 0) { +- if (unw_get_rp(info, &ip) < 0) { +- unw_get_ip(info, &ip); +- UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n", +- __FUNCTION__, ip); +- return -1; ++ unw_get_sp(info, &sp); ++ if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) ++ < IA64_PT_REGS_SIZE) { ++ UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n", ++ __FUNCTION__); ++ break; + } +- if (ip < FIXADDR_USER_END) ++ if (unw_is_intr_frame(info) && ++ (pr & (1UL << PRED_USER_STACK))) + return 0; ++ if (unw_get_pr (info, &pr) < 0) { ++ unw_get_rp(info, &ip); ++ UNW_DPRINT(0, "unwind.%s: failed to read " ++ "predicate register (ip=0x%lx)\n", ++ __FUNCTION__, ip); ++ return -1; ++ } + } + unw_get_ip(info, &ip); +- UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); ++ UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", ++ __FUNCTION__, ip); + return -1; + } + EXPORT_SYMBOL(unw_unwind_to_user); +@@ -2239,11 +2248,11 @@ unw_init (void) + if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) + unw_hash_index_t_is_too_narrow(); + +- unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT); ++ unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); +- unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT); ++ unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS); + unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); +- unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT); ++ unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); + unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); + unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); +diff -uprN linux-2.6.8.1.orig/arch/ia64/lib/memcpy_mck.S linux-2.6.8.1-ve022stab078/arch/ia64/lib/memcpy_mck.S +--- linux-2.6.8.1.orig/arch/ia64/lib/memcpy_mck.S 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/lib/memcpy_mck.S 2006-05-11 13:05:30.000000000 +0400 +@@ -309,7 +309,7 @@ EK(.ex_handler, (p[D]) st8 [dst1] = t15, + add src_pre_mem=0,src0 // prefetch src pointer + add dst_pre_mem=0,dst0 // prefetch dest pointer + and src0=-8,src0 // 1st src pointer +-(p7) mov ar.lc = r21 ++(p7) mov ar.lc = cnt + (p8) mov ar.lc = r0 + ;; + TEXT_ALIGN(32) +@@ -634,8 +634,11 @@ END(memcpy) + clrrrb + ;; + alloc saved_pfs_stack=ar.pfs,3,3,3,0 ++ cmp.lt p8,p0=A,r0 + sub B = dst0, saved_in0 // how many byte copied so far + ;; ++(p8) mov A = 0; // A shouldn't be negative, cap it ++ ;; + sub C = A, B + sub D = saved_in2, A + ;; +diff -uprN linux-2.6.8.1.orig/arch/ia64/lib/swiotlb.c linux-2.6.8.1-ve022stab078/arch/ia64/lib/swiotlb.c +--- linux-2.6.8.1.orig/arch/ia64/lib/swiotlb.c 2004-08-14 14:55:48.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/arch/ia64/lib/swiotlb.c 2006-05-11 13:05:30.000000000 +0400 +@@ -337,7 +337,7 @@ swiotlb_map_single (struct device *hwdev + + /* + * Since DMA is i-cache coherent, any (complete) pages that were written via +- * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to ++ * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ + static void +diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/contig.c linux-2.6.8.1-ve022stab078/arch/ia64/mm/contig.c +--- linux-2.6.8.1.orig/arch/ia64/mm/contig.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/mm/contig.c 2006-05-11 13:05:40.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <linux/efi.h> + #include <linux/mm.h> + #include <linux/swap.h> ++#include <linux/module.h> + + #include <asm/meminit.h> + #include <asm/pgalloc.h> +@@ -297,3 +298,5 @@ paging_init (void) + #endif /* !CONFIG_VIRTUAL_MEM_MAP */ + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); + } ++ ++EXPORT_SYMBOL(show_mem); +diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/discontig.c linux-2.6.8.1-ve022stab078/arch/ia64/mm/discontig.c +--- linux-2.6.8.1.orig/arch/ia64/mm/discontig.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/mm/discontig.c 2006-05-11 13:05:40.000000000 +0400 +@@ -21,6 +21,7 @@ + #include <asm/meminit.h> + #include <asm/numa.h> + #include <asm/sections.h> ++#include <linux/module.h> + + /* + * Track per-node information needed to setup the boot memory allocator, the +@@ -671,3 +672,5 @@ void paging_init(void) + + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); + } ++ ++EXPORT_SYMBOL(show_mem); +diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/fault.c linux-2.6.8.1-ve022stab078/arch/ia64/mm/fault.c +--- linux-2.6.8.1.orig/arch/ia64/mm/fault.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/mm/fault.c 2006-05-11 13:05:38.000000000 +0400 +@@ -16,6 +16,8 @@ + #include <asm/uaccess.h> + #include <asm/hardirq.h> + ++#include <ub/beancounter.h> ++ + extern void die (char *, struct pt_regs *, long); + + /* +@@ -36,6 +38,11 @@ expand_backing_store (struct vm_area_str + if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur + || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)) + return -ENOMEM; ++ ++ if (ub_memory_charge(mm_ub(vma->vm_mm), PAGE_SIZE, ++ vma->vm_flags, vma->vm_file, UB_HARD)) ++ return -ENOMEM; ++ + vma->vm_end += PAGE_SIZE; + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) +@@ -213,9 +220,6 @@ ia64_do_page_fault (unsigned long addres + return; + } + +- if (ia64_done_with_exception(regs)) +- return; +- + /* + * Since we have no vma's for region 5, we might get here even if the address is + * valid, due to the VHPT walker inserting a non present translation that becomes +@@ -226,6 +230,9 @@ ia64_do_page_fault (unsigned long addres + if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) + return; + ++ if (ia64_done_with_exception(regs)) ++ return; ++ + /* + * Oops. The kernel tried to access some bad page. We'll have to terminate things + * with extreme prejudice. 
+@@ -244,13 +251,13 @@ ia64_do_page_fault (unsigned long addres + + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { +- yield(); +- down_read(&mm->mmap_sem); +- goto survive; +- } +- printk(KERN_CRIT "VM: killing process %s\n", current->comm); +- if (user_mode(regs)) +- do_exit(SIGKILL); ++ if (user_mode(regs)) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. Den ++ */ ++ force_sig(SIGKILL, current); ++ return; ++ } + goto no_context; + } +diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/init.c linux-2.6.8.1-ve022stab078/arch/ia64/mm/init.c +--- linux-2.6.8.1.orig/arch/ia64/mm/init.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/mm/init.c 2006-05-11 13:05:38.000000000 +0400 +@@ -37,6 +37,8 @@ + #include <asm/unistd.h> + #include <asm/mca.h> + ++#include <ub/ub_vmpages.h> ++ + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + + extern void ia64_tlb_init (void); +@@ -76,7 +78,7 @@ check_pgt_cache (void) + } + + void +-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte) ++lazy_mmu_prot_update (pte_t pte) + { + unsigned long addr; + struct page *page; +@@ -85,7 +87,6 @@ update_mmu_cache (struct vm_area_struct + return; /* not an executable page... */ + + page = pte_page(pte); +- /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */ + addr = (unsigned long) page_address(page); + + if (test_bit(PG_arch_1, &page->flags)) +@@ -118,6 +119,10 @@ ia64_init_addr_space (void) + + ia64_set_rbs_bot(); + ++ if (ub_memory_charge(mm_ub(current->mm), PAGE_SIZE, ++ VM_DATA_DEFAULT_FLAGS, NULL, UB_SOFT)) ++ return; ++ + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore + * the problem. When the process attempts to write to the register backing store +@@ -131,8 +136,18 @@ ia64_init_addr_space (void) + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; + vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP; +- insert_vm_struct(current->mm, vma); +- } ++ down_write(&current->mm->mmap_sem); ++ if (insert_vm_struct(current->mm, vma)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE, ++ VM_DATA_DEFAULT_FLAGS, NULL); ++ return; ++ } ++ up_write(&current->mm->mmap_sem); ++ } else ++ ub_memory_uncharge(mm_ub(current->mm), PAGE_SIZE, ++ VM_DATA_DEFAULT_FLAGS, NULL); + + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { +@@ -143,7 +158,13 @@ ia64_init_addr_space (void) + vma->vm_end = PAGE_SIZE; + vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; +- insert_vm_struct(current->mm, vma); ++ down_write(&current->mm->mmap_sem); ++ if (insert_vm_struct(current->mm, vma)) { ++ up_write(&current->mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ return; ++ } ++ up_write(&current->mm->mmap_sem); + } + } + } +@@ -260,8 +281,9 @@ setup_gate (void) + struct page *page; + + /* +- * Map the gate page twice: once read-only to export the ELF headers etc. and once +- * execute-only page to enable privilege-promotion via "epc": ++ * Map the gate page twice: once read-only to export the ELF ++ * headers etc.
and once execute-only page to enable ++ * privilege-promotion via "epc": + */ + page = virt_to_page(ia64_imva(__start_gate_section)); + put_kernel_page(page, GATE_ADDR, PAGE_READONLY); +@@ -270,6 +292,20 @@ setup_gate (void) + put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); + #else + put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); ++ /* Fill in the holes (if any) with read-only zero pages: */ ++ { ++ unsigned long addr; ++ ++ for (addr = GATE_ADDR + PAGE_SIZE; ++ addr < GATE_ADDR + PERCPU_PAGE_SIZE; ++ addr += PAGE_SIZE) ++ { ++ put_kernel_page(ZERO_PAGE(0), addr, ++ PAGE_READONLY); ++ put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE, ++ PAGE_READONLY); ++ } ++ } + #endif + ia64_patch_gate(); + } +diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/tlb.c linux-2.6.8.1-ve022stab078/arch/ia64/mm/tlb.c +--- linux-2.6.8.1.orig/arch/ia64/mm/tlb.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/mm/tlb.c 2006-05-11 13:05:40.000000000 +0400 +@@ -57,7 +57,7 @@ wrap_mmu_context (struct mm_struct *mm) + + read_lock(&tasklist_lock); + repeat: +- for_each_process(tsk) { ++ for_each_process_all(tsk) { + if (!tsk->mm) + continue; + tsk_context = tsk->mm->context; +diff -uprN linux-2.6.8.1.orig/arch/ia64/pci/pci.c linux-2.6.8.1-ve022stab078/arch/ia64/pci/pci.c +--- linux-2.6.8.1.orig/arch/ia64/pci/pci.c 2004-08-14 14:55:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/pci/pci.c 2006-05-11 13:05:31.000000000 +0400 +@@ -55,13 +55,13 @@ struct pci_fixup pcibios_fixups[1]; + */ + + #define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ +- ((u64)(seg << 24) | (u64)(bus << 16) | \ ++ ((u64)((u64) seg << 24) | (u64)(bus << 16) | \ + (u64)(devfn << 8) | (u64)(reg)) + + /* SAL 3.2 adds support for extended config space. */ + + #define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ +- ((u64)(seg << 28) | (u64)(bus << 20) | \ ++ ((u64)((u64) seg << 28) | (u64)(bus << 20) | \ + (u64)(devfn << 12) | (u64)(reg)) + + static int +diff -uprN linux-2.6.8.1.orig/arch/ia64/sn/io/hwgfs/ramfs.c linux-2.6.8.1-ve022stab078/arch/ia64/sn/io/hwgfs/ramfs.c +--- linux-2.6.8.1.orig/arch/ia64/sn/io/hwgfs/ramfs.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ia64/sn/io/hwgfs/ramfs.c 2006-05-11 13:05:32.000000000 +0400 +@@ -97,7 +97,7 @@ static int hwgfs_symlink(struct inode * + inode = hwgfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; +- error = page_symlink(inode, symname, l); ++ error = page_symlink(inode, symname, l, GFP_KERNEL); + if (!error) { + d_instantiate(dentry, inode); + dget(dentry); +diff -uprN linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/m68k/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/m68k/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request, + long tmp; + + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. 
*/
+diff -uprN linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/m68knommu/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c 2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/m68knommu/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -271,7 +271,7 @@ asmlinkage int sys_ptrace(long request,
+ long tmp;
+
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/irixelf.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/irixelf.c 2006-05-11 13:05:35.000000000 +0400
+@@ -127,7 +127,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+
+@@ -376,7 +378,9 @@ static unsigned int load_irix_interp(str
+
+ /* Map the last of the bss segment */
+ if (last_bss > len) {
++ down_write(&current->mm->mmap_sem);
+ do_brk(len, (last_bss - len));
++ up_write(&current->mm->mmap_sem);
+ }
+ kfree(elf_phdata);
+
+@@ -448,7 +452,12 @@ static inline int look_for_irix_interpre
+ if (retval < 0)
+ goto out;
+
+- file = open_exec(*name);
++ /*
++ * I don't understand this loop.
++ * Are we suppose to break the loop after successful open and
++ * read, or close the file, or store it somewhere? --SAW
++ */
++ file = open_exec(*name, bprm);
+ if (IS_ERR(file)) {
+ retval = PTR_ERR(file);
+ goto out;
+@@ -564,7 +573,9 @@ void irix_map_prda_page (void)
+ unsigned long v;
+ struct prda *pp;
+
++ down_write(&current->mm->mmap_sem);
+ v = do_brk (PRDA_ADDRESS, PAGE_SIZE);
++ up_write(&current->mm->mmap_sem);
+
+ if (v < 0)
+ return;
+@@ -855,8 +866,11 @@ static int load_irix_library(struct file
+
+ len = (elf_phdata->p_filesz + elf_phdata->p_vaddr+ 0xfff) & 0xfffff000;
+ bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+- if (bss > len)
++ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ do_brk(len, bss-len);
++ up_write(&current->mm->mmap_sem);
++ }
+ kfree(elf_phdata);
+ return 0;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/irixsig.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c 2004-08-14 14:56:00.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/irixsig.c 2006-05-11 13:05:25.000000000 +0400
+@@ -184,9 +184,10 @@ asmlinkage int do_irix_signal(sigset_t *
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c 2004-08-14 14:56:24.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL:
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN 
linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/ptrace32.c +--- linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/ptrace32.c 2006-05-11 13:05:26.000000000 +0400 +@@ -262,7 +262,7 @@ asmlinkage int sys32_ptrace(int request, + */ + case PTRACE_KILL: + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + wake_up_process(child); +diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/signal.c +--- linux-2.6.8.1.orig/arch/mips/kernel/signal.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/signal.c 2006-05-11 13:05:25.000000000 +0400 +@@ -556,9 +556,10 @@ asmlinkage int do_signal(sigset_t *oldse + if (!user_mode(regs)) + return 1; + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); +- goto no_signal; ++ if (unlikely(test_thread_flag(TIF_FREEZE))) { ++ refrigerator(); ++ if (!signal_pending(current)) ++ goto no_signal; + } + + if (!oldset) +diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal32.c linux-2.6.8.1-ve022stab078/arch/mips/kernel/signal32.c +--- linux-2.6.8.1.orig/arch/mips/kernel/signal32.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/mips/kernel/signal32.c 2006-05-11 13:05:25.000000000 +0400 +@@ -704,9 +704,10 @@ asmlinkage int do_signal32(sigset_t *old + if (!user_mode(regs)) + return 1; + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); +- goto no_signal; ++ if (unlikely(test_thread_flag(TIF_FREEZE))) { ++ refrigerator(); ++ if (!signal_pending(current)) ++ goto no_signal; + } + + if (!oldset) +diff -uprN linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/parisc/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/parisc/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -303,7 +303,7 @@ long sys_ptrace(long request, pid_t pid, + * that it wants to exit. + */ + DBG(("sys_ptrace(KILL)\n")); +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + goto out_tsk; + child->exit_code = SIGKILL; + goto out_wake_notrap; +diff -uprN linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/ppc/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ppc/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -377,7 +377,7 @@ int sys_ptrace(long request, long pid, l + */ + case PTRACE_KILL: { + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. 
*/ +diff -uprN linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c linux-2.6.8.1-ve022stab078/arch/ppc64/boot/zlib.c +--- linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ppc64/boot/zlib.c 2006-05-11 13:05:34.000000000 +0400 +@@ -1307,7 +1307,7 @@ local int huft_build( + { + *t = (inflate_huft *)Z_NULL; + *m = 0; +- return Z_OK; ++ return Z_DATA_ERROR; + } + + +@@ -1351,6 +1351,7 @@ local int huft_build( + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); ++ n = x[g]; /* set n to length of v */ + + + /* Generate the Huffman codes and for each, make the table entries */ +diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ioctl32.c linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ioctl32.c +--- linux-2.6.8.1.orig/arch/ppc64/kernel/ioctl32.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ioctl32.c 2006-05-11 13:05:29.000000000 +0400 +@@ -41,7 +41,6 @@ IOCTL_TABLE_START + #include <linux/compat_ioctl.h> + #define DECLARES + #include "compat_ioctl.c" +-COMPATIBLE_IOCTL(TCSBRKP) + COMPATIBLE_IOCTL(TIOCSTART) + COMPATIBLE_IOCTL(TIOCSTOP) + COMPATIBLE_IOCTL(TIOCSLTC) +diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -182,7 +182,7 @@ int sys_ptrace(long request, long pid, l + */ + case PTRACE_KILL: { + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ +diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ptrace32.c +--- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/ppc64/kernel/ptrace32.c 2006-05-11 13:05:26.000000000 +0400 +@@ -314,7 +314,7 @@ int sys32_ptrace(long request, long pid, + */ + case PTRACE_KILL: { + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. 
*/ +diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_exec.c +--- linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_exec.c 2006-05-11 13:05:33.000000000 +0400 +@@ -39,7 +39,7 @@ int setup_arg_pages32(struct linux_binpr + unsigned long stack_base; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; +- int i; ++ int i, ret; + + stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; + mm->arg_start = bprm->p + stack_base; +@@ -68,7 +68,11 @@ int setup_arg_pages32(struct linux_binpr + /* executable stack setting would be applied here */ + mpnt->vm_page_prot = PAGE_COPY; + mpnt->vm_flags = VM_STACK_FLAGS; +- insert_vm_struct(mm, mpnt); ++ if ((ret = insert_vm_struct(mm, mpnt))) { ++ up_write(&mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, mpnt); ++ return ret; ++ } + mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + } + +diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_ioctl.c linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_ioctl.c +--- linux-2.6.8.1.orig/arch/s390/kernel/compat_ioctl.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_ioctl.c 2006-05-11 13:05:29.000000000 +0400 +@@ -65,9 +65,6 @@ COMPATIBLE_IOCTL(BIODASDSATTR) + COMPATIBLE_IOCTL(TAPE390_DISPLAY) + #endif + +-/* This one should be architecture independent */ +-COMPATIBLE_IOCTL(TCSBRKP) +- + /* s390 doesn't need handlers here */ + COMPATIBLE_IOCTL(TIOCGSERIAL) + COMPATIBLE_IOCTL(TIOCSSERIAL) +diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_signal.c linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_signal.c +--- linux-2.6.8.1.orig/arch/s390/kernel/compat_signal.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/s390/kernel/compat_signal.c 2006-05-11 13:05:34.000000000 +0400 +@@ -245,9 +245,6 @@ sys32_sigaction(int sig, const struct ol + return ret; + } + +-int +-do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact); +- + asmlinkage long + sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, + struct sigaction32 __user *oact, size_t sigsetsize) +diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/s390/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/s390/kernel/ptrace.c 2006-05-11 13:05:49.000000000 +0400 +@@ -626,7 +626,7 @@ do_ptrace(struct task_struct *child, lon + * perhaps it should be put in the status that it wants to + * exit. + */ +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + return 0; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ +diff -uprN linux-2.6.8.1.orig/arch/s390/mm/fault.c linux-2.6.8.1-ve022stab078/arch/s390/mm/fault.c +--- linux-2.6.8.1.orig/arch/s390/mm/fault.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/s390/mm/fault.c 2006-05-11 13:05:24.000000000 +0400 +@@ -61,17 +61,9 @@ void bust_spinlocks(int yes) + if (yes) { + oops_in_progress = 1; + } else { +- int loglevel_save = console_loglevel; + oops_in_progress = 0; + console_unblank(); +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. Hold onto your hats... 
+- */
+- console_loglevel = 15;
+- printk(" ");
+- console_loglevel = loglevel_save;
++ wake_up_klogd();
+ }
+ }
+
+diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/sh/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sh/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -217,7 +217,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/sh/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/sh/kernel/signal.c 2004-08-14 14:56:25.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sh/kernel/signal.c 2006-05-11 13:05:25.000000000 +0400
+@@ -584,9 +584,10 @@ int do_signal(struct pt_regs *regs, sigs
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
+ }
+
+ if (!oldset)
+diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/sh64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c 2004-08-14 14:55:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sh64/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -257,7 +257,7 @@ asmlinkage int sys_ptrace(long request,
+ */
+ case PTRACE_KILL: {
+ ret = 0;
+- if (child->state == TASK_ZOMBIE) /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+ break;
+ child->exit_code = SIGKILL;
+ wake_up_process(child);
+diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/sh64/kernel/signal.c
+--- linux-2.6.8.1.orig/arch/sh64/kernel/signal.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sh64/kernel/signal.c 2006-05-11 13:05:25.000000000 +0400
+@@ -705,10 +705,11 @@ int do_signal(struct pt_regs *regs, sigs
+ if (!user_mode(regs))
+ return 1;
+
+- if (current->flags & PF_FREEZE) {
+- refrigerator(0);
+- goto no_signal;
+- }
++ if (unlikely(test_thread_flag(TIF_FREEZE))) {
++ refrigerator();
++ if (!signal_pending(current))
++ goto no_signal;
++ }
+
+ if (!oldset)
+ oldset = &current->blocked;
+diff -uprN linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/sparc/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sparc/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -567,7 +567,7 @@ asmlinkage void do_ptrace(struct pt_regs
+ * exit. 
+ */
+ case PTRACE_KILL: {
+- if (child->state == TASK_ZOMBIE) { /* already dead */
++ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */
+ pt_succ_return(regs, 0);
+ goto out_tsk;
+ }
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/binfmt_aout32.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/binfmt_aout32.c 2006-05-11 13:05:33.000000000 +0400
+@@ -49,7 +49,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+ /*
+@@ -246,10 +248,14 @@ static int load_aout32_binary(struct lin
+ if (N_MAGIC(ex) == NMAGIC) {
+ loff_t pos = fd_offset;
+ /* Fuck me plenty... */
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_TXTADDR(ex), ex.a_text);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex),
+ ex.a_text, &pos);
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(N_DATADDR(ex), ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_DATADDR(ex),
+ ex.a_data, &pos);
+ goto beyond_if;
+@@ -257,8 +263,10 @@ static int load_aout32_binary(struct lin
+
+ if (N_MAGIC(ex) == OMAGIC) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex) & PAGE_MASK,
+ ex.a_text+ex.a_data + PAGE_SIZE - 1);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ } else {
+@@ -272,7 +280,9 @@ static int load_aout32_binary(struct lin
+
+ if (!bprm->file->f_op->mmap) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(0, ex.a_text+ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,
+ (char __user *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+@@ -389,7 +399,9 @@ static int load_aout32_library(struct fi
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
++ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/ioctl32.c linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/ioctl32.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/ioctl32.c 2004-08-14 14:56:14.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/ioctl32.c 2006-05-11 13:05:29.000000000 +0400
+@@ -475,7 +475,6 @@ IOCTL_TABLE_START
+ #include <linux/compat_ioctl.h>
+ #define DECLARES
+ #include "compat_ioctl.c"
+-COMPATIBLE_IOCTL(TCSBRKP)
+ COMPATIBLE_IOCTL(TIOCSTART)
+ COMPATIBLE_IOCTL(TIOCSTOP)
+ COMPATIBLE_IOCTL(TIOCSLTC)
+diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/ptrace.c
+--- linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/sparc64/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400
+@@ -559,7 +559,7 @@ asmlinkage void do_ptrace(struct pt_regs
+ * exit. 
+ */ + case PTRACE_KILL: { +- if (child->state == TASK_ZOMBIE) { /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */ + pt_succ_return(regs, 0); + goto out_tsk; + } +diff -uprN linux-2.6.8.1.orig/arch/um/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/um/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/um/kernel/ptrace.c 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/um/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -163,7 +163,7 @@ int sys_ptrace(long request, long pid, l + */ + case PTRACE_KILL: { + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + wake_up_process(child); +diff -uprN linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c linux-2.6.8.1-ve022stab078/arch/um/kernel/tt/process_kern.c +--- linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/um/kernel/tt/process_kern.c 2006-05-11 13:05:26.000000000 +0400 +@@ -65,7 +65,7 @@ void *switch_to_tt(void *prev, void *nex + panic("write of switch_pipe failed, errno = %d", -err); + + reading = 1; +- if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) ++ if((from->exit_state == EXIT_ZOMBIE) || (from->exit_state == EXIT_DEAD)) + os_kill_process(os_getpid(), 0); + + err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); +diff -uprN linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/v850/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/v850/kernel/ptrace.c 2006-05-11 13:05:26.000000000 +0400 +@@ -238,7 +238,7 @@ int sys_ptrace(long request, long pid, l + */ + case PTRACE_KILL: + rval = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + wake_up_process(child); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/boot/compressed/head.S linux-2.6.8.1-ve022stab078/arch/x86_64/boot/compressed/head.S +--- linux-2.6.8.1.orig/arch/x86_64/boot/compressed/head.S 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/boot/compressed/head.S 2006-05-11 13:05:45.000000000 +0400 +@@ -35,7 +35,7 @@ + startup_32: + cld + cli +- movl $(__KERNEL_DS),%eax ++ movl $(__BOOT_DS),%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%fs +@@ -77,7 +77,7 @@ startup_32: + jnz 3f + addl $8,%esp + xorl %ebx,%ebx +- ljmp $(__KERNEL_CS), $0x100000 ++ ljmp $(__BOOT_CS), $0x100000 + + /* + * We come here, if we were loaded high. 
+@@ -105,7 +105,7 @@ startup_32:
+ popl %eax # hcount
+ movl $0x100000,%edi
+ cli # make sure we don't get interrupted
+- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
++ ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
+
+ /*
+ * Routine (template) for moving the decompressed kernel in place,
+@@ -128,7 +128,7 @@ move_routine_start:
+ movsl
+ movl %ebx,%esi # Restore setup pointer
+ xorl %ebx,%ebx
+- ljmp $(__KERNEL_CS), $0x100000
++ ljmp $(__BOOT_CS), $0x100000
+ move_routine_end:
+
+
+@@ -138,5 +138,5 @@ user_stack:
+ .fill 4096,4,0
+ stack_start:
+ .long user_stack+4096
+- .word __KERNEL_DS
++ .word __BOOT_DS
+
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/boot/setup.S linux-2.6.8.1-ve022stab078/arch/x86_64/boot/setup.S
+--- linux-2.6.8.1.orig/arch/x86_64/boot/setup.S 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/x86_64/boot/setup.S 2006-05-11 13:05:45.000000000 +0400
+@@ -727,7 +727,7 @@ flush_instr:
+ subw $DELTA_INITSEG, %si
+ shll $4, %esi # Convert to 32-bit pointer
+ # NOTE: For high loaded big kernels we need a
+-# jmpi 0x100000,__KERNEL_CS
++# jmpi 0x100000,__BOOT_CS
+ #
+ # but we yet haven't reloaded the CS register, so the default size
+ # of the target offset still is 16 bit.
+@@ -738,7 +738,7 @@ flush_instr:
+ .byte 0x66, 0xea # prefix + jmpi-opcode
+ code32: .long 0x1000 # will be set to 0x100000
+ # for big kernels
+- .word __KERNEL_CS
++ .word __BOOT_CS
+
+ # Here's a bunch of information about your current kernel..
+ kernel_version: .ascii UTS_RELEASE
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_aout.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_aout.c 2006-05-11 13:05:40.000000000 +0400
+@@ -113,7 +113,9 @@ static void set_brk(unsigned long start,
+ end = PAGE_ALIGN(end);
+ if (end <= start)
+ return;
++ down_write(&current->mm->mmap_sem);
+ do_brk(start, end - start);
++ up_write(&current->mm->mmap_sem);
+ }
+
+ #if CORE_DUMP
+@@ -323,7 +325,10 @@ static int load_aout_binary(struct linux
+ pos = 32;
+ map_size = ex.a_text+ex.a_data;
+
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(text_addr & PAGE_MASK, map_size);
++ up_write(&current->mm->mmap_sem);
++
+ if (error != (text_addr & PAGE_MASK)) {
+ send_sig(SIGKILL, current, 0);
+ return error;
+@@ -343,14 +348,14 @@ static int load_aout_binary(struct linux
+ if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
+ (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
+ {
+- printk(KERN_NOTICE "executable not page aligned\n");
++ ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
+ error_time2 = jiffies;
+ }
+
+ if ((fd_offset & ~PAGE_MASK) != 0 &&
+ (jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "fd_offset is not page aligned. 
Please convert program: %s\n",
+ bprm->file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+@@ -359,7 +364,9 @@ static int load_aout_binary(struct linux
+
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
++ down_write(&current->mm->mmap_sem);
+ do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
++ up_write(&current->mm->mmap_sem);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
+ flush_icache_range((unsigned long) N_TXTADDR(ex),
+@@ -461,14 +468,15 @@ static int load_aout_library(struct file
+ static unsigned long error_time;
+ if ((jiffies-error_time) > 5*HZ)
+ {
+- printk(KERN_WARNING
++ ve_printk(VE_LOG, KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+ #endif
+-
++ down_write(&current->mm->mmap_sem);
+ do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
++ up_write(&current->mm->mmap_sem);
+
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
+@@ -492,7 +500,9 @@ static int load_aout_library(struct file
+ len = PAGE_ALIGN(ex.a_text + ex.a_data);
+ bss = ex.a_text + ex.a_data + ex.a_bss;
+ if (bss > len) {
++ down_write(&current->mm->mmap_sem);
+ error = do_brk(start_addr + len, bss - len);
++ up_write(&current->mm->mmap_sem);
+ retval = error;
+ if (error != start_addr + len)
+ goto out;
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_binfmt.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c 2004-08-14 14:54:47.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_binfmt.c 2006-05-11 13:05:45.000000000 +0400
+@@ -27,12 +27,14 @@
+ #include <asm/ia32.h>
+ #include <asm/vsyscall32.h>
+
++#include <ub/ub_vmpages.h>
++
+ #define ELF_NAME "elf/i386"
+
+ #define AT_SYSINFO 32
+ #define AT_SYSINFO_EHDR 33
+
+-int sysctl_vsyscall32 = 1;
++int sysctl_vsyscall32 = 0;
+
+ #define ARCH_DLINFO do { \
+ if (sysctl_vsyscall32) { \
+@@ -46,7 +48,7 @@ struct elf_phdr;
+
+ #define IA32_EMULATOR 1
+
+-#define ELF_ET_DYN_BASE (TASK_UNMAPPED_32 + 0x1000000)
++#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000)
+
+ #undef ELF_ARCH
+ #define ELF_ARCH EM_386
+@@ -73,8 +75,8 @@ typedef elf_greg_t elf_gregset_t[ELF_NGR
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+-#define ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum)
+-#define ELF_CORE_WRITE_EXTRA_PHDRS \
++#define DO_ELF_CORE_EXTRA_PHDRS (VSYSCALL32_EHDR->e_phnum)
++#define DO_ELF_CORE_WRITE_EXTRA_PHDRS \
+ do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+@@ -96,7 +98,7 @@ do { \
+ DUMP_WRITE(&phdr, sizeof(phdr)); \
+ } \
+ } while (0)
+-#define ELF_CORE_WRITE_EXTRA_DATA \
++#define DO_ELF_CORE_WRITE_EXTRA_DATA \
+ do { \
+ const struct elf32_phdr *const vsyscall_phdrs = \
+ (const struct elf32_phdr *) (VSYSCALL32_BASE \
+@@ -109,6 +111,21 @@ do { \
+ } \
+ } while (0)
+
++extern int sysctl_at_vsyscall;
++
++#define ELF_CORE_EXTRA_PHDRS ({ (sysctl_at_vsyscall != 0 ? 
\ ++ DO_ELF_CORE_EXTRA_PHDRS : 0); }) ++ ++#define ELF_CORE_WRITE_EXTRA_PHDRS do { \ ++ if (sysctl_at_vsyscall != 0) \ ++ DO_ELF_CORE_WRITE_EXTRA_PHDRS; \ ++ } while (0) ++ ++#define ELF_CORE_WRITE_EXTRA_DATA do { \ ++ if (sysctl_at_vsyscall != 0) \ ++ DO_ELF_CORE_WRITE_EXTRA_DATA; \ ++ } while (0) ++ + struct elf_siginfo + { + int si_signo; /* signal number */ +@@ -303,6 +320,10 @@ MODULE_AUTHOR("Eric Youngdale, Andi Klee + + static void elf32_init(struct pt_regs *); + ++#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 ++#define arch_setup_additional_pages syscall32_setup_pages ++extern int syscall32_setup_pages(struct linux_binprm *, int exstack); ++ + #include "../../../fs/binfmt_elf.c" + + static void elf32_init(struct pt_regs *regs) +@@ -327,10 +348,10 @@ static void elf32_init(struct pt_regs *r + + int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) + { +- unsigned long stack_base; ++ unsigned long stack_base, vm_end, vm_start; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; +- int i; ++ int i, ret; + + stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; + mm->arg_start = bprm->p + stack_base; +@@ -340,22 +361,28 @@ int setup_arg_pages(struct linux_binprm + bprm->loader += stack_base; + bprm->exec += stack_base; + ++ vm_end = IA32_STACK_TOP; ++ vm_start = PAGE_MASK & (unsigned long)bprm->p; ++ ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm_ub(mm), vm_end - vm_start, ++ vm_stack_flags32, NULL, UB_HARD)) ++ goto out; ++ + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); +- if (!mpnt) +- return -ENOMEM; +- +- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { +- kmem_cache_free(vm_area_cachep, mpnt); +- return -ENOMEM; +- } ++ if (!mpnt) ++ goto out_uncharge; ++ ++ if (security_vm_enough_memory((vm_end - vm_start)>>PAGE_SHIFT)) ++ goto out_uncharge_free; + + memset(mpnt, 0, sizeof(*mpnt)); + + down_write(&mm->mmap_sem); + { + mpnt->vm_mm = mm; +- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; +- mpnt->vm_end = IA32_STACK_TOP; ++ mpnt->vm_start = vm_start; ++ mpnt->vm_end = vm_end; + if (executable_stack == EXSTACK_ENABLE_X) + mpnt->vm_flags = vm_stack_flags32 | VM_EXEC; + else if (executable_stack == EXSTACK_DISABLE_X) +@@ -364,7 +391,8 @@ int setup_arg_pages(struct linux_binprm + mpnt->vm_flags = vm_stack_flags32; + mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
+ PAGE_COPY_EXEC : PAGE_COPY;
+- insert_vm_struct(mm, mpnt);
++ if ((ret = insert_vm_struct(mm, mpnt)))
++ goto out_up;
+ mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+@@ -379,6 +407,17 @@ int setup_arg_pages(struct linux_binprm
+ up_write(&mm->mmap_sem);
+
+ return 0;
++
++out_up:
++ up_write(&mm->mmap_sem);
++ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT);
++out_uncharge_free:
++ kmem_cache_free(vm_area_cachep, mpnt);
++out_uncharge:
++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start,
++ vm_stack_flags32, NULL);
++out:
++ return ret;
+ }
+
+ static unsigned long
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_ioctl.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_ioctl.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_ioctl.c 2004-08-14 14:55:32.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_ioctl.c 2006-05-11 13:05:35.000000000 +0400
+@@ -24,17 +24,27 @@
+ static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr)
+ {
+
+- struct file *file = fget(fd);
++ struct file *file;
+ struct tty_struct *real_tty;
++ int ret;
+
++ file = fget(fd);
+ if (!file)
+ return -EBADF;
++
++ ret = -EINVAL;
+ if (file->f_op->ioctl != tty_ioctl)
+- return -EINVAL;
++ goto out;
+ real_tty = (struct tty_struct *)file->private_data;
+ if (!real_tty)
+- return -EINVAL;
+- return put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
++ goto out;
++
++ ret = put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
++
++out:
++ fput(file);
++
++ return ret;
+ }
+
+ #define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_signal.c
+--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_signal.c 2004-08-14 14:56:23.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32_signal.c 2006-05-11 13:05:45.000000000 +0400
+@@ -44,10 +44,10 @@
+ asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
+-int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from)
++int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+ {
+ int err;
+- if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t32)))
++ if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ /* If you change siginfo_t structure, please make sure that
+@@ -95,11 +95,11 @@ int ia32_copy_siginfo_to_user(siginfo_t3
+ return err;
+ }
+
+-int ia32_copy_siginfo_from_user(siginfo_t *to, siginfo_t32 __user *from)
++int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+ {
+ int err;
+ u32 ptr32;
+- if (!access_ok (VERIFY_READ, from, sizeof(siginfo_t32)))
++ if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
+ return -EFAULT;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+@@ -122,6 +122,7 @@ sys32_sigsuspend(int history0, int histo
+ mask &= _BLOCKABLE;
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
++ set_sigsuspend_state(current, saveset);
+ siginitset(&current->blocked, mask);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+@@ -130,8 +131,10 @@ sys32_sigsuspend(int history0, int histo
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+- if (do_signal(&regs, &saveset))
++ if (do_signal(&regs, &saveset)) {
++ clear_sigsuspend_state(current);
+ return -EINTR;
++ }
+ }
+ }
+
+@@ -187,7 +190,7 @@ struct rt_sigframe
+ int sig;
+ u32 pinfo;
+ u32 puc;
+- struct 
siginfo32 info; ++ struct compat_siginfo info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + char retcode[8]; +@@ -260,6 +263,12 @@ ia32_restore_sigcontext(struct pt_regs * + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387_ia32(current, buf, 0); ++ } else { ++ struct task_struct *me = current; ++ if (me->used_math) { ++ clear_fpu(me); ++ me->used_math = 0; ++ } + } + } + +@@ -522,7 +531,7 @@ void ia32_setup_rt_frame(int sig, struct + } + err |= __put_user((u32)(u64)&frame->info, &frame->pinfo); + err |= __put_user((u32)(u64)&frame->uc, &frame->puc); +- err |= ia32_copy_siginfo_to_user(&frame->info, info); ++ err |= copy_siginfo_to_user32(&frame->info, info); + if (err) + goto give_sigsegv; + +diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32entry.S linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32entry.S +--- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32entry.S 2004-08-14 14:55:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ia32entry.S 2006-05-11 13:05:29.000000000 +0400 +@@ -436,7 +436,7 @@ ia32_sys_call_table: + .quad sys_init_module + .quad sys_delete_module + .quad quiet_ni_syscall /* 130 get_kernel_syms */ +- .quad sys32_quotactl /* quotactl */ ++ .quad sys_quotactl /* quotactl */ + .quad sys_getpgid + .quad sys_fchdir + .quad quiet_ni_syscall /* bdflush */ +@@ -482,7 +482,7 @@ ia32_sys_call_table: + .quad sys32_rt_sigaction + .quad sys32_rt_sigprocmask /* 175 */ + .quad sys32_rt_sigpending +- .quad sys32_rt_sigtimedwait ++ .quad compat_rt_sigtimedwait + .quad sys32_rt_sigqueueinfo + .quad stub32_rt_sigsuspend + .quad sys32_pread /* 180 */ +diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ptrace32.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ptrace32.c +--- linux-2.6.8.1.orig/arch/x86_64/ia32/ptrace32.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/ptrace32.c 2006-05-11 13:05:40.000000000 +0400 +@@ -205,7 +205,7 @@ static struct task_struct *find_target(i + + *err = -ESRCH; + read_lock(&tasklist_lock); +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/sys_ia32.c +--- linux-2.6.8.1.orig/arch/x86_64/ia32/sys_ia32.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/sys_ia32.c 2006-05-11 13:05:49.000000000 +0400 +@@ -658,11 +658,12 @@ sys32_waitpid(compat_pid_t pid, unsigned + int sys32_ni_syscall(int call) + { + struct task_struct *me = current; +- static char lastcomm[8]; +- if (strcmp(lastcomm, me->comm)) { +- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n", call, +- current->comm); +- strcpy(lastcomm, me->comm); ++ static char lastcomm[sizeof(me->comm)]; ++ ++ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { ++ ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n", ++ call, me->comm); ++ strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } + return -ENOSYS; + } +@@ -782,51 +783,13 @@ sys32_rt_sigpending(compat_sigset_t __us + + + asmlinkage long +-sys32_rt_sigtimedwait(compat_sigset_t __user *uthese, siginfo_t32 __user *uinfo, +- struct compat_timespec __user *uts, compat_size_t sigsetsize) +-{ +- sigset_t s; +- compat_sigset_t s32; +- struct timespec t; +- int ret; +- mm_segment_t old_fs = get_fs(); +- siginfo_t info; +- +- if (copy_from_user (&s32, uthese, sizeof(compat_sigset_t))) +- return -EFAULT; +- switch 
(_NSIG_WORDS) { +- case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); +- case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); +- case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); +- case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); +- } +- if (uts && get_compat_timespec(&t, uts)) +- return -EFAULT; +- if (uinfo) { +- /* stop data leak to user space in case of structure fill mismatch +- * between sys_rt_sigtimedwait & ia32_copy_siginfo_to_user. +- */ +- memset(&info, 0, sizeof(info)); +- } +- set_fs (KERNEL_DS); +- ret = sys_rt_sigtimedwait(&s, uinfo ? &info : NULL, uts ? &t : NULL, +- sigsetsize); +- set_fs (old_fs); +- if (ret >= 0 && uinfo) { +- if (ia32_copy_siginfo_to_user(uinfo, &info)) +- return -EFAULT; +- } +- return ret; +-} +- +-asmlinkage long +-sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 __user *uinfo) ++sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) + { + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + +- if (ia32_copy_siginfo_from_user(&info, uinfo)) ++ if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, &info); +@@ -947,7 +910,7 @@ sys32_sendfile(int out_fd, int in_fd, co + ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); + set_fs(old_fs); + +- if (!ret && offset && put_user(of, offset)) ++ if (offset && put_user(of, offset)) + return -EFAULT; + + return ret; +@@ -1067,13 +1030,13 @@ asmlinkage long sys32_olduname(struct ol + + down_read(&uts_sem); + +- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); ++ error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN); + __put_user(0,name->sysname+__OLD_UTS_LEN); +- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); ++ __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN); + __put_user(0,name->nodename+__OLD_UTS_LEN); +- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); ++ __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN); + __put_user(0,name->release+__OLD_UTS_LEN); +- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); ++ __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN); + __put_user(0,name->version+__OLD_UTS_LEN); + { + char *arch = "x86_64"; +@@ -1096,7 +1059,7 @@ long sys32_uname(struct old_utsname __us + if (!name) + return -EFAULT; + down_read(&uts_sem); +- err=copy_to_user(name, &system_utsname, sizeof (*name)); ++ err=copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); +@@ -1316,23 +1279,11 @@ long sys32_fadvise64_64(int fd, __u32 of + long sys32_vm86_warning(void) + { + struct task_struct *me = current; +- static char lastcomm[8]; +- if (strcmp(lastcomm, me->comm)) { +- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n", +- me->comm); +- strcpy(lastcomm, me->comm); +- } +- return -ENOSYS; +-} +- +-long sys32_quotactl(void) +-{ +- struct task_struct *me = current; +- static char lastcomm[8]; +- if (strcmp(lastcomm, me->comm)) { +- printk(KERN_INFO "%s: 32bit quotactl not supported on 64 bit kernel\n", ++ static char lastcomm[sizeof(me->comm)]; ++ if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { ++ ve_printk(VE_LOG, KERN_INFO "%s: vm87 mode not supported on 64 bit kernel\n", + me->comm); +- strcpy(lastcomm, me->comm); ++ strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } + 
return -ENOSYS; + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/syscall32.c linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/syscall32.c +--- linux-2.6.8.1.orig/arch/x86_64/ia32/syscall32.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/ia32/syscall32.c 2006-05-11 13:05:48.000000000 +0400 +@@ -4,11 +4,13 @@ + on demand because 32bit cannot reach the kernel's fixmaps */ + + #include <linux/mm.h> ++#include <linux/mman.h> + #include <linux/string.h> + #include <linux/kernel.h> + #include <linux/gfp.h> + #include <linux/init.h> + #include <linux/stringify.h> ++#include <linux/security.h> + #include <asm/proto.h> + #include <asm/tlbflush.h> + #include <asm/ia32_unistd.h> +@@ -30,32 +32,64 @@ extern int sysctl_vsyscall32; + char *syscall32_page; + static int use_sysenter __initdata = -1; + +-/* RED-PEN: This knows too much about high level VM */ +-/* Alternative would be to generate a vma with appropriate backing options +- and let it be handled by generic VM */ +-int map_syscall32(struct mm_struct *mm, unsigned long address) +-{ +- pte_t *pte; +- pmd_t *pmd; +- int err = 0; +- +- down_read(&mm->mmap_sem); +- spin_lock(&mm->page_table_lock); +- pmd = pmd_alloc(mm, pgd_offset(mm, address), address); +- if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { +- if (pte_none(*pte)) { +- set_pte(pte, +- mk_pte(virt_to_page(syscall32_page), +- PAGE_KERNEL_VSYSCALL)); +- } +- /* Flush only the local CPU. Other CPUs taking a fault +- will just end up here again */ +- __flush_tlb_one(address); +- } else +- err = -ENOMEM; +- spin_unlock(&mm->page_table_lock); +- up_read(&mm->mmap_sem); +- return err; ++static struct page * ++syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) ++{ ++ struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); ++ get_page(p); ++ return p; ++} ++ ++/* Prevent VMA merging */ ++static void syscall32_vma_close(struct vm_area_struct *vma) ++{ ++} ++ ++static struct vm_operations_struct syscall32_vm_ops = { ++ .close = syscall32_vma_close, ++ .nopage = syscall32_nopage, ++}; ++ ++struct linux_binprm; ++ ++/* Setup a VMA at program startup for the vsyscall page */ ++int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) ++{ ++ int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; ++ struct vm_area_struct *vma; ++ struct mm_struct *mm = current->mm; ++ int ret; ++ ++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ if (!vma) ++ return -ENOMEM; ++ if (security_vm_enough_memory(npages)) { ++ kmem_cache_free(vm_area_cachep, vma); ++ return -ENOMEM; ++ } ++ ++ memset(vma, 0, sizeof(struct vm_area_struct)); ++ /* Could randomize here */ ++ vma->vm_start = VSYSCALL32_BASE; ++ vma->vm_end = VSYSCALL32_END; ++ /* MAYWRITE to allow gdb to COW and set breakpoints */ ++ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYEXEC|VM_MAYWRITE; ++ vma->vm_flags |= mm->def_flags; ++ vma->vm_page_prot = protection_map[vma->vm_flags & 7]; ++ vma->vm_ops = &syscall32_vm_ops; ++ vma->vm_mm = mm; ++ ++ down_write(&mm->mmap_sem); ++ ret = insert_vm_struct(mm, vma); ++ if (ret) { ++ up_write(&mm->mmap_sem); ++ kmem_cache_free(vm_area_cachep, vma); ++ vm_unacct_memory(npages); ++ return ret; ++ } ++ mm->total_vm += npages; ++ up_write(&mm->mmap_sem); ++ return 0; + } + + static int __init init_syscall32(void) +@@ -63,7 +97,6 @@ static int __init init_syscall32(void) + syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!syscall32_page) + panic("Cannot allocate syscall32 page"); +- 
SetPageReserved(virt_to_page(syscall32_page)); + if (use_sysenter > 0) { + memcpy(syscall32_page, syscall32_sysenter, + syscall32_sysenter_end - syscall32_sysenter); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/acpi/wakeup.S +--- linux-2.6.8.1.orig/arch/x86_64/kernel/acpi/wakeup.S 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/acpi/wakeup.S 2006-05-11 13:05:45.000000000 +0400 +@@ -77,7 +77,7 @@ wakeup_code: + + .byte 0x66, 0xea # prefix + jmpi-opcode + .long wakeup_32 - __START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + .code32 + wakeup_32: +@@ -96,13 +96,13 @@ wakeup_32: + jnc bogus_cpu + movl %edx,%edi + +- movw $__KERNEL_DS, %ax ++ movw $__BOOT_DS, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + +- movw $__KERNEL_DS, %ax ++ movw $__BOOT_DS, %ax + movw %ax, %ss + + mov $(wakeup_stack - __START_KERNEL_map), %esp +@@ -187,7 +187,7 @@ reach_compatibility_mode: + + wakeup_jumpvector: + .long wakeup_long64 - __START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + .code64 + +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/apic.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/apic.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/apic.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/apic.c 2006-05-11 13:05:40.000000000 +0400 +@@ -883,6 +883,7 @@ void smp_local_timer_interrupt(struct pt + */ + void smp_apic_timer_interrupt(struct pt_regs *regs) + { ++ struct ve_struct *ve; + /* + * the NMI deadlock-detector uses this. + */ +@@ -898,9 +899,11 @@ void smp_apic_timer_interrupt(struct pt_ + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ ++ ve = set_exec_env(get_ve0()); + irq_enter(); + smp_local_timer_interrupt(regs); + irq_exit(); ++ (void)set_exec_env(ve); + } + + /* +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/entry.S linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/entry.S +--- linux-2.6.8.1.orig/arch/x86_64/kernel/entry.S 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/entry.S 2006-05-11 13:05:40.000000000 +0400 +@@ -173,6 +173,10 @@ rff_trace: + * + * XXX if we had a free scratch register we could save the RSP into the stack frame + * and report it properly in ps. Unfortunately we haven't. ++ * ++ * When user can change the frames always force IRET. That is because ++ * it deals with uncanonical addresses better. SYSRET has trouble ++ * with them due to bugs in both AMD and Intel CPUs. + */ + + ENTRY(system_call) +@@ -236,7 +240,10 @@ sysret_signal: + xorl %esi,%esi # oldset -> arg2 + call ptregscall_common + 1: movl $_TIF_NEED_RESCHED,%edi +- jmp sysret_check ++ /* Use IRET because user could have changed frame. This ++ works because ptregscall_common has called FIXUP_TOP_OF_STACK. 
*/ ++ cli ++ jmp int_with_check + + /* Do syscall tracing */ + tracesys: +@@ -257,7 +264,8 @@ tracesys: + call syscall_trace_leave + RESTORE_TOP_OF_STACK %rbx + RESTORE_REST +- jmp ret_from_sys_call ++ /* Use IRET because user could have changed frame */ ++ jmp int_ret_from_sys_call + + badsys: + movq $-ENOSYS,RAX-ARGOFFSET(%rsp) +@@ -358,20 +366,9 @@ ENTRY(stub_execve) + popq %r11 + CFI_ADJUST_CFA_OFFSET -8 + SAVE_REST +- movq %r11, %r15 + FIXUP_TOP_OF_STACK %r11 + call sys_execve +- GET_THREAD_INFO(%rcx) +- bt $TIF_IA32,threadinfo_flags(%rcx) +- jc exec_32bit + RESTORE_TOP_OF_STACK %r11 +- movq %r15, %r11 +- RESTORE_REST +- push %r11 +- ret +- +-exec_32bit: +- CFI_ADJUST_CFA_OFFSET REST_SKIP + movq %rax,RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call +@@ -728,7 +725,7 @@ ENTRY(kernel_thread) + xorl %r9d,%r9d + + # clone now +- call do_fork ++ call do_fork_kthread + movq %rax,RAX(%rsp) + xorl %edi,%edi + +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/head.S linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/head.S +--- linux-2.6.8.1.orig/arch/x86_64/kernel/head.S 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/head.S 2006-05-11 13:05:45.000000000 +0400 +@@ -39,7 +39,7 @@ startup_32: + + movl %ebx,%ebp /* Save trampoline flag */ + +- movl $__KERNEL_DS,%eax ++ movl $__BOOT_DS,%eax + movl %eax,%ds + + /* If the CPU doesn't support CPUID this will double fault. +@@ -159,7 +159,14 @@ reach_long64: + /* esi is pointer to real mode structure with interesting info. + pass it to C */ + movl %esi, %edi +- ++ ++ /* Switch to __KERNEL_CS. The segment is the same, but selector ++ * is different. */ ++ pushq $__KERNEL_CS ++ pushq $switch_cs ++ lretq ++switch_cs: ++ + /* Finally jump to run C code and to be on real kernel address + * Since we are running on identity-mapped space we have to jump + * to the full 64bit address , this is only possible as indirect +@@ -192,7 +199,7 @@ pGDT32: + .org 0xf10 + ljumpvector: + .long reach_long64-__START_KERNEL_map +- .word __KERNEL_CS ++ .word __BOOT_CS + + ENTRY(stext) + ENTRY(_stext) +@@ -326,7 +333,7 @@ gdt: + ENTRY(gdt_table32) + .quad 0x0000000000000000 /* This one is magic */ + .quad 0x0000000000000000 /* unused */ +- .quad 0x00af9a000000ffff /* __KERNEL_CS */ ++ .quad 0x00af9a000000ffff /* __BOOT_CS */ + gdt32_end: + + /* We need valid kernel segments for data and code in long mode too +@@ -337,23 +344,30 @@ gdt32_end: + .align L1_CACHE_BYTES + + /* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ ++ Hopefully nobody expects them at a fixed place (Wine?) ++ Descriptors rearranged to plase 32bit and TLS selectors in the same ++ places, because it is really necessary. sysret/exit mandates order ++ of kernel/user cs/ds, so we have to extend gdt. ++*/ + + ENTRY(cpu_gdt_table) +- .quad 0x0000000000000000 /* NULL descriptor */ +- .quad 0x008f9a000000ffff /* __KERNEL_COMPAT32_CS */ +- .quad 0x00af9a000000ffff /* __KERNEL_CS */ +- .quad 0x00cf92000000ffff /* __KERNEL_DS */ +- .quad 0x00cffe000000ffff /* __USER32_CS */ +- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ +- .quad 0x00affa000000ffff /* __USER_CS */ +- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ +- .quad 0,0 /* TSS */ +- .quad 0 /* LDT */ +- .quad 0,0,0 /* three TLS descriptors */ +- .quad 0 /* unused now */ +- .quad 0x00009a000000ffff /* __KERNEL16_CS - 16bit PM for S3 wakeup. 
*/ ++ .quad 0x0000000000000000 /* 0 NULL descriptor */ ++ .quad 0x008f9a000000ffff /* 1 __KERNEL_COMPAT32_CS */ ++ .quad 0x00af9a000000ffff /* 2 __BOOT_CS */ ++ .quad 0x00cf92000000ffff /* 3 __BOOT_DS */ ++ .quad 0,0 /* 4,5 TSS */ ++ .quad 0,0,0 /* 6-8 three TLS descriptors */ ++ .quad 0x00009a000000ffff /* 9 __KERNEL16_CS - 16bit PM for S3 wakeup. */ + /* base must be patched for real base address. */ ++ .quad 0 /* 10 LDT */ ++ .quad 0x00cf9a000000ffff /* 11 __KERNEL32_CS */ ++ .quad 0x00af9a000000ffff /* 12 __KERNEL_CS */ ++ .quad 0x00cf92000000ffff /* 13 __KERNEL_DS */ ++ .quad 0x00cffe000000ffff /* 14 __USER32_CS */ ++ .quad 0x00cff2000000ffff /* 15 __USER_DS, __USER32_DS */ ++ .quad 0x00affa000000ffff /* 16 __USER_CS */ ++ .quad 0,0,0,0,0,0,0 ++ .quad 0,0,0,0,0,0,0,0 + gdt_end: + /* asm/segment.h:GDT_ENTRIES must match this */ + /* This should be a multiple of the cache line size */ +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/irq.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/irq.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/irq.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/irq.c 2006-05-11 13:05:40.000000000 +0400 +@@ -45,7 +45,8 @@ + #include <asm/desc.h> + #include <asm/irq.h> + +- ++#include <ub/beancounter.h> ++#include <ub/ub_task.h> + + /* + * Linux has a controller-independent x86 interrupt architecture. +@@ -213,15 +214,18 @@ inline void synchronize_irq(unsigned int + int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) + { + int status = 1; /* Force the "do bottom halves" bit */ ++ struct user_beancounter *ub; + + if (!(action->flags & SA_INTERRUPT)) + local_irq_enable(); + ++ ub = set_exec_ub(get_ub0()); + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); ++ (void)set_exec_ub(ub); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + local_irq_disable(); +@@ -340,9 +344,11 @@ asmlinkage unsigned int do_IRQ(struct pt + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; ++ struct ve_struct *ve; + + if (irq > 256) BUG(); + ++ ve = set_exec_env(get_ve0()); + irq_enter(); + kstat_cpu(cpu).irqs[irq]++; + spin_lock(&desc->lock); +@@ -405,6 +411,7 @@ out: + spin_unlock(&desc->lock); + + irq_exit(); ++ (void)set_exec_env(ve); + return 1; + } + +@@ -833,6 +840,8 @@ static int irq_affinity_read_proc (char + return len; + } + ++int no_irq_affinity; ++ + static int irq_affinity_write_proc (struct file *file, + const char __user *buffer, + unsigned long count, void *data) +@@ -840,7 +849,7 @@ static int irq_affinity_write_proc (stru + int irq = (long) data, full_count = count, err; + cpumask_t tmp, new_value; + +- if (!irq_desc[irq].handler->set_affinity) ++ if (!irq_desc[irq].handler->set_affinity || no_irq_affinity) + return -EIO; + + err = cpumask_parse(buffer, count, new_value); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/nmi.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/nmi.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/nmi.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/nmi.c 2006-05-11 13:05:29.000000000 +0400 +@@ -59,6 +59,7 @@ static int panic_on_timeout; + unsigned int nmi_watchdog = NMI_DEFAULT; + static unsigned int nmi_hz = HZ; + unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ ++static unsigned int nmi_p4_cccr_val; + + /* Note that these events don't tick when the CPU idles. 
This means + the frequency varies with CPU load. */ +@@ -70,12 +71,41 @@ unsigned int nmi_perfctr_msr; /* the MSR + #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 + #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +-#define P6_EVNTSEL0_ENABLE (1 << 22) +-#define P6_EVNTSEL_INT (1 << 20) +-#define P6_EVNTSEL_OS (1 << 17) +-#define P6_EVNTSEL_USR (1 << 16) +-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED ++#define MSR_P4_MISC_ENABLE 0x1A0 ++#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) ++#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) ++#define MSR_P4_PERFCTR0 0x300 ++#define MSR_P4_CCCR0 0x360 ++#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) ++#define P4_ESCR_OS (1<<3) ++#define P4_ESCR_USR (1<<2) ++#define P4_CCCR_OVF_PMI0 (1<<26) ++#define P4_CCCR_OVF_PMI1 (1<<27) ++#define P4_CCCR_THRESHOLD(N) ((N)<<20) ++#define P4_CCCR_COMPLEMENT (1<<19) ++#define P4_CCCR_COMPARE (1<<18) ++#define P4_CCCR_REQUIRED (3<<16) ++#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) ++#define P4_CCCR_ENABLE (1<<12) ++/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter ++ CRU_ESCR0 (with any non-null event selector) through a complemented ++ max threshold. [IA32-Vol3, Section 14.9.9] */ ++#define MSR_P4_IQ_COUNTER0 0x30C ++#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) ++#define P4_NMI_IQ_CCCR0 \ ++ (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ ++ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) ++ ++static __init inline int nmi_known_cpu(void) ++{ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_AMD: ++ return boot_cpu_data.x86 == 15; ++ case X86_VENDOR_INTEL: ++ return boot_cpu_data.x86 == 15; ++ } ++ return 0; ++} + + /* Run after command line and cpu_init init, but before all other checks */ + void __init nmi_watchdog_default(void) +@@ -83,19 +113,10 @@ void __init nmi_watchdog_default(void) + if (nmi_watchdog != NMI_DEFAULT) + return; + +- /* For some reason the IO APIC watchdog doesn't work on the AMD +- 8111 chipset. For now switch to local APIC mode using +- perfctr0 there. On Intel CPUs we don't have code to handle +- the perfctr and the IO-APIC seems to work, so use that. */ +- +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { +- nmi_watchdog = NMI_LOCAL_APIC; +- printk(KERN_INFO +- "Using local APIC NMI watchdog using perfctr0\n"); +- } else { +- printk(KERN_INFO "Using IO APIC NMI watchdog\n"); ++ if (nmi_known_cpu()) ++ nmi_watchdog = NMI_LOCAL_APIC; ++ else + nmi_watchdog = NMI_IO_APIC; +- } + } + + /* Why is there no CPUID flag for this? */ +@@ -181,7 +202,10 @@ static void disable_lapic_nmi_watchdog(v + wrmsr(MSR_K7_EVNTSEL0, 0, 0); + break; + case X86_VENDOR_INTEL: +- wrmsr(MSR_IA32_EVNTSEL0, 0, 0); ++ if (boot_cpu_data.x86 == 15) { ++ wrmsr(MSR_P4_IQ_CCCR0, 0, 0); ++ wrmsr(MSR_P4_CRU_ESCR0, 0, 0); ++ } + break; + } + nmi_active = -1; +@@ -296,6 +320,14 @@ late_initcall(init_lapic_nmi_sysfs); + * Original code written by Keith Owens. 
+ */ + ++static void clear_msr_range(unsigned int base, unsigned int n) ++{ ++ unsigned int i; ++ ++ for(i = 0; i < n; ++i) ++ wrmsr(base+i, 0, 0); ++} ++ + static void setup_k7_watchdog(void) + { + int i; +@@ -327,6 +359,47 @@ static void setup_k7_watchdog(void) + wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); + } + ++static int setup_p4_watchdog(void) ++{ ++ unsigned int misc_enable, dummy; ++ ++ rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); ++ if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) ++ return 0; ++ ++ nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; ++ nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; ++#ifdef CONFIG_SMP ++ if (smp_num_siblings == 2) ++ nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; ++#endif ++ ++ if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) ++ clear_msr_range(0x3F1, 2); ++ /* MSR 0x3F0 seems to have a default value of 0xFC00, but current ++ docs doesn't fully define it, so leave it alone for now. */ ++ if (boot_cpu_data.x86_model >= 0x3) { ++ /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ ++ clear_msr_range(0x3A0, 26); ++ clear_msr_range(0x3BC, 3); ++ } else { ++ clear_msr_range(0x3A0, 31); ++ } ++ clear_msr_range(0x3C0, 6); ++ clear_msr_range(0x3C8, 6); ++ clear_msr_range(0x3E0, 2); ++ clear_msr_range(MSR_P4_CCCR0, 18); ++ clear_msr_range(MSR_P4_PERFCTR0, 18); ++ ++ wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); ++ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); ++ Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); ++ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); ++ return 1; ++} ++ + void setup_apic_nmi_watchdog(void) + { + switch (boot_cpu_data.x86_vendor) { +@@ -337,6 +410,13 @@ void setup_apic_nmi_watchdog(void) + return; + setup_k7_watchdog(); + break; ++ case X86_VENDOR_INTEL: ++ if (boot_cpu_data.x86 != 15) ++ return; ++ if (!setup_p4_watchdog()) ++ return; ++ break; ++ + default: + return; + } +@@ -414,8 +494,20 @@ void nmi_watchdog_tick (struct pt_regs * + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +- if (nmi_perfctr_msr) ++ if (nmi_perfctr_msr) { ++ if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { ++ /* ++ * P4 quirks: ++ * - An overflown perfctr will assert its interrupt ++ * until the OVF flag in its CCCR is cleared. ++ * - LVTPC is masked on interrupt and must be ++ * unmasked by the LVTPC handler. 
++ */ ++ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ } + wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); ++ } + } + + static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/process.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/process.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/process.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/process.c 2006-05-11 13:05:49.000000000 +0400 +@@ -209,7 +209,8 @@ void __show_regs(struct pt_regs * regs) + printk("Pid: %d, comm: %.20s %s %s\n", + current->pid, current->comm, print_tainted(), UTS_RELEASE); + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); +- printk_address(regs->rip); ++ if (decode_call_traces) ++ printk_address(regs->rip); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); + printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + regs->rax, regs->rbx, regs->rcx); +@@ -606,7 +607,7 @@ long do_arch_prctl(struct task_struct *t + + switch (code) { + case ARCH_SET_GS: +- if (addr >= TASK_SIZE) ++ if (addr >= TASK_SIZE_OF(task)) + return -EPERM; + cpu = get_cpu(); + /* handle small bases via the GDT because that's faster to +@@ -632,7 +633,7 @@ long do_arch_prctl(struct task_struct *t + case ARCH_SET_FS: + /* Not strictly needed for fs, but do it for symmetry + with gs */ +- if (addr >= TASK_SIZE) ++ if (addr >= TASK_SIZE_OF(task)) + return -EPERM; + cpu = get_cpu(); + /* handle small bases via the GDT because that's faster to +@@ -711,3 +712,20 @@ int dump_task_regs(struct task_struct *t + + return 1; + } ++ ++long do_fork_kthread(unsigned long clone_flags, ++ unsigned long stack_start, ++ struct pt_regs *regs, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ if (ve_is_super(get_exec_env())) ++ return do_fork(clone_flags, stack_start, regs, stack_size, ++ parent_tidptr, child_tidptr); ++ ++ /* Don't allow kernel_thread() inside VE */ ++ printk("kernel_thread call inside VE\n"); ++ dump_stack(); ++ return -EPERM; ++} +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/ptrace.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/ptrace.c 2006-05-11 13:05:49.000000000 +0400 +@@ -128,12 +128,12 @@ static int putreg(struct task_struct *ch + value &= 0xffff; + return 0; + case offsetof(struct user_regs_struct,fs_base): +- if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) ++ if (value >= TASK_SIZE_OF(child)) + return -EIO; + child->thread.fs = value; + return 0; + case offsetof(struct user_regs_struct,gs_base): +- if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) ++ if (value >= TASK_SIZE_OF(child)) + return -EIO; + child->thread.gs = value; + return 0; +@@ -148,6 +148,11 @@ static int putreg(struct task_struct *ch + return -EIO; + value &= 0xffff; + break; ++ case offsetof(struct user_regs_struct, rip): ++ /* Check if the new RIP address is canonical */ ++ if (value >= TASK_SIZE_OF(child)) ++ return -EIO; ++ break; + } + put_stack_long(child, regno - sizeof(struct pt_regs), value); + return 0; +@@ -169,6 +174,15 @@ static unsigned long getreg(struct task_ + return child->thread.fs; + case offsetof(struct user_regs_struct, gs_base): + return child->thread.gs; ++ case offsetof(struct user_regs_struct, cs): ++ if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) { ++ val = 
get_stack_long(child, regno - sizeof(struct pt_regs)); ++ if (val == __USER_CS) ++ return 0x33; ++ if (val == __USER32_CS) ++ return 0x23; ++ } ++ /* fall through */ + default: + regno = regno - sizeof(struct pt_regs); + val = get_stack_long(child, regno); +@@ -202,7 +216,7 @@ asmlinkage long sys_ptrace(long request, + } + ret = -ESRCH; + read_lock(&tasklist_lock); +- child = find_task_by_pid(pid); ++ child = find_task_by_pid_ve(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); +@@ -246,7 +260,7 @@ asmlinkage long sys_ptrace(long request, + break; + + switch (addr) { +- case 0 ... sizeof(struct user_regs_struct): ++ case 0 ... sizeof(struct user_regs_struct) - sizeof(long): + tmp = getreg(child, addr); + break; + case offsetof(struct user, u_debugreg[0]): +@@ -285,33 +299,37 @@ asmlinkage long sys_ptrace(long request, + break; + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ++ { ++ int dsize; ++ ++ dsize = test_tsk_thread_flag(child, TIF_IA32) ? 3 : 7; + ret = -EIO; + if ((addr & 7) || + addr > sizeof(struct user) - 7) + break; + + switch (addr) { +- case 0 ... sizeof(struct user_regs_struct): ++ case 0 ... sizeof(struct user_regs_struct) - sizeof(long): + ret = putreg(child, addr, data); + break; + /* Disallows to set a breakpoint into the vsyscall */ + case offsetof(struct user, u_debugreg[0]): +- if (data >= TASK_SIZE-7) break; ++ if (data >= TASK_SIZE_OF(child) - dsize) break; + child->thread.debugreg0 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[1]): +- if (data >= TASK_SIZE-7) break; ++ if (data >= TASK_SIZE_OF(child) - dsize) break; + child->thread.debugreg1 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[2]): +- if (data >= TASK_SIZE-7) break; ++ if (data >= TASK_SIZE_OF(child) - dsize) break; + child->thread.debugreg2 = data; + ret = 0; + break; + case offsetof(struct user, u_debugreg[3]): +- if (data >= TASK_SIZE-7) break; ++ if (data >= TASK_SIZE_OF(child) - dsize) break; + child->thread.debugreg3 = data; + ret = 0; + break; +@@ -333,6 +351,7 @@ asmlinkage long sys_ptrace(long request, + break; + } + break; ++ } + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + long tmp; +@@ -393,7 +412,7 @@ asmlinkage long sys_ptrace(long request, + long tmp; + + ret = 0; +- if (child->state == TASK_ZOMBIE) /* already dead */ ++ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ +@@ -498,8 +517,10 @@ static void syscall_trace(struct pt_regs + current_thread_info()->flags, current->ptrace); + #endif + ++ set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY); + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); ++ clear_pn_state(current); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. 
strace only continues with a signal if the +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/setup.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/setup.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/setup.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/setup.c 2006-05-11 13:05:28.000000000 +0400 +@@ -642,6 +642,23 @@ static int __init init_amd(struct cpuinf + int r; + int level; + ++#ifdef CONFIG_SMP ++ unsigned long value; ++ ++ /* ++ * Disable TLB flush filter by setting HWCR.FFDIS on K8 ++ * bit 6 of msr C001_0015 ++ * ++ * Errata 63 for SH-B3 steppings ++ * Errata 122 for all steppings (F+ have it disabled by default) ++ */ ++ if (c->x86 == 15) { ++ rdmsrl(MSR_K8_HWCR, value); ++ value |= 1 << 6; ++ wrmsrl(MSR_K8_HWCR, value); ++ } ++#endif ++ + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); +@@ -1086,7 +1103,7 @@ static int show_cpuinfo(struct seq_file + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + #ifdef CONFIG_X86_HT +- if (cpu_has_ht) { ++ if (smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]); + seq_printf(m, "siblings\t: %d\n", smp_num_siblings); + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/signal.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/signal.c 2006-05-11 13:05:45.000000000 +0400 +@@ -29,6 +29,7 @@ + #include <asm/uaccess.h> + #include <asm/i387.h> + #include <asm/proto.h> ++#include <asm/ia32_unistd.h> + + /* #define DEBUG_SIG 1 */ + +@@ -54,6 +55,7 @@ sys_rt_sigsuspend(sigset_t __user *unews + + spin_lock_irq(¤t->sighand->siglock); + saveset = current->blocked; ++ set_sigsuspend_state(current, saveset); + current->blocked = newset; + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); +@@ -65,8 +67,10 @@ sys_rt_sigsuspend(sigset_t __user *unews + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); +- if (do_signal(®s, &saveset)) ++ if (do_signal(®s, &saveset)) { ++ clear_sigsuspend_state(current); + return -EINTR; ++ } + } + } + +@@ -124,6 +128,12 @@ restore_sigcontext(struct pt_regs *regs, + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); ++ } else { ++ struct task_struct *me = current; ++ if (me->used_math) { ++ clear_fpu(me); ++ me->used_math = 0; ++ } + } + } + +@@ -287,7 +297,7 @@ static void setup_rt_frame(int sig, stru + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { +- printk("%s forgot to set SA_RESTORER for signal %d.\n", me->comm, sig); ++ ve_printk(VE_LOG, "%s forgot to set SA_RESTORER for signal %d.\n", me->comm, sig); + goto give_sigsegv; + } + +@@ -349,7 +359,7 @@ handle_signal(unsigned long sig, siginfo + #endif + + /* Are we from a system call? */ +- if (regs->orig_rax >= 0) { ++ if ((long)regs->orig_rax >= 0) { + /* If so, check system call restarting.. 
*/ + switch (regs->rax) { + case -ERESTART_RESTARTBLOCK: +@@ -411,9 +421,10 @@ int do_signal(struct pt_regs *regs, sigs + return 1; + } + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); +- goto no_signal; ++ if (test_thread_flag(TIF_FREEZE)) { ++ refrigerator(); ++ if (!signal_pending(current)) ++ goto no_signal; + } + + if (!oldset) +@@ -436,7 +447,7 @@ int do_signal(struct pt_regs *regs, sigs + + no_signal: + /* Did we come from a system call? */ +- if (regs->orig_rax >= 0) { ++ if ((long)regs->orig_rax >= 0) { + /* Restart the system call - no handlers present */ + long res = regs->rax; + if (res == -ERESTARTNOHAND || +@@ -446,7 +457,9 @@ int do_signal(struct pt_regs *regs, sigs + regs->rip -= 2; + } + if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { +- regs->rax = __NR_restart_syscall; ++ regs->rax = test_thread_flag(TIF_IA32) ? ++ __NR_ia32_restart_syscall : ++ __NR_restart_syscall; + regs->rip -= 2; + } + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/smpboot.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/smpboot.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/smpboot.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/smpboot.c 2006-05-11 13:05:40.000000000 +0400 +@@ -309,8 +309,6 @@ void __init smp_callin(void) + Dprintk("CALLIN, before setup_local_APIC().\n"); + setup_local_APIC(); + +- local_irq_enable(); +- + /* + * Get our bogomips. + */ +@@ -324,8 +322,6 @@ void __init smp_callin(void) + */ + smp_store_cpu_info(cpuid); + +- local_irq_disable(); +- + /* + * Allow the master to continue. + */ +@@ -397,7 +393,8 @@ static struct task_struct * __init fork_ + * don't care about the eip and regs settings since + * we'll never reschedule the forked task. + */ +- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL, NULL); ++ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ++ ®s, 0, NULL, NULL, 0); + } + + #if APIC_DEBUG +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/sys_x86_64.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/sys_x86_64.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/sys_x86_64.c 2006-05-11 13:05:40.000000000 +0400 +@@ -69,13 +69,7 @@ out: + static void find_start_end(unsigned long flags, unsigned long *begin, + unsigned long *end) + { +-#ifdef CONFIG_IA32_EMULATION +- if (test_thread_flag(TIF_IA32)) { +- *begin = TASK_UNMAPPED_32; +- *end = IA32_PAGE_OFFSET; +- } else +-#endif +- if (flags & MAP_32BIT) { ++ if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { + /* This is usually used needed to map code in small + model, so it needs to be in the first 31bit. Limit + it to that. This means we need to move the +@@ -83,12 +77,12 @@ static void find_start_end(unsigned long + conflicts with the heap, but we assume that glibc + malloc knows how to fall back to mmap. Give it 1GB + of playground for now. 
-AK */ +- *begin = 0x40000000; +- *end = 0x80000000; +- } else { +- *begin = TASK_UNMAPPED_64; +- *end = TASK_SIZE; +- } ++ *begin = 0x40000000; ++ *end = 0x80000000; ++ } else { ++ *begin = TASK_UNMAPPED_BASE; ++ *end = TASK_SIZE; ++ } + } + + unsigned long +@@ -146,7 +140,7 @@ asmlinkage long sys_uname(struct new_uts + { + int err; + down_read(&uts_sem); +- err = copy_to_user(name, &system_utsname, sizeof (*name)); ++ err = copy_to_user(name, &ve_utsname, sizeof (*name)); + up_read(&uts_sem); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/time.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/time.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/time.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/time.c 2006-05-11 13:05:40.000000000 +0400 +@@ -55,10 +55,13 @@ static int nohpet __initdata = 0; + unsigned int cpu_khz; /* TSC clocks / usec, not used here */ + unsigned long hpet_period; /* fsecs / HPET clock */ + unsigned long hpet_tick; /* HPET clocks / interrupt */ ++static int hpet_use_timer; + unsigned long vxtime_hz = PIT_TICK_RATE; + int report_lost_ticks; /* command line option */ + unsigned long long monotonic_base; + ++EXPORT_SYMBOL(cpu_khz); ++ + struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ + + volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +@@ -96,7 +99,9 @@ static inline unsigned int do_gettimeoff + + static inline unsigned int do_gettimeoffset_hpet(void) + { +- return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32; ++ /* cap counter read to one tick to avoid inconsistencies */ ++ unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; ++ return (min(counter,hpet_tick) * vxtime.quot) >> 32; + } + + unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; +@@ -270,7 +275,7 @@ unsigned long long monotonic_clock(void) + + last_offset = vxtime.last; + base = monotonic_base; +- this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick; ++ this_offset = hpet_readl(HPET_COUNTER); + + } while (read_seqretry(&xtime_lock, seq)); + offset = (this_offset - last_offset); +@@ -309,7 +314,14 @@ static irqreturn_t timer_interrupt(int i + + write_seqlock(&xtime_lock); + +- if (vxtime.hpet_address) { ++ if (vxtime.hpet_address) ++ offset = hpet_readl(HPET_COUNTER); ++ ++ if (hpet_use_timer) { ++ /* if we're using the hpet timer functionality, ++ * we can more accurately know the counter value ++ * when the timer interrupt occured. ++ */ + offset = hpet_readl(HPET_T0_CMP) - hpet_tick; + delay = hpet_readl(HPET_COUNTER) - offset; + } else { +@@ -630,8 +642,7 @@ static int hpet_init(void) + + id = hpet_readl(HPET_ID); + +- if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || +- !(id & HPET_ID_LEGSUP)) ++ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) + return -1; + + hpet_period = hpet_readl(HPET_PERIOD); +@@ -641,6 +652,8 @@ static int hpet_init(void) + hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / + hpet_period; + ++ hpet_use_timer = (id & HPET_ID_LEGSUP); ++ + /* + * Stop the timers and reset the main counter. + */ +@@ -656,17 +669,20 @@ static int hpet_init(void) + * and period also hpet_tick. + */ + +- hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | +- HPET_TN_32BIT, HPET_T0_CFG); +- hpet_writel(hpet_tick, HPET_T0_CMP); +- hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? 
*/ ++ if (hpet_use_timer) { ++ hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | ++ HPET_TN_32BIT, HPET_T0_CFG); ++ hpet_writel(hpet_tick, HPET_T0_CMP); ++ hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ ++ cfg |= HPET_CFG_LEGACY; ++ } + + /* + * Go! + */ + +- cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; +- hpet_writel(cfg, HPET_CFG); ++ cfg |= HPET_CFG_ENABLE; ++ hpet_writel(cfg, HPET_CFG); + + return 0; + } +@@ -719,9 +735,11 @@ void __init time_init(void) + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + +- if (!hpet_init()) { ++ if (!hpet_init()) + vxtime_hz = (1000000000000000L + hpet_period / 2) / + hpet_period; ++ ++ if (hpet_use_timer) { + cpu_khz = hpet_calibrate_tsc(); + timename = "HPET"; + } else { +@@ -754,12 +772,12 @@ void __init time_init_smp(void) + char *timetype; + + if (vxtime.hpet_address) { +- timetype = "HPET"; ++ timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; + vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } else { +- timetype = "PIT/TSC"; ++ timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; + vxtime.mode = VXTIME_TSC; + } + printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/trampoline.S linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/trampoline.S +--- linux-2.6.8.1.orig/arch/x86_64/kernel/trampoline.S 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/trampoline.S 2006-05-11 13:05:45.000000000 +0400 +@@ -46,7 +46,7 @@ r_base = . + lidt idt_48 - r_base # load idt with 0, 0 + lgdt gdt_48 - r_base # load gdt with whatever is appropriate + +- movw $__KERNEL_DS,%ax ++ movw $__BOOT_DS,%ax + movw %ax,%ds + movw %ax,%es + +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/traps.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/traps.c 2006-05-11 13:05:49.000000000 +0400 +@@ -91,6 +91,9 @@ int printk_address(unsigned long address + char *delim = ":"; + char namebuf[128]; + ++ if (!decode_call_traces) ++ return printk("[<%016lx>]", address); ++ + symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); + if (!symname) + return printk("[<%016lx>]", address); +@@ -143,7 +146,7 @@ void show_trace(unsigned long *stack) + if (__kernel_text_address(addr)) { + i += printk_address(addr); + i += printk(" "); +- if (i > 50) { ++ if (i > 50 && decode_call_traces) { + printk("\n"); + i = 0; + } +@@ -172,7 +175,7 @@ void show_trace(unsigned long *stack) + if (__kernel_text_address(addr)) { + i += printk_address(addr); + i += printk(" "); +- if (i > 50) { ++ if (i > 50 && decode_call_traces) { + printk("\n "); + i = 0; + } +@@ -188,7 +191,7 @@ void show_trace(unsigned long *stack) + if (__kernel_text_address(addr)) { + i += printk_address(addr); + i += printk(" "); +- if (i > 50) { ++ if (i > 50 && decode_call_traces) { + printk("\n "); + i = 0; + } +@@ -254,10 +257,13 @@ void show_registers(struct pt_regs *regs + + rsp = regs->rsp; + +- printk("CPU %d ", cpu); ++ printk("CPU: %d, VCPU: %d:%d ", cpu, task_vsched_id(current), ++ task_cpu(current)); + __show_regs(regs); +- printk("Process %s (pid: %d, threadinfo %p, task %p)\n", +- cur->comm, cur->pid, cur->thread_info, cur); ++ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n", ++ cur->comm, cur->pid, ++ 
VEID(VE_TASK_INFO(current)->owner_env), ++ cur->thread_info, cur); + + /* + * When in-kernel, we also print out the stack and code at the +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/vmlinux.lds.S +--- linux-2.6.8.1.orig/arch/x86_64/kernel/vmlinux.lds.S 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/vmlinux.lds.S 2006-05-11 13:05:29.000000000 +0400 +@@ -44,32 +44,31 @@ SECTIONS + } + __bss_end = .; + +- . = ALIGN(64); ++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + ++#define AFTER(x) BINALIGN(LOADADDR(x) + SIZEOF(x), 16) ++#define BINALIGN(x,y) (((x) + (y) - 1) & ~((y) - 1)) ++#define CACHE_ALIGN(x) BINALIGN(x, CONFIG_X86_L1_CACHE_BYTES) ++ + .vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) } + __vsyscall_0 = LOADADDR(.vsyscall_0); +- . = ALIGN(64); +- .xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) } ++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); ++ .xtime_lock : AT CACHE_ALIGN(AFTER(.vsyscall_0)) { *(.xtime_lock) } + xtime_lock = LOADADDR(.xtime_lock); +- . = ALIGN(16); +- .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) } ++ .vxtime : AT AFTER(.xtime_lock) { *(.vxtime) } + vxtime = LOADADDR(.vxtime); +- . = ALIGN(16); +- .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) } ++ .wall_jiffies : AT AFTER(.vxtime) { *(.wall_jiffies) } + wall_jiffies = LOADADDR(.wall_jiffies); +- . = ALIGN(16); +- .sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) } ++ .sys_tz : AT AFTER(.wall_jiffies) { *(.sys_tz) } + sys_tz = LOADADDR(.sys_tz); +- . = ALIGN(16); +- .sysctl_vsyscall : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.sysctl_vsyscall) } +- sysctl_vsyscall = LOADADDR(.sysctl_vsyscall); +- . = ALIGN(16); +- .jiffies : AT ((LOADADDR(.sysctl_vsyscall) + SIZEOF(.sysctl_vsyscall) + 15) & ~(15)) { *(.jiffies) } +- jiffies = LOADADDR(.jiffies); +- . = ALIGN(16); +- .xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) } ++ .sysctl_vsyscall : AT AFTER(.sys_tz) { *(.sysctl_vsyscall) } ++ sysctl_vsyscall = LOADADDR(.sysctl_vsyscall); ++ .xtime : AT AFTER(.sysctl_vsyscall) { *(.xtime) } + xtime = LOADADDR(.xtime); ++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); ++ .jiffies : AT CACHE_ALIGN(AFTER(.xtime)) { *(.jiffies) } ++ jiffies = LOADADDR(.jiffies); + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) } + . 
= LOADADDR(.vsyscall_0) + 4096; + +diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/vsyscall.c linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/vsyscall.c +--- linux-2.6.8.1.orig/arch/x86_64/kernel/vsyscall.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/kernel/vsyscall.c 2006-05-11 13:05:29.000000000 +0400 +@@ -165,14 +165,12 @@ static void __init map_vsyscall(void) + + static int __init vsyscall_init(void) + { +- if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday)) +- panic("vgettimeofday link addr broken"); +- if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)) +- panic("vtime link addr broken"); +- if (VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)) +- panic("fixmap first vsyscall %lx should be %lx", __fix_to_virt(VSYSCALL_FIRST_PAGE), +- VSYSCALL_ADDR(0)); ++ BUG_ON(((unsigned long) &vgettimeofday != ++ VSYSCALL_ADDR(__NR_vgettimeofday))); ++ BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); ++ BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); + map_vsyscall(); ++ sysctl_vsyscall = 1; + + return 0; + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/lib/copy_user.S linux-2.6.8.1-ve022stab078/arch/x86_64/lib/copy_user.S +--- linux-2.6.8.1.orig/arch/x86_64/lib/copy_user.S 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/lib/copy_user.S 2006-05-11 13:05:30.000000000 +0400 +@@ -73,7 +73,7 @@ bad_to_user: + * rdx count + * + * Output: +- * eax uncopied bytes or 0 if successfull. ++ * eax uncopied bytes or 0 if successful. + */ + .globl copy_user_generic + .p2align 4 +@@ -179,9 +179,9 @@ copy_user_generic: + movl $8,%r9d + subl %ecx,%r9d + movl %r9d,%ecx +- subq %r9,%rdx +- jz .Lsmall_align +- js .Lsmall_align ++ cmpq %r9,%rdx ++ jz .Lhandle_7 ++ js .Lhandle_7 + .Lalign_1: + .Ls11: movb (%rsi),%bl + .Ld11: movb %bl,(%rdi) +@@ -189,10 +189,8 @@ copy_user_generic: + incq %rdi + decl %ecx + jnz .Lalign_1 ++ subq %r9,%rdx + jmp .Lafter_bad_alignment +-.Lsmall_align: +- addq %r9,%rdx +- jmp .Lhandle_7 + #endif + + /* table sorted by exception address */ +@@ -219,8 +217,8 @@ copy_user_generic: + .quad .Ls10,.Le_byte + .quad .Ld10,.Le_byte + #ifdef FIX_ALIGNMENT +- .quad .Ls11,.Le_byte +- .quad .Ld11,.Le_byte ++ .quad .Ls11,.Lzero_rest ++ .quad .Ld11,.Lzero_rest + #endif + .quad .Le5,.Le_zero + .previous +diff -uprN linux-2.6.8.1.orig/arch/x86_64/lib/csum-copy.S linux-2.6.8.1-ve022stab078/arch/x86_64/lib/csum-copy.S +--- linux-2.6.8.1.orig/arch/x86_64/lib/csum-copy.S 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/lib/csum-copy.S 2006-05-11 13:05:30.000000000 +0400 +@@ -188,8 +188,8 @@ csum_partial_copy_generic: + source + movw (%rdi),%bx + adcl %ebx,%eax +- dest + decl %ecx ++ dest + movw %bx,(%rsi) + leaq 2(%rdi),%rdi + leaq 2(%rsi),%rsi +diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/fault.c linux-2.6.8.1-ve022stab078/arch/x86_64/mm/fault.c +--- linux-2.6.8.1.orig/arch/x86_64/mm/fault.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/mm/fault.c 2006-05-11 13:05:40.000000000 +0400 +@@ -34,27 +34,6 @@ + #include <asm/kdebug.h> + #include <asm-generic/sections.h> + +-void bust_spinlocks(int yes) +-{ +- int loglevel_save = console_loglevel; +- if (yes) { +- oops_in_progress = 1; +- } else { +-#ifdef CONFIG_VT +- unblank_screen(); +-#endif +- oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk will give klogd +- * a poke. 
Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; +- } +-} +- + /* Sometimes the CPU reports invalid exceptions on prefetch. + Check that here and ignore. + Opcode checker based on code by Richard Brunner */ +@@ -219,7 +198,7 @@ int unhandled_signal(struct task_struct + } + + int page_fault_trace; +-int exception_trace = 1; ++int exception_trace = 0; + + /* + * This routine handles page faults. It determines the address, +@@ -261,7 +240,7 @@ asmlinkage void do_page_fault(struct pt_ + local_irq_enable(); + + if (unlikely(page_fault_trace)) +- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", ++ ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", + regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); + + tsk = current; +@@ -281,8 +260,27 @@ asmlinkage void do_page_fault(struct pt_ + if (unlikely(in_atomic() || !mm)) + goto bad_area_nosemaphore; + +- again: +- down_read(&mm->mmap_sem); ++ /* When running in the kernel we expect faults to occur only to ++ * addresses in user space. All other faults represent errors in the ++ * kernel and should generate an OOPS. Unfortunatly, in the case of an ++ * erroneous fault occuring in a code path which already holds mmap_sem ++ * we will deadlock attempting to validate the fault against the ++ * address space. Luckily the kernel only validly references user ++ * space from well defined areas of code, which are listed in the ++ * exceptions table. ++ * ++ * As the vast majority of faults will be valid we will only perform ++ * the source reference check when there is a possibilty of a deadlock. ++ * Attempt to lock the address space, if we cannot we then validate the ++ * source. If this is invalid we can skip the address space check, ++ * thus avoiding the deadlock. ++ */ ++ if (!down_read_trylock(&mm->mmap_sem)) { ++ if ((error_code & 4) == 0 && ++ !search_exception_tables(regs->rip)) ++ goto bad_area_nosemaphore; ++ down_read(&mm->mmap_sem); ++ } + + vma = find_vma(mm, address); + if (!vma) +@@ -349,17 +347,6 @@ bad_area: + up_read(&mm->mmap_sem); + + bad_area_nosemaphore: +- +-#ifdef CONFIG_IA32_EMULATION +- /* 32bit vsyscall. map on demand. */ +- if (test_thread_flag(TIF_IA32) && +- address >= 0xffffe000 && address < 0xffffe000 + PAGE_SIZE) { +- if (map_syscall32(mm, address) < 0) +- goto out_of_memory2; +- return; +- } +-#endif +- + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + if (is_prefetch(regs, address)) +@@ -376,7 +363,7 @@ bad_area_nosemaphore: + return; + + if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { +- printk(KERN_INFO ++ ve_printk(VE_LOG, KERN_INFO + "%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + tsk->comm, tsk->pid, address, regs->rip, + regs->rsp, error_code); +@@ -440,14 +427,14 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +-out_of_memory2: +- if (current->pid == 1) { +- yield(); +- goto again; +- } +- printk("VM: killing process %s\n", tsk->comm); +- if (error_code & 4) +- do_exit(SIGKILL); ++ if (error_code & 4) { ++ /* ++ * 0-order allocation always success if something really ++ * fatal not happen: beancounter overdraft or OOM. 
Den ++ */ ++ force_sig(SIGKILL, tsk); ++ return; ++ } + goto no_context; + + do_sigbus: +diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/init.c linux-2.6.8.1-ve022stab078/arch/x86_64/mm/init.c +--- linux-2.6.8.1.orig/arch/x86_64/mm/init.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/mm/init.c 2006-05-11 13:05:40.000000000 +0400 +@@ -22,6 +22,7 @@ + #include <linux/pagemap.h> + #include <linux/bootmem.h> + #include <linux/proc_fs.h> ++#include <linux/module.h> + + #include <asm/processor.h> + #include <asm/system.h> +@@ -80,6 +81,8 @@ void show_mem(void) + printk("%d pages swap cached\n",cached); + } + ++EXPORT_SYMBOL(show_mem); ++ + /* References to section boundaries */ + + extern char _text, _etext, _edata, __bss_start, _end[]; +@@ -578,9 +581,9 @@ static __init int x8664_sysctl_init(void + __initcall(x8664_sysctl_init); + #endif + +-/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two +- different ones: one for 32bit and one for 64bit. Use the appropiate +- for the target task. */ ++/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only ++ covers the 64bit vsyscall page now. 32bit has a real VMA now and does ++ not need special handling anymore. */ + + static struct vm_area_struct gate_vma = { + .vm_start = VSYSCALL_START, +@@ -588,19 +591,15 @@ static struct vm_area_struct gate_vma = + .vm_page_prot = PAGE_READONLY + }; + +-static struct vm_area_struct gate32_vma = { +- .vm_start = VSYSCALL32_BASE, +- .vm_end = VSYSCALL32_END, +- .vm_page_prot = PAGE_READONLY +-}; +- + struct vm_area_struct *get_gate_vma(struct task_struct *tsk) + { +- return test_tsk_thread_flag(tsk, TIF_IA32) ? &gate32_vma : &gate_vma; ++ return test_tsk_thread_flag(tsk, TIF_IA32) ? NULL : &gate_vma; + } + + int in_gate_area(struct task_struct *task, unsigned long addr) + { + struct vm_area_struct *vma = get_gate_vma(task); ++ if (!vma) ++ return 0; + return (addr >= vma->vm_start) && (addr < vma->vm_end); + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/ioremap.c linux-2.6.8.1-ve022stab078/arch/x86_64/mm/ioremap.c +--- linux-2.6.8.1.orig/arch/x86_64/mm/ioremap.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/mm/ioremap.c 2006-05-11 13:05:30.000000000 +0400 +@@ -16,7 +16,7 @@ + #include <asm/fixmap.h> + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> +- ++#include <asm/proto.h> + + static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, + unsigned long phys_addr, unsigned long flags) +@@ -99,7 +99,31 @@ static int remap_area_pages(unsigned lon + } + + /* +- * Generic mapping function (not visible outside): ++ * Fix up the linear direct mapping of the kernel to avoid cache attribute ++ * conflicts. ++ */ ++static int ++ioremap_change_attr(unsigned long phys_addr, unsigned long size, ++ unsigned long flags) ++{ ++ int err = 0; ++ if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { ++ unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ unsigned long vaddr = (unsigned long) __va(phys_addr); ++ ++ /* ++ * Must use a address here and not struct page because the phys addr ++ * can be a in hole between nodes and not have an memmap entry. ++ */ ++ err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags)); ++ if (!err) ++ global_flush_tlb(); ++ } ++ return err; ++} ++ ++/* ++ * Generic mapping function + */ + + /* +@@ -155,12 +179,17 @@ void * __ioremap(unsigned long phys_addr + /* + * Ok, go for it.. 
+ */ +- area = get_vm_area(size, VM_IOREMAP); ++ area = get_vm_area(size, VM_IOREMAP | (flags << 24)); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { ++ remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); ++ return NULL; ++ } ++ if (ioremap_change_attr(phys_addr, size, flags) < 0) { ++ area->flags &= 0xffffff; + vunmap(addr); + return NULL; + } +@@ -191,43 +220,34 @@ void * __ioremap(unsigned long phys_addr + + void *ioremap_nocache (unsigned long phys_addr, unsigned long size) + { +- void *p = __ioremap(phys_addr, size, _PAGE_PCD); +- if (!p) +- return p; +- +- if (phys_addr + size < virt_to_phys(high_memory)) { +- struct page *ppage = virt_to_page(__va(phys_addr)); +- unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; +- +- BUG_ON(phys_addr+size > (unsigned long)high_memory); +- BUG_ON(phys_addr + size < phys_addr); +- +- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { +- iounmap(p); +- p = NULL; +- } +- global_flush_tlb(); +- } +- +- return p; ++ return __ioremap(phys_addr, size, _PAGE_PCD); + } + + void iounmap(void *addr) + { +- struct vm_struct *p; ++ struct vm_struct *p, **pprev; ++ + if (addr <= high_memory) + return; +- p = remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); ++ ++ write_lock(&vmlist_lock); ++ for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev) ++ if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr)) ++ break; + if (!p) { + printk("__iounmap: bad address %p\n", addr); +- return; +- } +- +- if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { +- change_page_attr(virt_to_page(__va(p->phys_addr)), ++ goto out_unlock; ++ } ++ *pprev = p->next; ++ unmap_vm_area(p); ++ if ((p->flags >> 24) && ++ p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) { ++ change_page_attr_addr((unsigned long)__va(p->phys_addr), + p->size >> PAGE_SHIFT, + PAGE_KERNEL); + global_flush_tlb(); + } ++out_unlock: ++ write_unlock(&vmlist_lock); + kfree(p); + } +diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/pageattr.c linux-2.6.8.1-ve022stab078/arch/x86_64/mm/pageattr.c +--- linux-2.6.8.1.orig/arch/x86_64/mm/pageattr.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/arch/x86_64/mm/pageattr.c 2006-05-11 13:05:30.000000000 +0400 +@@ -61,7 +61,10 @@ static void flush_kernel_map(void *addre + asm volatile("clflush (%0)" :: "r" (address + i)); + } else + asm volatile("wbinvd":::"memory"); +- __flush_tlb_one(address); ++ if (address) ++ __flush_tlb_one(address); ++ else ++ __flush_tlb_all(); + } + + +@@ -111,13 +114,12 @@ static void revert_page(unsigned long ad + } + + static int +-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot, +- pgprot_t ref_prot) ++__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, ++ pgprot_t ref_prot) + { + pte_t *kpte; + struct page *kpte_page; + unsigned kpte_flags; +- + kpte = lookup_address(address); + if (!kpte) return 0; + kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); +@@ -125,20 +127,20 @@ __change_page_attr(unsigned long address + if (pgprot_val(prot) != pgprot_val(ref_prot)) { + if ((kpte_flags & _PAGE_PSE) == 0) { + pte_t old = *kpte; +- pte_t standard = mk_pte(page, ref_prot); ++ pte_t standard = pfn_pte(pfn, ref_prot); + +- set_pte(kpte, mk_pte(page, prot)); ++ set_pte(kpte, pfn_pte(pfn, prot)); + if (pte_same(old,standard)) + get_page(kpte_page); + } else { + struct page *split = 
split_large_page(address, prot, ref_prot); + if (!split) + return -ENOMEM; +- get_page(kpte_page); ++ get_page(split); + set_pte(kpte,mk_pte(split, ref_prot)); + } + } else if ((kpte_flags & _PAGE_PSE) == 0) { +- set_pte(kpte, mk_pte(page, ref_prot)); ++ set_pte(kpte, pfn_pte(pfn, ref_prot)); + __put_page(kpte_page); + } + +@@ -162,31 +164,38 @@ __change_page_attr(unsigned long address + * + * Caller must call global_flush_tlb() after this. + */ +-int change_page_attr(struct page *page, int numpages, pgprot_t prot) ++int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) + { + int err = 0; + int i; + + down_write(&init_mm.mmap_sem); +- for (i = 0; i < numpages; !err && i++, page++) { +- unsigned long address = (unsigned long)page_address(page); +- err = __change_page_attr(address, page, prot, PAGE_KERNEL); ++ for (i = 0; i < numpages; i++, address += PAGE_SIZE) { ++ unsigned long pfn = __pa(address) >> PAGE_SHIFT; ++ ++ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); + if (err) + break; + /* Handle kernel mapping too which aliases part of the + * lowmem */ + /* Disabled right now. Fixme */ +- if (0 && page_to_phys(page) < KERNEL_TEXT_SIZE) { ++ if (0 && __pa(address) < KERNEL_TEXT_SIZE) { + unsigned long addr2; +- addr2 = __START_KERNEL_map + page_to_phys(page); +- err = __change_page_attr(addr2, page, prot, +- PAGE_KERNEL_EXEC); ++ addr2 = __START_KERNEL_map + __pa(address); ++ err = __change_page_attr(addr2, pfn, prot, PAGE_KERNEL_EXEC); + } + } + up_write(&init_mm.mmap_sem); + return err; + } + ++/* Don't call this for MMIO areas that may not have a mem_map entry */ ++int change_page_attr(struct page *page, int numpages, pgprot_t prot) ++{ ++ unsigned long addr = (unsigned long)page_address(page); ++ return change_page_attr_addr(addr, numpages, prot); ++} ++ + void global_flush_tlb(void) + { + struct deferred_page *df, *next_df; +@@ -194,6 +203,8 @@ void global_flush_tlb(void) + down_read(&init_mm.mmap_sem); + df = xchg(&df_list, NULL); + up_read(&init_mm.mmap_sem); ++ if (!df) ++ return; + flush_map((df && !df->next) ? 
df->address : 0); + for (; df; df = next_df) { + next_df = df->next; +diff -uprN linux-2.6.8.1.orig/drivers/base/class.c linux-2.6.8.1-ve022stab078/drivers/base/class.c +--- linux-2.6.8.1.orig/drivers/base/class.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/base/class.c 2006-05-11 13:05:42.000000000 +0400 +@@ -69,8 +69,13 @@ static struct kobj_type ktype_class = { + }; + + /* Hotplug events for classes go to the class_obj subsys */ +-static decl_subsys(class, &ktype_class, NULL); ++decl_subsys(class, &ktype_class, NULL); + ++#ifndef CONFIG_VE ++#define visible_class_subsys class_subsys ++#else ++#define visible_class_subsys (*get_exec_env()->class_subsys) ++#endif + + int class_create_file(struct class * cls, const struct class_attribute * attr) + { +@@ -143,7 +148,7 @@ int class_register(struct class * cls) + if (error) + return error; + +- subsys_set_kset(cls, class_subsys); ++ subsys_set_kset(cls, visible_class_subsys); + + error = subsystem_register(&cls->subsys); + if (!error) { +@@ -304,8 +309,13 @@ static struct kset_hotplug_ops class_hot + .hotplug = class_hotplug, + }; + +-static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops); ++decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops); + ++#ifndef CONFIG_VE ++#define visible_class_obj_subsys class_obj_subsys ++#else ++#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys) ++#endif + + static int class_device_add_attrs(struct class_device * cd) + { +@@ -342,7 +352,7 @@ static void class_device_remove_attrs(st + + void class_device_initialize(struct class_device *class_dev) + { +- kobj_set_kset_s(class_dev, class_obj_subsys); ++ kobj_set_kset_s(class_dev, visible_class_obj_subsys); + kobject_init(&class_dev->kobj); + INIT_LIST_HEAD(&class_dev->node); + } +@@ -505,12 +515,19 @@ void class_interface_unregister(struct c + class_put(parent); + } + +- ++void prepare_sysfs_classes(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->class_subsys = &class_subsys; ++ get_ve0()->class_obj_subsys = &class_obj_subsys; ++#endif ++} + + int __init classes_init(void) + { + int retval; + ++ prepare_sysfs_classes(); + retval = subsystem_register(&class_subsys); + if (retval) + return retval; +@@ -542,3 +559,6 @@ EXPORT_SYMBOL(class_device_remove_file); + + EXPORT_SYMBOL(class_interface_register); + EXPORT_SYMBOL(class_interface_unregister); ++ ++EXPORT_SYMBOL(class_subsys); ++EXPORT_SYMBOL(class_obj_subsys); +diff -uprN linux-2.6.8.1.orig/drivers/block/floppy.c linux-2.6.8.1-ve022stab078/drivers/block/floppy.c +--- linux-2.6.8.1.orig/drivers/block/floppy.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/block/floppy.c 2006-05-11 13:05:35.000000000 +0400 +@@ -3774,7 +3774,7 @@ static int floppy_open(struct inode *ino + * Needed so that programs such as fdrawcmd still can work on write + * protected disks */ + if (filp->f_mode & 2 +- || permission(filp->f_dentry->d_inode, 2, NULL) == 0) ++ || permission(filp->f_dentry->d_inode, 2, NULL, NULL) == 0) + filp->private_data = (void *)8; + + if (UFDCS->rawcmd == 1) +diff -uprN linux-2.6.8.1.orig/drivers/block/genhd.c linux-2.6.8.1-ve022stab078/drivers/block/genhd.c +--- linux-2.6.8.1.orig/drivers/block/genhd.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/block/genhd.c 2006-05-11 13:05:40.000000000 +0400 +@@ -18,6 +18,8 @@ + #define MAX_PROBE_HASH 255 /* random */ + + static struct subsystem block_subsys; ++struct subsystem *get_block_subsys(void) {return &block_subsys;} 
++EXPORT_SYMBOL(get_block_subsys); + + /* + * Can be deleted altogether. Later. +diff -uprN linux-2.6.8.1.orig/drivers/block/ioctl.c linux-2.6.8.1-ve022stab078/drivers/block/ioctl.c +--- linux-2.6.8.1.orig/drivers/block/ioctl.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/block/ioctl.c 2006-05-11 13:05:34.000000000 +0400 +@@ -219,3 +219,5 @@ int blkdev_ioctl(struct inode *inode, st + } + return -ENOTTY; + } ++ ++EXPORT_SYMBOL_GPL(blkdev_ioctl); +diff -uprN linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c linux-2.6.8.1-ve022stab078/drivers/block/ll_rw_blk.c +--- linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/block/ll_rw_blk.c 2006-05-11 13:05:31.000000000 +0400 +@@ -263,6 +263,45 @@ void blk_queue_make_request(request_queu + EXPORT_SYMBOL(blk_queue_make_request); + + /** ++ * blk_queue_ordered - does this queue support ordered writes ++ * @q: the request queue ++ * @flag: see below ++ * ++ * Description: ++ * For journalled file systems, doing ordered writes on a commit ++ * block instead of explicitly doing wait_on_buffer (which is bad ++ * for performance) can be a big win. Block drivers supporting this ++ * feature should call this function and indicate so. ++ * ++ **/ ++void blk_queue_ordered(request_queue_t *q, int flag) ++{ ++ if (flag) ++ set_bit(QUEUE_FLAG_ORDERED, &q->queue_flags); ++ else ++ clear_bit(QUEUE_FLAG_ORDERED, &q->queue_flags); ++} ++ ++EXPORT_SYMBOL(blk_queue_ordered); ++ ++/** ++ * blk_queue_issue_flush_fn - set function for issuing a flush ++ * @q: the request queue ++ * @iff: the function to be called issuing the flush ++ * ++ * Description: ++ * If a driver supports issuing a flush command, the support is notified ++ * to the block layer by defining it through this call. ++ * ++ **/ ++void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff) ++{ ++ q->issue_flush_fn = iff; ++} ++ ++EXPORT_SYMBOL(blk_queue_issue_flush_fn); ++ ++/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit +@@ -1925,10 +1964,11 @@ int blk_execute_rq(request_queue_t *q, s + } + + rq->flags |= REQ_NOMERGE; +- rq->waiting = &wait; ++ if (!rq->waiting) ++ rq->waiting = &wait; + elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); + generic_unplug_device(q); +- wait_for_completion(&wait); ++ wait_for_completion(rq->waiting); + rq->waiting = NULL; + + if (rq->errors) +@@ -1939,6 +1979,72 @@ int blk_execute_rq(request_queue_t *q, s + + EXPORT_SYMBOL(blk_execute_rq); + ++/** ++ * blkdev_issue_flush - queue a flush ++ * @bdev: blockdev to issue flush for ++ * @error_sector: error sector ++ * ++ * Description: ++ * Issue a flush for the block device in question. Caller can supply ++ * room for storing the error offset in case of a flush error, if they ++ * wish to. Caller must run wait_for_completion() on its own. 
++ */ ++int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) ++{ ++ request_queue_t *q; ++ ++ if (bdev->bd_disk == NULL) ++ return -ENXIO; ++ ++ q = bdev_get_queue(bdev); ++ if (!q) ++ return -ENXIO; ++ if (!q->issue_flush_fn) ++ return -EOPNOTSUPP; ++ ++ return q->issue_flush_fn(q, bdev->bd_disk, error_sector); ++} ++ ++EXPORT_SYMBOL(blkdev_issue_flush); ++ ++/** ++ * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices ++ * @q: device queue ++ * @disk: gendisk ++ * @error_sector: error offset ++ * ++ * Description: ++ * Devices understanding the SCSI command set, can use this function as ++ * a helper for issuing a cache flush. Note: driver is required to store ++ * the error offset (in case of error flushing) in ->sector of struct ++ * request. ++ */ ++int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, ++ sector_t *error_sector) ++{ ++ struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT); ++ int ret; ++ ++ rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER; ++ rq->sector = 0; ++ memset(rq->cmd, 0, sizeof(rq->cmd)); ++ rq->cmd[0] = 0x35; ++ rq->cmd_len = 12; ++ rq->data = NULL; ++ rq->data_len = 0; ++ rq->timeout = 60 * HZ; ++ ++ ret = blk_execute_rq(q, disk, rq); ++ ++ if (ret && error_sector) ++ *error_sector = rq->sector; ++ ++ blk_put_request(rq); ++ return ret; ++} ++ ++EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn); ++ + void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) + { + int rw = rq_data_dir(rq); +@@ -2192,7 +2298,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge); + static int __make_request(request_queue_t *q, struct bio *bio) + { + struct request *req, *freereq = NULL; +- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra; ++ int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; + sector_t sector; + + sector = bio->bi_sector; +@@ -2210,9 +2316,11 @@ static int __make_request(request_queue_ + + spin_lock_prefetch(q->queue_lock); + +- barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw); +- +- ra = bio->bi_rw & (1 << BIO_RW_AHEAD); ++ barrier = bio_barrier(bio); ++ if (barrier && !(q->queue_flags & (1 << QUEUE_FLAG_ORDERED))) { ++ err = -EOPNOTSUPP; ++ goto end_io; ++ } + + again: + spin_lock_irq(q->queue_lock); +@@ -2238,6 +2346,7 @@ again: + drive_stat_acct(req, nr_sectors, 0); + if (!attempt_back_merge(q, req)) + elv_merged_request(q, req); ++ sync = bio_sync(bio); + goto out; + + case ELEVATOR_FRONT_MERGE: +@@ -2264,6 +2373,7 @@ again: + drive_stat_acct(req, nr_sectors, 0); + if (!attempt_front_merge(q, req)) + elv_merged_request(q, req); ++ sync = bio_sync(bio); + goto out; + + /* +@@ -2292,7 +2402,8 @@ get_rq: + /* + * READA bit set + */ +- if (ra) ++ err = -EWOULDBLOCK; ++ if (bio_rw_ahead(bio)) + goto end_io; + + freereq = get_request_wait(q, rw); +@@ -2303,10 +2414,9 @@ get_rq: + req->flags |= REQ_CMD; + + /* +- * inherit FAILFAST from bio and don't stack up +- * retries for read ahead ++ * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) + */ +- if (ra || test_bit(BIO_RW_FAILFAST, &bio->bi_rw)) ++ if (bio_rw_ahead(bio) || bio_failfast(bio)) + req->flags |= REQ_FAILFAST; + + /* +@@ -2329,18 +2439,19 @@ get_rq: + req->rq_disk = bio->bi_bdev->bd_disk; + req->start_time = jiffies; + ++ sync = bio_sync(bio); + add_request(q, req); + out: + if (freereq) + __blk_put_request(q, freereq); +- if (bio_sync(bio)) ++ if (sync) + __generic_unplug_device(q); + + spin_unlock_irq(q->queue_lock); + return 0; + + end_io: +- bio_endio(bio, nr_sectors << 9, -EWOULDBLOCK); ++ bio_endio(bio, nr_sectors << 9, 
err); + return 0; + } + +@@ -2647,10 +2758,17 @@ void blk_recalc_rq_sectors(struct reques + static int __end_that_request_first(struct request *req, int uptodate, + int nr_bytes) + { +- int total_bytes, bio_nbytes, error = 0, next_idx = 0; ++ int total_bytes, bio_nbytes, error, next_idx = 0; + struct bio *bio; + + /* ++ * extend uptodate bool to allow < 0 value to be direct io error ++ */ ++ error = 0; ++ if (end_io_error(uptodate)) ++ error = !uptodate ? -EIO : uptodate; ++ ++ /* + * for a REQ_BLOCK_PC request, we want to carry any eventual + * sense key with us all the way through + */ +@@ -2658,7 +2776,6 @@ static int __end_that_request_first(stru + req->errors = 0; + + if (!uptodate) { +- error = -EIO; + if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) + printk("end_request: I/O error, dev %s, sector %llu\n", + req->rq_disk ? req->rq_disk->disk_name : "?", +@@ -2741,7 +2858,7 @@ static int __end_that_request_first(stru + /** + * end_that_request_first - end I/O on a request + * @req: the request being processed +- * @uptodate: 0 for I/O error ++ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error + * @nr_sectors: number of sectors to end I/O on + * + * Description: +@@ -2762,7 +2879,7 @@ EXPORT_SYMBOL(end_that_request_first); + /** + * end_that_request_chunk - end I/O on a request + * @req: the request being processed +- * @uptodate: 0 for I/O error ++ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error + * @nr_bytes: number of bytes to complete + * + * Description: +diff -uprN linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c linux-2.6.8.1-ve022stab078/drivers/block/scsi_ioctl.c +--- linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/block/scsi_ioctl.c 2006-05-11 13:05:38.000000000 +0400 +@@ -304,7 +304,8 @@ static int sg_scsi_ioctl(struct file *fi + struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) + { + struct request *rq; +- int err, in_len, out_len, bytes, opcode, cmdlen; ++ int err; ++ unsigned int in_len, out_len, bytes, opcode, cmdlen; + char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; + + /* +@@ -316,7 +317,7 @@ static int sg_scsi_ioctl(struct file *fi + return -EFAULT; + if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) + return -EINVAL; +- if (get_user(opcode, sic->data)) ++ if (get_user(opcode, (int *)sic->data)) + return -EFAULT; + + bytes = max(in_len, out_len); +diff -uprN linux-2.6.8.1.orig/drivers/char/keyboard.c linux-2.6.8.1-ve022stab078/drivers/char/keyboard.c +--- linux-2.6.8.1.orig/drivers/char/keyboard.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/keyboard.c 2006-05-11 13:05:24.000000000 +0400 +@@ -1063,7 +1063,7 @@ void kbd_keycode(unsigned int keycode, i + sysrq_down = down; + return; + } +- if (sysrq_down && down && !rep) { ++ if ((sysrq_down || sysrq_eat_all()) && down && !rep) { + handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); + return; + } +diff -uprN linux-2.6.8.1.orig/drivers/char/n_tty.c linux-2.6.8.1-ve022stab078/drivers/char/n_tty.c +--- linux-2.6.8.1.orig/drivers/char/n_tty.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/n_tty.c 2006-05-11 13:05:33.000000000 +0400 +@@ -946,13 +946,13 @@ static inline int copy_from_read_buf(str + + { + int retval; +- ssize_t n; ++ size_t n; + unsigned long flags; + + retval = 0; + spin_lock_irqsave(&tty->read_lock, flags); + n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail); +- n = min((ssize_t)*nr, n); ++ n = min(*nr, n); + 
spin_unlock_irqrestore(&tty->read_lock, flags); + if (n) { + mb(); +diff -uprN linux-2.6.8.1.orig/drivers/char/pty.c linux-2.6.8.1-ve022stab078/drivers/char/pty.c +--- linux-2.6.8.1.orig/drivers/char/pty.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/pty.c 2006-05-11 13:05:40.000000000 +0400 +@@ -32,22 +32,48 @@ + #include <asm/bitops.h> + #include <linux/devpts_fs.h> + ++#include <ub/ub_misc.h> ++ + #if defined(CONFIG_LEGACY_PTYS) || defined(CONFIG_UNIX98_PTYS) + + #ifdef CONFIG_LEGACY_PTYS + static struct tty_driver *pty_driver, *pty_slave_driver; ++ ++struct tty_driver *get_pty_driver(void) {return pty_driver;} ++struct tty_driver *get_pty_slave_driver(void) {return pty_slave_driver;} ++ ++EXPORT_SYMBOL(get_pty_driver); ++EXPORT_SYMBOL(get_pty_slave_driver); + #endif + + /* These are global because they are accessed in tty_io.c */ + #ifdef CONFIG_UNIX98_PTYS + struct tty_driver *ptm_driver; + struct tty_driver *pts_driver; ++EXPORT_SYMBOL(ptm_driver); ++EXPORT_SYMBOL(pts_driver); ++ ++#ifdef CONFIG_VE ++#define ve_ptm_driver (get_exec_env()->ptm_driver) ++#else ++#define ve_ptm_driver ptm_driver ++#endif ++ ++void prepare_pty(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->ptm_driver = ptm_driver; ++ /* don't clean ptm_driver and co. here, they are used in vecalls.c */ ++#endif ++} + #endif + + static void pty_close(struct tty_struct * tty, struct file * filp) + { + if (!tty) + return; ++ ++ ub_pty_uncharge(tty); + if (tty->driver->subtype == PTY_TYPE_MASTER) { + if (tty->count > 1) + printk("master pty_close: count = %d!!\n", tty->count); +@@ -61,14 +87,18 @@ static void pty_close(struct tty_struct + if (!tty->link) + return; + tty->link->packet = 0; ++ set_bit(TTY_OTHER_CLOSED, &tty->link->flags); + wake_up_interruptible(&tty->link->read_wait); + wake_up_interruptible(&tty->link->write_wait); +- set_bit(TTY_OTHER_CLOSED, &tty->link->flags); + if (tty->driver->subtype == PTY_TYPE_MASTER) { + set_bit(TTY_OTHER_CLOSED, &tty->flags); + #ifdef CONFIG_UNIX98_PTYS +- if (tty->driver == ptm_driver) ++ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) { ++ struct ve_struct *old_env; ++ old_env = set_exec_env(VE_OWNER_TTY(tty)); + devpts_pty_kill(tty->index); ++ set_exec_env(old_env); ++ } + #endif + tty_vhangup(tty->link); + } +@@ -288,6 +318,8 @@ static int pty_open(struct tty_struct *t + + if (!tty || !tty->link) + goto out; ++ if (ub_pty_charge(tty)) ++ goto out; + + retval = -EIO; + if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) +@@ -455,6 +487,7 @@ static int __init pty_init(void) + panic("Couldn't register Unix98 pts driver"); + + pty_table[1].data = &ptm_driver->refcount; ++ prepare_pty(); + #endif /* CONFIG_UNIX98_PTYS */ + + return 0; +diff -uprN linux-2.6.8.1.orig/drivers/char/qtronix.c linux-2.6.8.1-ve022stab078/drivers/char/qtronix.c +--- linux-2.6.8.1.orig/drivers/char/qtronix.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/qtronix.c 2006-05-11 13:05:32.000000000 +0400 +@@ -537,7 +537,7 @@ repeat: + i--; + } + if (count-i) { +- file->f_dentry->d_inode->i_atime = CURRENT_TIME; ++ file->f_dentry->d_inode->i_atime = current_fs_time(inode->i_sb); + return count-i; + } + if (signal_pending(current)) +diff -uprN linux-2.6.8.1.orig/drivers/char/random.c linux-2.6.8.1-ve022stab078/drivers/char/random.c +--- linux-2.6.8.1.orig/drivers/char/random.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/random.c 2006-05-11 13:05:33.000000000 +0400 +@@ -1720,8 +1720,9 @@ random_write(struct file * file, 
const c + if (p == buffer) { + return (ssize_t)ret; + } else { +- file->f_dentry->d_inode->i_mtime = CURRENT_TIME; +- mark_inode_dirty(file->f_dentry->d_inode); ++ struct inode *inode = file->f_dentry->d_inode; ++ inode->i_mtime = current_fs_time(inode->i_sb); ++ mark_inode_dirty(inode); + return (ssize_t)(p - buffer); + } + } +@@ -1917,7 +1918,7 @@ static int poolsize_strategy(ctl_table * + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void **context) + { +- int len; ++ unsigned int len; + + sysctl_poolsize = random_state->poolinfo.POOLBYTES; + +diff -uprN linux-2.6.8.1.orig/drivers/char/raw.c linux-2.6.8.1-ve022stab078/drivers/char/raw.c +--- linux-2.6.8.1.orig/drivers/char/raw.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/raw.c 2006-05-11 13:05:34.000000000 +0400 +@@ -122,7 +122,7 @@ raw_ioctl(struct inode *inode, struct fi + { + struct block_device *bdev = filp->private_data; + +- return ioctl_by_bdev(bdev, command, arg); ++ return blkdev_ioctl(bdev->bd_inode, filp, command, arg); + } + + static void bind_device(struct raw_config_request *rq) +diff -uprN linux-2.6.8.1.orig/drivers/char/sonypi.c linux-2.6.8.1-ve022stab078/drivers/char/sonypi.c +--- linux-2.6.8.1.orig/drivers/char/sonypi.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/sonypi.c 2006-05-11 13:05:32.000000000 +0400 +@@ -489,7 +489,8 @@ repeat: + i--; + } + if (count - i) { +- file->f_dentry->d_inode->i_atime = CURRENT_TIME; ++ struct inode *inode = file->f_dentry->d_inode; ++ inode->i_atime = current_fs_time(inode->i_sb); + return count-i; + } + if (signal_pending(current)) +diff -uprN linux-2.6.8.1.orig/drivers/char/sysrq.c linux-2.6.8.1-ve022stab078/drivers/char/sysrq.c +--- linux-2.6.8.1.orig/drivers/char/sysrq.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/sysrq.c 2006-05-11 13:05:40.000000000 +0400 +@@ -31,10 +31,12 @@ + #include <linux/suspend.h> + #include <linux/writeback.h> + #include <linux/buffer_head.h> /* for fsync_bdev() */ ++#include <linux/kallsyms.h> + + #include <linux/spinlock.h> + + #include <asm/ptrace.h> ++#include <asm/uaccess.h> + + extern void reset_vc(unsigned int); + +@@ -131,6 +133,296 @@ static struct sysrq_key_op sysrq_mountro + .action_msg = "Emergency Remount R/O", + }; + ++#ifdef CONFIG_SYSRQ_DEBUG ++/* ++ * Alt-SysRq debugger ++ * Implemented functions: ++ * dumping memory ++ * resolvind symbols ++ * writing memory ++ * quitting :) ++ */ ++ ++/* Memory accessing routines */ ++#define DUMP_LINES 22 ++unsigned long *dumpmem_addr; ++ ++static void dump_mem(void) ++{ ++ unsigned long value[4]; ++ mm_segment_t old_fs; ++ int line, err; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = 0; ++ for (line = 0; line < DUMP_LINES; line++) { ++ err |= __get_user(value[0], dumpmem_addr++); ++ err |= __get_user(value[1], dumpmem_addr++); ++ err |= __get_user(value[2], dumpmem_addr++); ++ err |= __get_user(value[3], dumpmem_addr++); ++ if (err) { ++ printk("Invalid address 0x%p\n", dumpmem_addr - 4); ++ break; ++ } ++ printk("0x%p: %08lx %08lx %08lx %08lx\n", dumpmem_addr - 4, ++ value[0], value[1], value[2], value[3]); ++ } ++ set_fs(old_fs); ++} ++ ++static unsigned long *writemem_addr; ++ ++static void write_mem(unsigned long val) ++{ ++ mm_segment_t old_fs; ++ unsigned long old_val; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ if (__get_user(old_val, writemem_addr)) ++ goto err; ++ printk("Changing [0x%p] %08lX to %08lX\n", writemem_addr, old_val, 
val); ++ __put_user(val, writemem_addr); ++err: ++ set_fs(old_fs); ++} ++ ++/* reading user input */ ++#define NAME_LEN (64) ++static struct { ++ unsigned long hex; ++ char name[NAME_LEN + 1]; ++ void (*entered)(void); ++} debug_input; ++ ++static void debug_read_hex(int key) ++{ ++ static int entered = 0; ++ int val; ++ ++ if (key >= '0' && key <= '9') ++ val = key - '0'; ++ else if (key >= 'a' && key <= 'f') ++ val = key - 'a' + 0xa; ++ else ++ return; ++ ++ entered++; ++ debug_input.hex = (debug_input.hex << 4) + val; ++ printk("%c", key); ++ if (entered != sizeof(unsigned long) * 2) ++ return; ++ ++ printk("\n"); ++ entered = 0; ++ debug_input.entered(); ++} ++ ++static void debug_read_string(int key) ++{ ++ static int pos; ++ static int shift; ++ ++ if (key == 0) { ++ /* actually key == 0 not only for shift */ ++ shift = 1; ++ return; ++ } ++ ++ if (key == 0x0d) /* enter */ ++ goto finish; ++ ++ if (key >= 'a' && key <= 'z') { ++ if (shift) ++ key = key - 'a' + 'A'; ++ goto correct; ++ } ++ if (key == '-') { ++ if (shift) ++ key = '_'; ++ goto correct; ++ } ++ if (key >= '0' && key <= '9') ++ goto correct; ++ return; ++ ++correct: ++ debug_input.name[pos] = key; ++ pos++; ++ shift = 0; ++ printk("%c", key); ++ if (pos != NAME_LEN) ++ return; ++ ++finish: ++ printk("\n"); ++ pos = 0; ++ shift = 0; ++ debug_input.entered(); ++ memset(debug_input.name, 0, NAME_LEN); ++} ++ ++static int sysrq_debug_mode; ++#define DEBUG_SELECT_ACTION 1 ++#define DEBUG_READ_INPUT 2 ++static struct sysrq_key_op *debug_sysrq_key_table[]; ++static void (*handle_debug_input)(int key); ++static void swap_opts(struct sysrq_key_op **); ++#define PROMPT "> " ++ ++int sysrq_eat_all(void) ++{ ++ return sysrq_debug_mode; ++} ++ ++static inline void debug_switch_read_input(void (*fn_read)(int), ++ void (*fn_fini)(void)) ++{ ++ WARN_ON(fn_read == NULL || fn_fini == NULL); ++ debug_input.entered = fn_fini; ++ handle_debug_input = fn_read; ++ sysrq_debug_mode = DEBUG_READ_INPUT; ++} ++ ++static inline void debug_switch_select_action(void) ++{ ++ sysrq_debug_mode = DEBUG_SELECT_ACTION; ++ handle_debug_input = NULL; ++ printk(PROMPT); ++} ++ ++/* handle key press in debug mode */ ++static void __handle_debug(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ if (sysrq_debug_mode == DEBUG_SELECT_ACTION) { ++ __handle_sysrq(key, pt_regs, tty); ++ if (sysrq_debug_mode) ++ printk(PROMPT); ++ } else { ++ __sysrq_lock_table(); ++ handle_debug_input(key); ++ __sysrq_unlock_table(); ++ } ++} ++ ++/* dump memory */ ++static void debug_dumpmem_addr_entered(void) ++{ ++ dumpmem_addr = (unsigned long *)debug_input.hex; ++ dump_mem(); ++ debug_switch_select_action(); ++} ++ ++static void sysrq_handle_dumpmem(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ debug_switch_read_input(debug_read_hex, debug_dumpmem_addr_entered); ++} ++static struct sysrq_key_op sysrq_debug_dumpmem = { ++ .handler = sysrq_handle_dumpmem, ++ .help_msg = "Dump memory\n", ++ .action_msg = "Enter address", ++}; ++ ++static void sysrq_handle_dumpnext(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ dump_mem(); ++} ++static struct sysrq_key_op sysrq_debug_dumpnext = { ++ .handler = sysrq_handle_dumpnext, ++ .help_msg = "dump neXt\n", ++ .action_msg = "", ++}; ++ ++/* resolve symbol */ ++static void debug_resolve_name_entered(void) ++{ ++ unsigned long sym_addr; ++ ++ sym_addr = kallsyms_lookup_name(debug_input.name); ++ printk("%s: %08lX\n", debug_input.name, sym_addr); ++ if (sym_addr) { ++ printk("Now you 
can dump it via X\n"); ++ dumpmem_addr = (unsigned long *)sym_addr; ++ } ++ debug_switch_select_action(); ++} ++ ++static void sysrq_handle_resolve(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ debug_switch_read_input(debug_read_string, debug_resolve_name_entered); ++} ++static struct sysrq_key_op sysrq_debug_resove = { ++ .handler = sysrq_handle_resolve, ++ .help_msg = "Resolve symbol\n", ++ .action_msg = "Enter symbol name", ++}; ++ ++/* write memory */ ++static void debug_writemem_val_entered(void) ++{ ++ write_mem(debug_input.hex); ++ debug_switch_select_action(); ++} ++ ++static void debug_writemem_addr_entered(void) ++{ ++ mm_segment_t old_fs; ++ unsigned long val; ++ ++ writemem_addr = (unsigned long *)debug_input.hex; ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ if (!__get_user(val, writemem_addr)) ++ printk(" [0x%p] = %08lX\n", writemem_addr, val); ++ set_fs(old_fs); ++ debug_switch_read_input(debug_read_hex, debug_writemem_val_entered); ++} ++ ++static void sysrq_handle_writemem(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ debug_switch_read_input(debug_read_hex, debug_writemem_addr_entered); ++} ++static struct sysrq_key_op sysrq_debug_writemem = { ++ .handler = sysrq_handle_writemem, ++ .help_msg = "Write memory\n", ++ .action_msg = "Enter address and then value", ++}; ++ ++/* switch to debug mode */ ++static void sysrq_handle_debug(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ swap_opts(debug_sysrq_key_table); ++ printk("Welcome sysrq debugging mode\n" ++ "Press H for help\n"); ++ debug_switch_select_action(); ++} ++static struct sysrq_key_op sysrq_debug_enter = { ++ .handler = sysrq_handle_debug, ++ .help_msg = "start Degugging", ++ .action_msg = "Select desired action", ++}; ++ ++/* quit debug mode */ ++static void sysrq_handle_quit(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ swap_opts(NULL); ++ sysrq_debug_mode = 0; ++} ++static struct sysrq_key_op sysrq_debug_quit = { ++ .handler = sysrq_handle_quit, ++ .help_msg = "Quit debug mode\n", ++ .action_msg = "Thank you for using debugger", ++}; ++#endif ++ + /* END SYNC SYSRQ HANDLERS BLOCK */ + + +@@ -139,8 +431,13 @@ static struct sysrq_key_op sysrq_mountro + static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) + { ++ bust_spinlocks(1); + if (pt_regs) + show_regs(pt_regs); ++ bust_spinlocks(0); ++#ifdef __i386__ ++ smp_nmi_call_function(smp_show_regs, NULL, 0); ++#endif + } + static struct sysrq_key_op sysrq_showregs_op = { + .handler = sysrq_handle_showregs, +@@ -183,7 +480,7 @@ static void send_sig_all(int sig) + { + struct task_struct *p; + +- for_each_process(p) { ++ for_each_process_all(p) { + if (p->mm && p->pid != 1) + /* Not swapper, init nor kernel thread */ + force_sig(sig, p); +@@ -214,13 +511,26 @@ static struct sysrq_key_op sysrq_kill_op + .action_msg = "Kill All Tasks", + }; + ++#ifdef CONFIG_SCHED_VCPU ++static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs, ++ struct tty_struct *tty) ++{ ++ show_vsched(); ++} ++static struct sysrq_key_op sysrq_vschedstate_op = { ++ .handler = sysrq_handle_vschedstate, ++ .help_msg = "showvsChed", ++ .action_msg = "Show Vsched", ++}; ++#endif ++ + /* END SIGNAL SYSRQ HANDLERS BLOCK */ + + + /* Key Operations table and lock */ + static spinlock_t sysrq_key_table_lock = SPIN_LOCK_UNLOCKED; + #define SYSRQ_KEY_TABLE_LENGTH 36 +-static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { ++static struct sysrq_key_op 
*def_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { + /* 0 */ &sysrq_loglevel_op, + /* 1 */ &sysrq_loglevel_op, + /* 2 */ &sysrq_loglevel_op, +@@ -235,8 +545,16 @@ static struct sysrq_key_op *sysrq_key_ta + it is handled specially on the sparc + and will never arrive */ + /* b */ &sysrq_reboot_op, ++#ifdef CONFIG_SCHED_VCPU ++/* c */ &sysrq_vschedstate_op, ++#else + /* c */ NULL, ++#endif ++#ifdef CONFIG_SYSRQ_DEBUG ++/* d */ &sysrq_debug_enter, ++#else + /* d */ NULL, ++#endif + /* e */ &sysrq_term_op, + /* f */ NULL, + /* g */ NULL, +@@ -270,6 +588,29 @@ static struct sysrq_key_op *sysrq_key_ta + /* z */ NULL + }; + ++#ifdef CONFIG_SYSRQ_DEBUG ++static struct sysrq_key_op *debug_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { ++ [13] = &sysrq_debug_dumpmem, /* d */ ++ [26] = &sysrq_debug_quit, /* q */ ++ [27] = &sysrq_debug_resove, /* r */ ++ [32] = &sysrq_debug_writemem, /* w */ ++ [33] = &sysrq_debug_dumpnext, /* x */ ++}; ++ ++static struct sysrq_key_op **sysrq_key_table = def_sysrq_key_table; ++ ++/* call swap_opts(NULL) to restore opts to defaults */ ++static void swap_opts(struct sysrq_key_op **swap_to) ++{ ++ if (swap_to) ++ sysrq_key_table = swap_to; ++ else ++ sysrq_key_table = def_sysrq_key_table; ++} ++#else ++#define sysrq_key_table def_sysrq_key_table ++#endif ++ + /* key2index calculation, -1 on invalid index */ + static int sysrq_key_table_key2index(int key) { + int retval; +@@ -358,6 +699,12 @@ void handle_sysrq(int key, struct pt_reg + { + if (!sysrq_enabled) + return; ++#ifdef CONFIG_SYSRQ_DEBUG ++ if (sysrq_debug_mode) { ++ __handle_debug(key, pt_regs, tty); ++ return; ++ } ++#endif + __handle_sysrq(key, pt_regs, tty); + } + +diff -uprN linux-2.6.8.1.orig/drivers/char/tty_io.c linux-2.6.8.1-ve022stab078/drivers/char/tty_io.c +--- linux-2.6.8.1.orig/drivers/char/tty_io.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/tty_io.c 2006-05-11 13:05:40.000000000 +0400 +@@ -86,6 +86,7 @@ + #include <linux/string.h> + #include <linux/slab.h> + #include <linux/poll.h> ++#include <linux/ve_owner.h> + #include <linux/proc_fs.h> + #include <linux/init.h> + #include <linux/module.h> +@@ -103,6 +104,7 @@ + #include <linux/devfs_fs_kernel.h> + + #include <linux/kmod.h> ++#include <ub/ub_mem.h> + + #undef TTY_DEBUG_HANGUP + +@@ -120,7 +122,12 @@ struct termios tty_std_termios = { /* fo + + EXPORT_SYMBOL(tty_std_termios); + ++/* this lock protects tty_drivers list, this pretty guys do no locking */ ++rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED; ++EXPORT_SYMBOL(tty_driver_guard); ++ + LIST_HEAD(tty_drivers); /* linked list of tty drivers */ ++EXPORT_SYMBOL(tty_drivers); + struct tty_ldisc ldiscs[NR_LDISCS]; /* line disc dispatch table */ + + /* Semaphore to protect creating and releasing a tty */ +@@ -130,6 +137,13 @@ DECLARE_MUTEX(tty_sem); + extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ + extern int pty_limit; /* Config limit on Unix98 ptys */ + static DEFINE_IDR(allocated_ptys); ++#ifdef CONFIG_VE ++#define ve_allocated_ptys (*(get_exec_env()->allocated_ptys)) ++#define ve_ptm_driver (get_exec_env()->ptm_driver) ++#else ++#define ve_allocated_ptys allocated_ptys ++#define ve_ptm_driver ptm_driver ++#endif + static DECLARE_MUTEX(allocated_ptys_lock); + #endif + +@@ -150,11 +164,25 @@ extern void rs_360_init(void); + static void release_mem(struct tty_struct *tty, int idx); + + ++DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ()) ++DCL_VE_OWNER(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ()) ++ ++void 
prepare_tty(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->allocated_ptys = &allocated_ptys; ++ /* ++ * in this case, tty_register_driver() setups ++ * owner_env correctly right from the bootup ++ */ ++#endif ++} ++ + static struct tty_struct *alloc_tty_struct(void) + { + struct tty_struct *tty; + +- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); ++ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL); + if (tty) + memset(tty, 0, sizeof(struct tty_struct)); + return tty; +@@ -307,14 +335,37 @@ struct tty_driver *get_tty_driver(dev_t + { + struct tty_driver *p; + ++ read_lock(&tty_driver_guard); + list_for_each_entry(p, &tty_drivers, tty_drivers) { + dev_t base = MKDEV(p->major, p->minor_start); + if (device < base || device >= base + p->num) + continue; + *index = device - base; +- return p; ++#ifdef CONFIG_VE ++ if (in_interrupt()) ++ goto found; ++ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR ++#ifdef CONFIG_UNIX98_PTYS ++ && (p->major<UNIX98_PTY_MASTER_MAJOR || ++ p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) && ++ (p->major<UNIX98_PTY_SLAVE_MAJOR || ++ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) ++#endif ++ ) goto found; ++ if (ve_is_super(VE_OWNER_TTYDRV(p)) && ++ ve_is_super(get_exec_env())) ++ goto found; ++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env())) ++ continue; ++#endif ++ goto found; + } ++ read_unlock(&tty_driver_guard); + return NULL; ++ ++found: ++ read_unlock(&tty_driver_guard); ++ return p; + } + + /* +@@ -410,7 +461,6 @@ void do_tty_hangup(void *data) + struct file * cons_filp = NULL; + struct file *filp, *f = NULL; + struct task_struct *p; +- struct pid *pid; + int closecount = 0, n; + + if (!tty) +@@ -481,8 +531,7 @@ void do_tty_hangup(void *data) + + read_lock(&tasklist_lock); + if (tty->session > 0) { +- struct list_head *l; +- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) { ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + if (p->signal->tty == tty) + p->signal->tty = NULL; + if (!p->signal->leader) +@@ -491,7 +540,7 @@ void do_tty_hangup(void *data) + send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + if (tty->pgrp > 0) + p->signal->tty_old_pgrp = tty->pgrp; +- } ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + } + read_unlock(&tasklist_lock); + +@@ -563,15 +612,15 @@ void disassociate_ctty(int on_exit) + { + struct tty_struct *tty; + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + int tty_pgrp = -1; + + lock_kernel(); + ++ down(&tty_sem); + tty = current->signal->tty; + if (tty) { + tty_pgrp = tty->pgrp; ++ up(&tty_sem); + if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) + tty_vhangup(tty); + } else { +@@ -579,6 +628,7 @@ void disassociate_ctty(int on_exit) + kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit); + kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit); + } ++ up(&tty_sem); + unlock_kernel(); + return; + } +@@ -588,14 +638,19 @@ void disassociate_ctty(int on_exit) + kill_pg(tty_pgrp, SIGCONT, on_exit); + } + ++ /* Must lock changes to tty_old_pgrp */ ++ down(&tty_sem); + current->signal->tty_old_pgrp = 0; + tty->session = 0; + tty->pgrp = -1; + ++ /* Now clear signal->tty under the lock */ + read_lock(&tasklist_lock); +- for_each_task_pid(current->signal->session, PIDTYPE_SID, p, l, pid) ++ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; ++ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); ++ up(&tty_sem); + unlock_kernel(); + } + 
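
The get_tty_driver() hunk above narrows the global driver list by virtual environment: in process context, a lookup of a pty major from inside a VE only matches drivers owned by that VE, while interrupt context and non-pty majors match as before. What follows is a minimal standalone model of that filtering rule, written as plain userspace C so it can actually be compiled and run; the ve and drv structures and the accessible() and find_drv() helpers are illustrative stand-ins for ve_struct, tty_driver, ve_accessible_strict() and get_tty_driver(), not kernel API, and this is a sketch of the idea rather than the patch's code.

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-ins for the kernel's ve_struct and tty_driver. */
struct ve { int id; int is_super; };
struct drv { const char *name; struct ve *owner_env; };

/* Mirrors the spirit of ve_accessible_strict(): only the owning
 * environment may see the driver. */
static int accessible(struct ve *owner, struct ve *caller)
{
    return owner->id == caller->id;
}

/* Model of the filtered lookup: the super environment sees drivers it
 * owns, every other environment sees only its own drivers. */
static struct drv *find_drv(struct drv *tab, size_t n, struct ve *caller)
{
    for (size_t i = 0; i < n; i++) {
        struct drv *p = &tab[i];
        if (p->owner_env->is_super && caller->is_super)
            return p;               /* super-VE short-circuit */
        if (!accessible(p->owner_env, caller))
            continue;               /* owned by someone else: skip */
        return p;
    }
    return NULL;
}

int main(void)
{
    struct ve ve0 = { 0, 1 }, ve101 = { 101, 0 };
    struct drv tab[] = { { "ptm-ve0", &ve0 }, { "ptm-ve101", &ve101 } };
    struct drv *d = find_drv(tab, 2, &ve101);
    printf("%s\n", d ? d->name : "(none)");   /* prints ptm-ve101 */
    return 0;
}

The ordering mirrors the patch: the super-environment short-circuit is tried first, then the strict ownership test, so a container never resolves a pty driver registered by another container.
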
+@@ -656,7 +711,7 @@ static ssize_t tty_read(struct file * fi + i = -EIO; + unlock_kernel(); + if (i > 0) +- inode->i_atime = CURRENT_TIME; ++ inode->i_atime = current_fs_time(inode->i_sb); + return i; + } + +@@ -702,7 +757,8 @@ static inline ssize_t do_tty_write( + } + } + if (written) { +- file->f_dentry->d_inode->i_mtime = CURRENT_TIME; ++ struct inode *inode = file->f_dentry->d_inode; ++ inode->i_mtime = current_fs_time(inode->i_sb); + ret = written; + } + up(&tty->atomic_write); +@@ -760,27 +816,28 @@ static inline void tty_line_name(struct + * really quite straightforward. The semaphore locking can probably be + * relaxed for the (most common) case of reopening a tty. + */ +-static int init_dev(struct tty_driver *driver, int idx, +- struct tty_struct **ret_tty) ++static int init_dev(struct tty_driver *driver, int idx, ++ struct tty_struct *i_tty, struct tty_struct **ret_tty) + { + struct tty_struct *tty, *o_tty; + struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; + struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; ++ struct ve_struct * owner; + int retval=0; + +- /* +- * Check whether we need to acquire the tty semaphore to avoid +- * race conditions. For now, play it safe. +- */ +- down(&tty_sem); ++ owner = VE_OWNER_TTYDRV(driver); + +- /* check whether we're reopening an existing tty */ +- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { +- tty = devpts_get_tty(idx); +- if (tty && driver->subtype == PTY_TYPE_MASTER) +- tty = tty->link; +- } else { +- tty = driver->ttys[idx]; ++ if (i_tty) ++ tty = i_tty; ++ else { ++ /* check whether we're reopening an existing tty */ ++ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { ++ tty = devpts_get_tty(idx); ++ if (tty && driver->subtype == PTY_TYPE_MASTER) ++ tty = tty->link; ++ } else { ++ tty = driver->ttys[idx]; ++ } + } + if (tty) goto fast_track; + +@@ -808,6 +865,7 @@ static int init_dev(struct tty_driver *d + tty->driver = driver; + tty->index = idx; + tty_line_name(driver, idx, tty->name); ++ SET_VE_OWNER_TTY(tty, owner); + + if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { + tp_loc = &tty->termios; +@@ -818,7 +876,7 @@ static int init_dev(struct tty_driver *d + } + + if (!*tp_loc) { +- tp = (struct termios *) kmalloc(sizeof(struct termios), ++ tp = (struct termios *) ub_kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!tp) + goto free_mem_out; +@@ -826,7 +884,7 @@ static int init_dev(struct tty_driver *d + } + + if (!*ltp_loc) { +- ltp = (struct termios *) kmalloc(sizeof(struct termios), ++ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!ltp) + goto free_mem_out; +@@ -841,6 +899,7 @@ static int init_dev(struct tty_driver *d + o_tty->driver = driver->other; + o_tty->index = idx; + tty_line_name(driver->other, idx, o_tty->name); ++ SET_VE_OWNER_TTY(o_tty, owner); + + if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { + o_tp_loc = &o_tty->termios; +@@ -852,7 +911,7 @@ static int init_dev(struct tty_driver *d + + if (!*o_tp_loc) { + o_tp = (struct termios *) +- kmalloc(sizeof(struct termios), GFP_KERNEL); ++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_tp) + goto free_mem_out; + *o_tp = driver->other->init_termios; +@@ -860,7 +919,7 @@ static int init_dev(struct tty_driver *d + + if (!*o_ltp_loc) { + o_ltp = (struct termios *) +- kmalloc(sizeof(struct termios), GFP_KERNEL); ++ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_ltp) + goto free_mem_out; + memset(o_ltp, 0, sizeof(struct termios)); +@@ -878,6 +937,10 @@ static int init_dev(struct tty_driver *d + *o_ltp_loc = o_ltp; + o_tty->termios 
= *o_tp_loc; + o_tty->termios_locked = *o_ltp_loc; ++#ifdef CONFIG_VE ++ if (driver->other->refcount == 0) ++ (void)get_ve(owner); ++#endif + driver->other->refcount++; + if (driver->subtype == PTY_TYPE_MASTER) + o_tty->count++; +@@ -902,6 +965,10 @@ static int init_dev(struct tty_driver *d + *ltp_loc = ltp; + tty->termios = *tp_loc; + tty->termios_locked = *ltp_loc; ++#ifdef CONFIG_VE ++ if (driver->refcount == 0) ++ (void)get_ve(owner); ++#endif + driver->refcount++; + tty->count++; + +@@ -956,7 +1023,6 @@ success: + + /* All paths come through here to release the semaphore */ + end_init: +- up(&tty_sem); + return retval; + + /* Release locally allocated memory ... nothing placed in slots */ +@@ -1010,6 +1076,10 @@ static void release_mem(struct tty_struc + } + o_tty->magic = 0; + o_tty->driver->refcount--; ++#ifdef CONFIG_VE ++ if (o_tty->driver->refcount == 0) ++ put_ve(VE_OWNER_TTY(o_tty)); ++#endif + file_list_lock(); + list_del_init(&o_tty->tty_files); + file_list_unlock(); +@@ -1032,6 +1102,10 @@ static void release_mem(struct tty_struc + + tty->magic = 0; + tty->driver->refcount--; ++#ifdef CONFIG_VE ++ if (tty->driver->refcount == 0) ++ put_ve(VE_OWNER_TTY(tty)); ++#endif + file_list_lock(); + list_del_init(&tty->tty_files); + file_list_unlock(); +@@ -1054,6 +1128,9 @@ static void release_dev(struct file * fi + int devpts_master, devpts; + int idx; + char buf[64]; ++#ifdef CONFIG_UNIX98_PTYS ++ struct idr *idr_alloced; ++#endif + + tty = (struct tty_struct *)filp->private_data; + if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev")) +@@ -1069,6 +1146,9 @@ static void release_dev(struct file * fi + devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0; + devpts_master = pty_master && devpts; + o_tty = tty->link; ++#ifdef CONFIG_UNIX98_PTYS ++ idr_alloced = tty->owner_env->allocated_ptys; ++#endif + + #ifdef TTY_PARANOIA_CHECK + if (idx < 0 || idx >= tty->driver->num) { +@@ -1152,9 +1232,14 @@ static void release_dev(struct file * fi + * each iteration we avoid any problems. + */ + while (1) { ++ /* Guard against races with tty->count changes elsewhere and ++ opens on /dev/tty */ ++ ++ down(&tty_sem); + tty_closing = tty->count <= 1; + o_tty_closing = o_tty && + (o_tty->count <= (pty_master ? 1 : 0)); ++ up(&tty_sem); + do_sleep = 0; + + if (tty_closing) { +@@ -1190,6 +1275,8 @@ static void release_dev(struct file * fi + * both sides, and we've completed the last operation that could + * block, so it's safe to proceed with closing. 
+ */ ++ ++ down(&tty_sem); + if (pty_master) { + if (--o_tty->count < 0) { + printk(KERN_WARNING "release_dev: bad pty slave count " +@@ -1203,7 +1290,8 @@ static void release_dev(struct file * fi + tty->count, tty_name(tty, buf)); + tty->count = 0; + } +- ++ up(&tty_sem); ++ + /* + * We've decremented tty->count, so we need to remove this file + * descriptor off the tty->tty_files list; this serves two +@@ -1235,15 +1323,15 @@ static void release_dev(struct file * fi + */ + if (tty_closing || o_tty_closing) { + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + + read_lock(&tasklist_lock); +- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + if (o_tty) +- for_each_task_pid(o_tty->session, PIDTYPE_SID, p,l, pid) ++ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; ++ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + } + +@@ -1294,7 +1382,7 @@ static void release_dev(struct file * fi + /* Make this pty number available for reallocation */ + if (devpts) { + down(&allocated_ptys_lock); +- idr_remove(&allocated_ptys, idx); ++ idr_remove(idr_alloced, idx); + up(&allocated_ptys_lock); + } + #endif +@@ -1315,7 +1403,7 @@ static void release_dev(struct file * fi + */ + static int tty_open(struct inode * inode, struct file * filp) + { +- struct tty_struct *tty; ++ struct tty_struct *tty, *c_tty; + int noctty, retval; + struct tty_driver *driver; + int index; +@@ -1327,12 +1415,18 @@ retry_open: + noctty = filp->f_flags & O_NOCTTY; + index = -1; + retval = 0; ++ c_tty = NULL; ++ ++ down(&tty_sem); + + if (device == MKDEV(TTYAUX_MAJOR,0)) { +- if (!current->signal->tty) ++ if (!current->signal->tty) { ++ up(&tty_sem); + return -ENXIO; ++ } + driver = current->signal->tty->driver; + index = current->signal->tty->index; ++ c_tty = current->signal->tty; + filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ + /* noctty = 1; */ + goto got_driver; +@@ -1341,6 +1435,12 @@ retry_open: + if (device == MKDEV(TTY_MAJOR,0)) { + extern int fg_console; + extern struct tty_driver *console_driver; ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ up(&tty_sem); ++ return -ENODEV; ++ } ++#endif + driver = console_driver; + index = fg_console; + noctty = 1; +@@ -1348,6 +1448,12 @@ retry_open: + } + #endif + if (device == MKDEV(TTYAUX_MAJOR,1)) { ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ up(&tty_sem); ++ return -ENODEV; ++ } ++#endif + driver = console_device(&index); + if (driver) { + /* Don't let /dev/console block */ +@@ -1355,6 +1461,7 @@ retry_open: + noctty = 1; + goto got_driver; + } ++ up(&tty_sem); + return -ENODEV; + } + +@@ -1364,29 +1471,33 @@ retry_open: + + /* find a device that is not in use. 
*/ + down(&allocated_ptys_lock); +- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { ++ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) { + up(&allocated_ptys_lock); ++ up(&tty_sem); + return -ENOMEM; + } +- idr_ret = idr_get_new(&allocated_ptys, NULL, &index); ++ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index); + if (idr_ret < 0) { + up(&allocated_ptys_lock); ++ up(&tty_sem); + if (idr_ret == -EAGAIN) + return -ENOMEM; + return -EIO; + } + if (index >= pty_limit) { +- idr_remove(&allocated_ptys, index); ++ idr_remove(&ve_allocated_ptys, index); + up(&allocated_ptys_lock); ++ up(&tty_sem); + return -EIO; + } + up(&allocated_ptys_lock); + +- driver = ptm_driver; +- retval = init_dev(driver, index, &tty); ++ driver = ve_ptm_driver; ++ retval = init_dev(driver, index, NULL, &tty); ++ up(&tty_sem); + if (retval) { + down(&allocated_ptys_lock); +- idr_remove(&allocated_ptys, index); ++ idr_remove(&ve_allocated_ptys, index); + up(&allocated_ptys_lock); + return retval; + } +@@ -1398,10 +1509,13 @@ retry_open: + #endif + { + driver = get_tty_driver(device, &index); +- if (!driver) ++ if (!driver) { ++ up(&tty_sem); + return -ENODEV; ++ } + got_driver: +- retval = init_dev(driver, index, &tty); ++ retval = init_dev(driver, index, c_tty, &tty); ++ up(&tty_sem); + if (retval) + return retval; + } +@@ -1435,7 +1549,7 @@ got_driver: + #ifdef CONFIG_UNIX98_PTYS + if (index != -1) { + down(&allocated_ptys_lock); +- idr_remove(&allocated_ptys, index); ++ idr_remove(&ve_allocated_ptys, index); + up(&allocated_ptys_lock); + } + #endif +@@ -1566,10 +1680,12 @@ static int tiocswinsz(struct tty_struct + + static int tioccons(struct file *file) + { ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ if (!ve_is_super(get_exec_env())) ++ return -EACCES; + if (file->f_op->write == redirected_tty_write) { + struct file *f; +- if (!capable(CAP_SYS_ADMIN)) +- return -EPERM; + spin_lock(&redirect_lock); + f = redirect; + redirect = NULL; +@@ -1606,8 +1722,6 @@ static int fionbio(struct file *file, in + + static int tiocsctty(struct tty_struct *tty, int arg) + { +- struct list_head *l; +- struct pid *pid; + task_t *p; + + if (current->signal->leader && +@@ -1630,8 +1744,9 @@ static int tiocsctty(struct tty_struct * + */ + + read_lock(&tasklist_lock); +- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) ++ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { + p->signal->tty = NULL; ++ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + } else + return -EPERM; +@@ -1653,7 +1768,7 @@ static int tiocgpgrp(struct tty_struct * + */ + if (tty == real_tty && current->signal->tty != real_tty) + return -ENOTTY; +- return put_user(real_tty->pgrp, p); ++ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p); + } + + static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) +@@ -1673,6 +1788,9 @@ static int tiocspgrp(struct tty_struct * + return -EFAULT; + if (pgrp < 0) + return -EINVAL; ++ pgrp = vpid_to_pid(pgrp); ++ if (pgrp < 0) ++ return -EPERM; + if (session_of_pgrp(pgrp) != current->signal->session) + return -EPERM; + real_tty->pgrp = pgrp; +@@ -1689,7 +1807,7 @@ static int tiocgsid(struct tty_struct *t + return -ENOTTY; + if (real_tty->session <= 0) + return -ENOTTY; +- return put_user(real_tty->session, p); ++ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p); + } + + static int tiocsetd(struct tty_struct *tty, int __user *p) +@@ -1938,8 +2056,6 @@ static void __do_SAK(void *arg) + #else + struct 
tty_struct *tty = arg; + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + int session; + int i; + struct file *filp; +@@ -1952,7 +2068,7 @@ static void __do_SAK(void *arg) + if (tty->driver->flush_buffer) + tty->driver->flush_buffer(tty); + read_lock(&tasklist_lock); +- for_each_task_pid(session, PIDTYPE_SID, p, l, pid) { ++ do_each_task_pid_all(session, PIDTYPE_SID, p) { + if (p->signal->tty == tty || session > 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): p->signal->session==tty->session\n", +@@ -1979,7 +2095,7 @@ static void __do_SAK(void *arg) + spin_unlock(&p->files->file_lock); + } + task_unlock(p); +- } ++ } while_each_task_pid_all(session, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + #endif + } +@@ -2303,8 +2419,11 @@ int tty_register_driver(struct tty_drive + + if (!driver->put_char) + driver->put_char = tty_default_put_char; +- ++ ++ SET_VE_OWNER_TTYDRV(driver, get_exec_env()); ++ write_lock_irq(&tty_driver_guard); + list_add(&driver->tty_drivers, &tty_drivers); ++ write_unlock_irq(&tty_driver_guard); + + if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { + for(i = 0; i < driver->num; i++) +@@ -2331,7 +2450,9 @@ int tty_unregister_driver(struct tty_dri + unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), + driver->num); + ++ write_lock_irq(&tty_driver_guard); + list_del(&driver->tty_drivers); ++ write_unlock_irq(&tty_driver_guard); + + /* + * Free the termios and termios_locked structures because +@@ -2459,6 +2580,7 @@ static int __init tty_init(void) + + vty_init(); + #endif ++ prepare_tty(); + return 0; + } + module_init(tty_init); +diff -uprN linux-2.6.8.1.orig/drivers/char/vt.c linux-2.6.8.1-ve022stab078/drivers/char/vt.c +--- linux-2.6.8.1.orig/drivers/char/vt.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/char/vt.c 2006-05-11 13:05:33.000000000 +0400 +@@ -748,6 +748,8 @@ inline int resize_screen(int currcons, i + * [this is to be used together with some user program + * like resize that changes the hardware videomode] + */ ++#define VC_RESIZE_MAXCOL (32767) ++#define VC_RESIZE_MAXROW (32767) + int vc_resize(int currcons, unsigned int cols, unsigned int lines) + { + unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0; +@@ -760,6 +762,9 @@ int vc_resize(int currcons, unsigned int + if (!vc_cons_allocated(currcons)) + return -ENXIO; + ++ if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW) ++ return -EINVAL; ++ + new_cols = (cols ? cols : video_num_columns); + new_rows = (lines ? 
lines : video_num_lines); + new_row_size = new_cols << 1; +diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/cmd64x.c linux-2.6.8.1-ve022stab078/drivers/ide/pci/cmd64x.c +--- linux-2.6.8.1.orig/drivers/ide/pci/cmd64x.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/ide/pci/cmd64x.c 2006-05-11 13:05:27.000000000 +0400 +@@ -596,7 +596,7 @@ static unsigned int __devinit init_chips + + #ifdef __i386__ + if (dev->resource[PCI_ROM_RESOURCE].start) { +- pci_write_config_byte(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); ++ pci_write_config_dword(dev, PCI_ROM_ADDRESS, dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); + printk(KERN_INFO "%s: ROM enabled at 0x%08lx\n", name, dev->resource[PCI_ROM_RESOURCE].start); + } + #endif +diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/hpt34x.c linux-2.6.8.1-ve022stab078/drivers/ide/pci/hpt34x.c +--- linux-2.6.8.1.orig/drivers/ide/pci/hpt34x.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/ide/pci/hpt34x.c 2006-05-11 13:05:27.000000000 +0400 +@@ -251,7 +251,7 @@ static unsigned int __devinit init_chips + + if (cmd & PCI_COMMAND_MEMORY) { + if (pci_resource_start(dev, PCI_ROM_RESOURCE)) { +- pci_write_config_byte(dev, PCI_ROM_ADDRESS, ++ pci_write_config_dword(dev, PCI_ROM_ADDRESS, + dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); + printk(KERN_INFO "HPT345: ROM enabled at 0x%08lx\n", + dev->resource[PCI_ROM_RESOURCE].start); +diff -uprN linux-2.6.8.1.orig/drivers/ide/pci/hpt366.c linux-2.6.8.1-ve022stab078/drivers/ide/pci/hpt366.c +--- linux-2.6.8.1.orig/drivers/ide/pci/hpt366.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/ide/pci/hpt366.c 2006-05-11 13:05:27.000000000 +0400 +@@ -1089,7 +1089,7 @@ static unsigned int __devinit init_chips + u8 test = 0; + + if (dev->resource[PCI_ROM_RESOURCE].start) +- pci_write_config_byte(dev, PCI_ROM_ADDRESS, ++ pci_write_config_dword(dev, PCI_ROM_ADDRESS, + dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE); + + pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test); +diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c linux-2.6.8.1-ve022stab078/drivers/ieee1394/ieee1394_core.c +--- linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/ieee1394/ieee1394_core.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1034,8 +1034,8 @@ static int hpsbpkt_thread(void *__hi) + if (khpsbpkt_kill) + break; + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); ++ if (test_thread_flag(TIF_FREEZE)) { ++ refrigerator(); + continue; + } + +diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c linux-2.6.8.1-ve022stab078/drivers/ieee1394/nodemgr.c +--- linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/ieee1394/nodemgr.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1481,8 +1481,8 @@ static int nodemgr_host_thread(void *__h + + if (down_interruptible(&hi->reset_sem) || + down_interruptible(&nodemgr_serialize)) { +- if (current->flags & PF_FREEZE) { +- refrigerator(0); ++ if (test_thread_flag(TIF_FREEZE)) { ++ refrigerator(); + continue; + } + printk("NodeMgr: received unexpected signal?!\n" ); +diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serio.c linux-2.6.8.1-ve022stab078/drivers/input/serio/serio.c +--- linux-2.6.8.1.orig/drivers/input/serio/serio.c 2004-08-14 14:54:47.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/drivers/input/serio/serio.c 2006-05-11 13:05:25.000000000 +0400 +@@ -153,8 +153,8 @@ static int serio_thread(void *nothing) + do { + serio_handle_events(); + wait_event_interruptible(serio_wait, !list_empty(&serio_event_list)); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + } while (!signal_pending(current)); + + printk(KERN_DEBUG "serio: kseriod exiting\n"); +diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serport.c linux-2.6.8.1-ve022stab078/drivers/input/serio/serport.c +--- linux-2.6.8.1.orig/drivers/input/serio/serport.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/input/serio/serport.c 2006-05-11 13:05:33.000000000 +0400 +@@ -66,6 +66,9 @@ static int serport_ldisc_open(struct tty + struct serport *serport; + char name[64]; + ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ + serport = kmalloc(sizeof(struct serport), GFP_KERNEL); + if (unlikely(!serport)) + return -ENOMEM; +diff -uprN linux-2.6.8.1.orig/drivers/md/md.c linux-2.6.8.1-ve022stab078/drivers/md/md.c +--- linux-2.6.8.1.orig/drivers/md/md.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/md/md.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2822,8 +2822,8 @@ int md_thread(void * arg) + + wait_event_interruptible(thread->wqueue, + test_bit(THREAD_WAKEUP, &thread->flags)); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + clear_bit(THREAD_WAKEUP, &thread->flags); + +diff -uprN linux-2.6.8.1.orig/drivers/net/8139too.c linux-2.6.8.1-ve022stab078/drivers/net/8139too.c +--- linux-2.6.8.1.orig/drivers/net/8139too.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/8139too.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1624,8 +1624,8 @@ static int rtl8139_thread (void *data) + do { + timeout = interruptible_sleep_on_timeout (&tp->thr_wait, timeout); + /* make swsusp happy with our thread */ +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + } while (!signal_pending (current) && (timeout > 0)); + + if (signal_pending (current)) { +diff -uprN linux-2.6.8.1.orig/drivers/net/forcedeth.c linux-2.6.8.1-ve022stab078/drivers/net/forcedeth.c +--- linux-2.6.8.1.orig/drivers/net/forcedeth.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/forcedeth.c 2006-05-11 13:05:27.000000000 +0400 +@@ -1618,6 +1618,9 @@ static int nv_open(struct net_device *de + writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus); + dprintk(KERN_INFO "startup: got 0x%08x.\n", miistat); + } ++ /* set linkspeed to invalid value, thus force nv_update_linkspeed ++ * to init hw */ ++ np->linkspeed = 0; + ret = nv_update_linkspeed(dev); + nv_start_rx(dev); + nv_start_tx(dev); +diff -uprN linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c linux-2.6.8.1-ve022stab078/drivers/net/irda/sir_kthread.c +--- linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/irda/sir_kthread.c 2006-05-11 13:05:25.000000000 +0400 +@@ -136,8 +136,8 @@ static int irda_thread(void *startup) + remove_wait_queue(&irda_rq_queue.kick, &wait); + + /* make swsusp happy with our thread */ +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + run_irda_queue(); + } +diff -uprN 
linux-2.6.8.1.orig/drivers/net/irda/stir4200.c linux-2.6.8.1-ve022stab078/drivers/net/irda/stir4200.c +--- linux-2.6.8.1.orig/drivers/net/irda/stir4200.c 2004-08-14 14:54:52.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/irda/stir4200.c 2006-05-11 13:05:25.000000000 +0400 +@@ -767,7 +767,7 @@ static int stir_transmit_thread(void *ar + && !signal_pending(current)) + { + /* if suspending, then power off and wait */ +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + if (stir->receiving) + receive_stop(stir); + else +@@ -775,7 +775,7 @@ static int stir_transmit_thread(void *ar + + write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD); + +- refrigerator(PF_FREEZE); ++ refrigerator(); + + if (change_speed(stir, stir->speed)) + break; +diff -uprN linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h linux-2.6.8.1-ve022stab078/drivers/net/irda/vlsi_ir.h +--- linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/irda/vlsi_ir.h 2006-05-11 13:05:40.000000000 +0400 +@@ -58,7 +58,7 @@ typedef void irqreturn_t; + + /* PDE() introduced in 2.5.4 */ + #ifdef CONFIG_PROC_FS +-#define PDE(inode) ((inode)->u.generic_ip) ++#define LPDE(inode) ((inode)->u.generic_ip) + #endif + + /* irda crc16 calculation exported in 2.5.42 */ +diff -uprN linux-2.6.8.1.orig/drivers/net/loopback.c linux-2.6.8.1-ve022stab078/drivers/net/loopback.c +--- linux-2.6.8.1.orig/drivers/net/loopback.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/loopback.c 2006-05-11 13:05:45.000000000 +0400 +@@ -127,6 +127,11 @@ static int loopback_xmit(struct sk_buff + { + struct net_device_stats *lb_stats; + ++ if (unlikely(get_exec_env()->disable_net)) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + skb_orphan(skb); + + skb->protocol=eth_type_trans(skb,dev); +@@ -183,6 +188,30 @@ static struct net_device_stats *get_stat + return stats; + } + ++static void loopback_destructor(struct net_device *dev) ++{ ++ kfree(dev->priv); ++ dev->priv = NULL; ++} ++ ++struct net_device templ_loopback_dev = { ++ .name = "lo", ++ .mtu = (16 * 1024) + 20 + 20 + 12, ++ .hard_start_xmit = loopback_xmit, ++ .hard_header = eth_header, ++ .hard_header_cache = eth_header_cache, ++ .header_cache_update = eth_header_cache_update, ++ .hard_header_len = ETH_HLEN, /* 14 */ ++ .addr_len = ETH_ALEN, /* 6 */ ++ .tx_queue_len = 0, ++ .type = ARPHRD_LOOPBACK, /* 0x0001*/ ++ .rebuild_header = eth_rebuild_header, ++ .flags = IFF_LOOPBACK, ++ .features = NETIF_F_SG|NETIF_F_FRAGLIST ++ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA ++ |NETIF_F_LLTX|NETIF_F_VIRTUAL, ++}; ++ + struct net_device loopback_dev = { + .name = "lo", + .mtu = (16 * 1024) + 20 + 20 + 12, +@@ -212,9 +241,11 @@ int __init loopback_init(void) + memset(stats, 0, sizeof(struct net_device_stats)); + loopback_dev.priv = stats; + loopback_dev.get_stats = &get_stats; ++ loopback_dev.destructor = &loopback_destructor; + } + + return register_netdev(&loopback_dev); + }; + + EXPORT_SYMBOL(loopback_dev); ++EXPORT_SYMBOL(templ_loopback_dev); +diff -uprN linux-2.6.8.1.orig/drivers/net/net_init.c linux-2.6.8.1-ve022stab078/drivers/net/net_init.c +--- linux-2.6.8.1.orig/drivers/net/net_init.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/net_init.c 2006-05-11 13:05:40.000000000 +0400 +@@ -51,6 +51,7 @@ + #include <linux/if_ltalk.h> + #include <linux/rtnetlink.h> + #include <net/neighbour.h> ++#include <ub/ub_mem.h> + + /* The network devices currently exist only in the socket 
namespace, so these + entries are unused. The only ones that make sense are +@@ -83,7 +84,7 @@ struct net_device *alloc_netdev(int size + & ~NETDEV_ALIGN_CONST; + alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; + +- p = kmalloc (alloc_size, GFP_KERNEL); ++ p = ub_kmalloc(alloc_size, GFP_KERNEL); + if (!p) { + printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); + return NULL; +@@ -392,6 +393,10 @@ int register_netdev(struct net_device *d + + out: + rtnl_unlock(); ++ if (err == 0 && dev->reg_state != NETREG_REGISTERED) { ++ unregister_netdev(dev); ++ err = -ENOMEM; ++ } + return err; + } + +diff -uprN linux-2.6.8.1.orig/drivers/net/open_vznet.c linux-2.6.8.1-ve022stab078/drivers/net/open_vznet.c +--- linux-2.6.8.1.orig/drivers/net/open_vznet.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/drivers/net/open_vznet.c 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,190 @@ ++/* ++ * open_vznet.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++/* ++ * Virtual Networking device used to change VE ownership on packets ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/seq_file.h> ++ ++#include <linux/inet.h> ++#include <net/ip.h> ++#include <linux/skbuff.h> ++#include <linux/venet.h> ++ ++void veip_stop(struct ve_struct *ve) ++{ ++ struct list_head *p, *tmp; ++ ++ write_lock_irq(&veip_hash_lock); ++ if (ve->veip == NULL) ++ goto unlock; ++ list_for_each_safe(p, tmp, &ve->veip->ip_lh) { ++ struct ip_entry_struct *ptr; ++ ptr = list_entry(p, struct ip_entry_struct, ve_list); ++ ptr->active_env = NULL; ++ list_del(&ptr->ve_list); ++ list_del(&ptr->ip_hash); ++ kfree(ptr); ++ } ++ veip_put(ve->veip); ++ ve->veip = NULL; ++unlock: ++ write_unlock_irq(&veip_hash_lock); ++} ++ ++int veip_start(struct ve_struct *ve) ++{ ++ int err; ++ ++ err = 0; ++ write_lock_irq(&veip_hash_lock); ++ ve->veip = veip_findcreate(ve->veid); ++ if (ve->veip == NULL) ++ err = -ENOMEM; ++ write_unlock_irq(&veip_hash_lock); ++ return err; ++} ++ ++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr) ++{ ++ struct ip_entry_struct *entry, *found; ++ int err; ++ ++ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL); ++ if (entry == NULL) ++ return -ENOMEM; ++ ++ memset(entry, 0, sizeof(struct ip_entry_struct)); ++ entry->ip = addr->sin_addr.s_addr; ++ ++ write_lock_irq(&veip_hash_lock); ++ err = -EADDRINUSE; ++ found = ip_entry_lookup(entry->ip); ++ if (found != NULL) ++ goto out_unlock; ++ else { ++ ip_entry_hash(entry, ve->veip); ++ found = entry; ++ entry = NULL; ++ } ++ err = 0; ++ found->active_env = ve; ++out_unlock: ++ write_unlock_irq(&veip_hash_lock); ++ if (entry != NULL) ++ kfree(entry); ++ return err; ++} ++ ++int veip_entry_del(envid_t veid, struct sockaddr_in *addr) ++{ ++ struct ip_entry_struct *found; ++ int err; ++ ++ err = -EADDRNOTAVAIL; ++ write_lock_irq(&veip_hash_lock); ++ found = ip_entry_lookup(addr->sin_addr.s_addr); ++ if (found == NULL) ++ goto out; ++ if (found->active_env->veid != veid) ++ goto out; ++ ++ err = 0; ++ found->active_env = NULL; ++ ++ list_del(&found->ip_hash); ++ list_del(&found->ve_list); ++ kfree(found); ++out: ++ write_unlock_irq(&veip_hash_lock); ++ return err; ++} ++ ++static struct ve_struct *venet_find_ve(__u32 ip) ++{ ++ struct ip_entry_struct *entry; ++ ++ entry = ip_entry_lookup(ip); ++ if (entry == NULL) ++ return NULL; ++ ++ return entry->active_env; ++} ++ ++int venet_change_skb_owner(struct sk_buff *skb) 
++{ ++ struct ve_struct *ve, *ve_old; ++ struct iphdr *iph; ++ ++ ve_old = skb->owner_env; ++ iph = skb->nh.iph; ++ ++ read_lock(&veip_hash_lock); ++ if (!ve_is_super(ve_old)) { ++ /* from VE to host */ ++ ve = venet_find_ve(iph->saddr); ++ if (ve == NULL) ++ goto out_drop; ++ if (!ve_accessible_strict(ve, ve_old)) ++ goto out_source; ++ skb->owner_env = get_ve0(); ++ } else { ++ /* from host to VE */ ++ ve = venet_find_ve(iph->daddr); ++ if (ve == NULL) ++ goto out_drop; ++ skb->owner_env = ve; ++ } ++ read_unlock(&veip_hash_lock); ++ ++ return 0; ++ ++out_drop: ++ read_unlock(&veip_hash_lock); ++ return -ESRCH; ++ ++out_source: ++ read_unlock(&veip_hash_lock); ++ if (net_ratelimit()) { ++ printk(KERN_WARNING "Dropped packet, source wrong " ++ "veid=%u src-IP=%u.%u.%u.%u " ++ "dst-IP=%u.%u.%u.%u\n", ++ skb->owner_env->veid, ++ NIPQUAD(skb->nh.iph->saddr), ++ NIPQUAD(skb->nh.iph->daddr)); ++ } ++ return -EACCES; ++} ++ ++#ifdef CONFIG_PROC_FS ++int veip_seq_show(struct seq_file *m, void *v) ++{ ++ struct list_head *p; ++ struct ip_entry_struct *entry; ++ char s[16]; ++ ++ p = (struct list_head *)v; ++ if (p == ip_entry_hash_table) { ++ seq_puts(m, "Version: 2.5\n"); ++ return 0; ++ } ++ entry = list_entry(p, struct ip_entry_struct, ip_hash); ++ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip)); ++ seq_printf(m, "%15s %10u\n", s, 0); ++ return 0; ++} ++#endif ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Virtual Network Device"); ++MODULE_LICENSE("GPL v2"); +diff -uprN linux-2.6.8.1.orig/drivers/net/ppp_async.c linux-2.6.8.1-ve022stab078/drivers/net/ppp_async.c +--- linux-2.6.8.1.orig/drivers/net/ppp_async.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/ppp_async.c 2006-05-11 13:05:33.000000000 +0400 +@@ -973,7 +973,7 @@ static void async_lcp_peek(struct asyncp + data += 4; + dlen -= 4; + /* data[0] is code, data[1] is length */ +- while (dlen >= 2 && dlen >= data[1]) { ++ while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) { + switch (data[0]) { + case LCP_MRU: + val = (data[2] << 8) + data[3]; +diff -uprN linux-2.6.8.1.orig/drivers/net/tun.c linux-2.6.8.1-ve022stab078/drivers/net/tun.c +--- linux-2.6.8.1.orig/drivers/net/tun.c 2004-08-14 14:55:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/tun.c 2006-05-11 13:05:42.000000000 +0400 +@@ -44,6 +44,7 @@ + + #include <asm/system.h> + #include <asm/uaccess.h> ++#include <ub/beancounter.h> + + #ifdef TUN_DEBUG + static int debug; +@@ -71,6 +72,7 @@ static int tun_net_close(struct net_devi + static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); ++ struct user_beancounter *ub; + + DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); + +@@ -90,6 +92,19 @@ static int tun_net_xmit(struct sk_buff * + if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) + goto drop; + } ++ ++ ub = netdev_bc(dev)->exec_ub; ++ if (ub && (skb_bc(skb)->charged == 0)) { ++ unsigned long charge; ++ charge = skb_charge_fullsize(skb); ++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1)) ++ goto drop; ++ get_beancounter(ub); ++ skb_bc(skb)->ub = ub; ++ skb_bc(skb)->charged = charge; ++ skb_bc(skb)->resource = UB_OTHERSOCKBUF; ++ } ++ + skb_queue_tail(&tun->readq, skb); + + /* Notify and wake up reader process */ +@@ -174,22 +189,26 @@ static __inline__ ssize_t tun_get_user(s + { + struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) }; + struct sk_buff *skb; +- size_t len = count; ++ size_t len = count, align = 0; + + 
if (!(tun->flags & TUN_NO_PI)) { +- if ((len -= sizeof(pi)) > len) ++ if ((len -= sizeof(pi)) > count) + return -EINVAL; + + if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi))) + return -EFAULT; + } ++ ++ if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) ++ align = NET_IP_ALIGN; + +- if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) { ++ if (!(skb = alloc_skb(len + align, GFP_KERNEL))) { + tun->stats.rx_dropped++; + return -ENOMEM; + } + +- skb_reserve(skb, 2); ++ if (align) ++ skb_reserve(skb, align); + if (memcpy_fromiovec(skb_put(skb, len), iv, len)) + return -EFAULT; + +@@ -322,6 +341,7 @@ static ssize_t tun_chr_readv(struct file + + ret = tun_put_user(tun, skb, (struct iovec *) iv, len); + ++ /* skb will be uncharged in kfree_skb() */ + kfree_skb(skb); + break; + } +@@ -355,6 +375,7 @@ static void tun_setup(struct net_device + dev->stop = tun_net_close; + dev->get_stats = tun_net_stats; + dev->destructor = free_netdev; ++ dev->features |= NETIF_F_VIRTUAL; + } + + static struct tun_struct *tun_get_by_name(const char *name) +@@ -363,8 +384,9 @@ static struct tun_struct *tun_get_by_nam + + ASSERT_RTNL(); + list_for_each_entry(tun, &tun_dev_list, list) { +- if (!strncmp(tun->dev->name, name, IFNAMSIZ)) +- return tun; ++ if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) && ++ !strncmp(tun->dev->name, name, IFNAMSIZ)) ++ return tun; + } + + return NULL; +@@ -383,7 +405,8 @@ static int tun_set_iff(struct file *file + + /* Check permissions */ + if (tun->owner != -1 && +- current->euid != tun->owner && !capable(CAP_NET_ADMIN)) ++ current->euid != tun->owner && ++ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN)) + return -EPERM; + } + else if (__dev_get_by_name(ifr->ifr_name)) +diff -uprN linux-2.6.8.1.orig/drivers/net/venet_core.c linux-2.6.8.1-ve022stab078/drivers/net/venet_core.c +--- linux-2.6.8.1.orig/drivers/net/venet_core.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/drivers/net/venet_core.c 2006-05-11 13:05:45.000000000 +0400 +@@ -0,0 +1,626 @@ ++/* ++ * venet_core.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++/* ++ * Common part for Virtuozzo virtual network devices ++ */ ++ ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/interrupt.h> ++#include <linux/fs.h> ++#include <linux/types.h> ++#include <linux/string.h> ++#include <linux/socket.h> ++#include <linux/errno.h> ++#include <linux/fcntl.h> ++#include <linux/in.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/tcp.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++ ++#include <asm/system.h> ++#include <asm/uaccess.h> ++#include <asm/io.h> ++#include <asm/unistd.h> ++ ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <net/ip.h> ++#include <linux/skbuff.h> ++#include <net/sock.h> ++#include <linux/if_ether.h> /* For the statistics structure. 
*/ ++#include <linux/if_arp.h> /* For ARPHRD_ETHER */ ++#include <linux/venet.h> ++#include <linux/ve_proto.h> ++#include <linux/vzctl.h> ++#include <linux/vzctl_venet.h> ++ ++struct list_head ip_entry_hash_table[VEIP_HASH_SZ]; ++rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED; ++LIST_HEAD(veip_lh); ++ ++#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1)) ++ ++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip) ++{ ++ list_add(&entry->ip_hash, ++ ip_entry_hash_table + ip_entry_hash_function(entry->ip)); ++ list_add(&entry->ve_list, &veip->ip_lh); ++} ++ ++void veip_put(struct veip_struct *veip) ++{ ++ if (!list_empty(&veip->ip_lh)) ++ return; ++ if (!list_empty(&veip->src_lh)) ++ return; ++ if (!list_empty(&veip->dst_lh)) ++ return; ++ ++ list_del(&veip->list); ++ kfree(veip); ++} ++ ++struct ip_entry_struct *ip_entry_lookup(u32 addr) ++{ ++ struct ip_entry_struct *entry; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) { ++ entry = list_entry(tmp, struct ip_entry_struct, ip_hash); ++ if (entry->ip != addr) ++ continue; ++ return entry; ++ } ++ return NULL; ++} ++ ++struct veip_struct *veip_find(envid_t veid) ++{ ++ struct veip_struct *ptr; ++ list_for_each_entry(ptr, &veip_lh, list) { ++ if (ptr->veid != veid) ++ continue; ++ return ptr; ++ } ++ return NULL; ++} ++ ++struct veip_struct *veip_findcreate(envid_t veid) ++{ ++ struct veip_struct *ptr; ++ ++ ptr = veip_find(veid); ++ if (ptr != NULL) ++ return ptr; ++ ++ ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC); ++ if (ptr == NULL) ++ return NULL; ++ memset(ptr, 0, sizeof(struct veip_struct)); ++ INIT_LIST_HEAD(&ptr->ip_lh); ++ INIT_LIST_HEAD(&ptr->src_lh); ++ INIT_LIST_HEAD(&ptr->dst_lh); ++ list_add(&ptr->list, &veip_lh); ++ ptr->veid = veid; ++ return ptr; ++} ++ ++/* ++ * Device functions ++ */ ++ ++static int venet_open(struct net_device *dev) ++{ ++ if (!try_module_get(THIS_MODULE)) ++ return -EBUSY; ++ return 0; ++} ++ ++static int venet_close(struct net_device *master) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static void venet_destructor(struct net_device *dev) ++{ ++ kfree(dev->priv); ++ dev->priv = NULL; ++} ++ ++/* ++ * The higher levels take care of making this non-reentrant (it's ++ * called with bh's disabled). 
++ */ ++static int venet_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct net_device_stats *stats = (struct net_device_stats *)dev->priv; ++ struct net_device *rcv = NULL; ++ struct iphdr *iph; ++ int length; ++ ++ if (unlikely(get_exec_env()->disable_net)) ++ goto outf; ++ ++ /* ++ * Optimise so buffers with skb->free=1 are not copied but ++ * instead are lobbed from tx queue to rx queue ++ */ ++ if (atomic_read(&skb->users) != 1) { ++ struct sk_buff *skb2 = skb; ++ skb = skb_clone(skb, GFP_ATOMIC); /* Clone the buffer */ ++ if (skb == NULL) { ++ kfree_skb(skb2); ++ goto out; ++ } ++ kfree_skb(skb2); ++ } else ++ skb_orphan(skb); ++ ++ if (skb->protocol != __constant_htons(ETH_P_IP)) ++ goto outf; ++ ++ iph = skb->nh.iph; ++ if (MULTICAST(iph->daddr)) ++ goto outf; ++ ++ if (venet_change_skb_owner(skb) < 0) ++ goto outf; ++ ++ if (unlikely(VE_OWNER_SKB(skb)->disable_net)) ++ goto outf; ++ ++ rcv = VE_OWNER_SKB(skb)->_venet_dev; ++ if (!rcv) ++ /* VE going down */ ++ goto outf; ++ ++ dev_hold(rcv); ++ ++ if (!(rcv->flags & IFF_UP)) { ++ /* Target VE does not want to receive packets */ ++ dev_put(rcv); ++ goto outf; ++ } ++ ++ skb->pkt_type = PACKET_HOST; ++ skb->dev = rcv; ++ ++ skb->mac.raw = skb->data; ++ memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len); ++ ++ dst_release(skb->dst); ++ skb->dst = NULL; ++#ifdef CONFIG_NETFILTER ++ nf_conntrack_put(skb->nfct); ++ skb->nfct = NULL; ++#ifdef CONFIG_NETFILTER_DEBUG ++ skb->nf_debug = 0; ++#endif ++#endif ++ length = skb->len; ++ ++ netif_rx(skb); ++ ++ stats->tx_bytes += length; ++ stats->tx_packets++; ++ if (rcv) { ++ struct net_device_stats *rcv_stats = ++ (struct net_device_stats *)rcv->priv; ++ rcv_stats->rx_bytes += length; ++ rcv_stats->rx_packets++; ++ dev_put(rcv); ++ } ++ ++ return 0; ++ ++outf: ++ kfree_skb(skb); ++ ++stats->tx_dropped; ++out: ++ return 0; ++} ++ ++static struct net_device_stats *get_stats(struct net_device *dev) ++{ ++ return (struct net_device_stats *)dev->priv; ++} ++ ++/* Initialize the rest of the LOOPBACK device. */ ++int venet_init_dev(struct net_device *dev) ++{ ++ dev->hard_start_xmit = venet_xmit; ++ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); ++ if (dev->priv == NULL) ++ return -ENOMEM; ++ memset(dev->priv, 0, sizeof(struct net_device_stats)); ++ dev->get_stats = get_stats; ++ dev->open = venet_open; ++ dev->stop = venet_close; ++ dev->destructor = venet_destructor; ++ ++ /* ++ * Fill in the generic fields of the device structure. ++ */ ++ dev->type = ARPHRD_VOID; ++ dev->hard_header_len = ETH_HLEN; ++ dev->mtu = 1500; /* eth_mtu */ ++ dev->tx_queue_len = 0; ++ ++ memset(dev->broadcast, 0xFF, ETH_ALEN); ++ ++ /* New-style flags. 
*/ ++ dev->flags = IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT; ++ return 0; ++} ++ ++static void venet_setup(struct net_device *dev) ++{ ++ dev->init = venet_init_dev; ++ /* ++ * No other features, as they are: ++ * - checksumming is required, and nobody else will done our job ++ */ ++ dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL; ++} ++ ++#ifdef CONFIG_PROC_FS ++static int veinfo_seq_show(struct seq_file *m, void *v) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ struct list_head *tmp; ++ ++ seq_printf(m, "%10u %5u %5u", ve->veid, ++ ve->class_id, atomic_read(&ve->pcounter)); ++ read_lock(&veip_hash_lock); ++ if (ve->veip == NULL) ++ goto unlock; ++ list_for_each(tmp, &ve->veip->ip_lh) { ++ char ip[16]; ++ struct ip_entry_struct *entry; ++ ++ entry = list_entry(tmp, struct ip_entry_struct, ve_list); ++ if (entry->active_env == NULL) ++ continue; ++ ++ sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip)); ++ seq_printf(m, " %15s", ip); ++ } ++unlock: ++ read_unlock(&veip_hash_lock); ++ seq_putc(m, '\n'); ++ return 0; ++} ++ ++static void *ve_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ve_struct *ve, *curve; ++ loff_t l; ++ ++ curve = get_exec_env(); ++ read_lock(&ve_list_guard); ++ if (!ve_is_super(curve)) { ++ if (*pos != 0) ++ return NULL; ++ return curve; ++ } ++ for (ve = ve_list_head, l = *pos; ++ ve != NULL && l > 0; ++ ve = ve->next, l--); ++ return ve; ++} ++ ++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ ++ if (!ve_is_super(get_exec_env())) ++ return NULL; ++ (*pos)++; ++ return ve->next; ++} ++ ++static void ve_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&ve_list_guard); ++} ++ ++ ++static struct seq_operations veinfo_seq_op = { ++ start: ve_seq_start, ++ next: ve_seq_next, ++ stop: ve_seq_stop, ++ show: veinfo_seq_show ++}; ++ ++static int veinfo_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &veinfo_seq_op); ++} ++ ++static struct file_operations proc_veinfo_operations = { ++ open: veinfo_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++ ++static void *veip_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t l; ++ struct list_head *p; ++ int i; ++ ++ l = *pos; ++ write_lock_irq(&veip_hash_lock); ++ if (l == 0) ++ return ip_entry_hash_table; ++ for (i = 0; i < VEIP_HASH_SZ; i++) { ++ list_for_each(p, ip_entry_hash_table + i) { ++ if (--l == 0) ++ return p; ++ } ++ } ++ return NULL; ++} ++ ++static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct list_head *p; ++ ++ p = (struct list_head *)v; ++ while (1) { ++ p = p->next; ++ if (p < ip_entry_hash_table || ++ p >= ip_entry_hash_table + VEIP_HASH_SZ) { ++ (*pos)++; ++ return p; ++ } ++ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ) ++ return NULL; ++ } ++ return NULL; ++} ++ ++static void veip_seq_stop(struct seq_file *m, void *v) ++{ ++ write_unlock_irq(&veip_hash_lock); ++} ++ ++static struct seq_operations veip_seq_op = { ++ start: veip_seq_start, ++ next: veip_seq_next, ++ stop: veip_seq_stop, ++ show: veip_seq_show ++}; ++ ++static int veip_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &veip_seq_op); ++} ++ ++static struct file_operations proc_veip_operations = { ++ open: veip_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++#endif ++ ++int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen) ++{ ++ int err; ++ struct sockaddr_in addr; ++ struct 
ve_struct *ve; ++ ++ err = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ ++ err = -EINVAL; ++ if (addrlen != sizeof(struct sockaddr_in)) ++ goto out; ++ ++ err = move_addr_to_kernel(uservaddr, addrlen, &addr); ++ if (err < 0) ++ goto out; ++ ++ switch (op) ++ { ++ case VE_IP_ADD: ++ ve = get_ve_by_id(veid); ++ err = -ESRCH; ++ if (!ve) ++ goto out; ++ ++ down_read(&ve->op_sem); ++ if (ve->is_running) ++ err = veip_entry_add(ve, &addr); ++ up_read(&ve->op_sem); ++ put_ve(ve); ++ break; ++ ++ case VE_IP_DEL: ++ err = veip_entry_del(veid, &addr); ++ break; ++ default: ++ err = -EINVAL; ++ } ++ ++out: ++ return err; ++} ++ ++int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ ++ err = -ENOTTY; ++ switch(cmd) { ++ case VENETCTL_VE_IP_MAP: { ++ struct vzctl_ve_ip_map s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen); ++ } ++ break; ++ } ++ return err; ++} ++ ++static struct vzioctlinfo venetcalls = { ++ type: VENETCTLTYPE, ++ func: venet_ioctl, ++ owner: THIS_MODULE, ++}; ++ ++int venet_dev_start(struct ve_struct *env) ++{ ++ struct net_device *dev_venet; ++ int err; ++ ++ dev_venet = alloc_netdev(0, "venet%d", venet_setup); ++ if (!dev_venet) ++ return -ENOMEM; ++ err = dev_alloc_name(dev_venet, dev_venet->name); ++ if (err<0) ++ goto err; ++ if ((err = register_netdev(dev_venet)) != 0) ++ goto err; ++ env->_venet_dev = dev_venet; ++ return 0; ++err: ++ free_netdev(dev_venet); ++ printk(KERN_ERR "VENET initialization error err=%d\n", err); ++ return err; ++} ++ ++static int venet_start(unsigned int hooknum, void *data) ++{ ++ struct ve_struct *env; ++ int err; ++ ++ env = (struct ve_struct *)data; ++ if (env->veip) ++ return -EEXIST; ++ if (!ve_is_super(env) && !try_module_get(THIS_MODULE)) ++ return 0; ++ ++ err = veip_start(env); ++ if (err) ++ goto err; ++ ++ err = venet_dev_start(env); ++ if (err) ++ goto err_free; ++ return 0; ++ ++err_free: ++ veip_stop(env); ++err: ++ if (!ve_is_super(env)) ++ module_put(THIS_MODULE); ++ return err; ++} ++ ++static int venet_stop(unsigned int hooknum, void *data) ++{ ++ struct ve_struct *env; ++ ++ env = (struct ve_struct *)data; ++ veip_stop(env); ++ if (!ve_is_super(env)) ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++#define VE_HOOK_PRI_NET 0 ++ ++static struct ve_hook venet_ve_hook_init = { ++ hook: venet_start, ++ undo: venet_stop, ++ hooknum: VE_HOOK_INIT, ++ priority: VE_HOOK_PRI_NET ++}; ++ ++static struct ve_hook venet_ve_hook_fini = { ++ hook: venet_stop, ++ hooknum: VE_HOOK_FINI, ++ priority: VE_HOOK_PRI_NET ++}; ++ ++__init int venet_init(void) ++{ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *de; ++#endif ++ int i, err; ++ ++ if (get_ve0()->_venet_dev != NULL) ++ return -EEXIST; ++ ++ for (i = 0; i < VEIP_HASH_SZ; i++) ++ INIT_LIST_HEAD(ip_entry_hash_table + i); ++ ++ err = venet_start(VE_HOOK_INIT, (void *)get_ve0()); ++ if (err) ++ return err; ++ ++#ifdef CONFIG_PROC_FS ++ de = create_proc_glob_entry("vz/veinfo", ++ S_IFREG|S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_veinfo_operations; ++ else ++ printk(KERN_WARNING "venet: can't make veinfo proc entry\n"); ++ ++ de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_veip_operations; ++ else ++ printk(KERN_WARNING "venet: can't make veip proc entry\n"); ++#endif ++ ++ ve_hook_register(&venet_ve_hook_init); ++ ve_hook_register(&venet_ve_hook_fini); ++ vzioctl_register(&venetcalls); 
++ return 0; ++} ++ ++__exit void venet_exit(void) ++{ ++ struct net_device *dev_venet; ++ ++ vzioctl_unregister(&venetcalls); ++ ve_hook_unregister(&venet_ve_hook_fini); ++ ve_hook_unregister(&venet_ve_hook_init); ++#ifdef CONFIG_PROC_FS ++ remove_proc_entry("vz/veip", NULL); ++ remove_proc_entry("vz/veinfo", NULL); ++#endif ++ ++ dev_venet = get_ve0()->_venet_dev; ++ if (dev_venet != NULL) { ++ get_ve0()->_venet_dev = NULL; ++ unregister_netdev(dev_venet); ++ free_netdev(dev_venet); ++ } ++ veip_stop(get_ve0()); ++} ++ ++module_init(venet_init); ++module_exit(venet_exit); +diff -uprN linux-2.6.8.1.orig/drivers/net/wireless/airo.c linux-2.6.8.1-ve022stab078/drivers/net/wireless/airo.c +--- linux-2.6.8.1.orig/drivers/net/wireless/airo.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/net/wireless/airo.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2901,8 +2901,8 @@ static int airo_thread(void *data) { + flush_signals(current); + + /* make swsusp happy with our thread */ +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + if (test_bit(JOB_DIE, &ai->flags)) + break; +diff -uprN linux-2.6.8.1.orig/drivers/pci/probe.c linux-2.6.8.1-ve022stab078/drivers/pci/probe.c +--- linux-2.6.8.1.orig/drivers/pci/probe.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/pci/probe.c 2006-05-11 13:05:40.000000000 +0400 +@@ -26,6 +26,7 @@ LIST_HEAD(pci_root_buses); + EXPORT_SYMBOL(pci_root_buses); + + LIST_HEAD(pci_devices); ++EXPORT_SYMBOL(pci_devices); + + /* + * PCI Bus Class +diff -uprN linux-2.6.8.1.orig/drivers/pci/quirks.c linux-2.6.8.1-ve022stab078/drivers/pci/quirks.c +--- linux-2.6.8.1.orig/drivers/pci/quirks.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/pci/quirks.c 2006-05-11 13:05:28.000000000 +0400 +@@ -292,6 +292,46 @@ static void __devinit quirk_ich4_lpc_acp + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1); + } + ++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) ++#include <asm/irq.h> ++ ++static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) ++{ ++ u8 config, rev; ++ u32 word; ++ extern struct pci_raw_ops *raw_pci_ops; ++ ++ pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); ++ if (rev > 0x9) ++ return; ++ ++ printk(KERN_INFO "Intel E7520/7320/7525 detected."); ++ ++ /* enable access to config space*/ ++ pci_read_config_byte(dev, 0xf4, &config); ++ config |= 0x2; ++ pci_write_config_byte(dev, 0xf4, config); ++ ++ /* read xTPR register */ ++ raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); ++ ++ if (!(word & (1 << 13))) { ++ printk(KERN_INFO "Disabling irq balancing and affinity\n"); ++#ifdef __i386__ ++#ifdef CONFIG_IRQBALANCE ++ irqbalance_disable(""); ++#endif ++ noirqdebug_setup(""); ++#endif ++ no_irq_affinity = 1; ++ } ++ ++ config &= ~0x2; ++ /* disable access to config space*/ ++ pci_write_config_byte(dev, 0xf4, config); ++} ++#endif ++ + /* + * VIA ACPI: One IO region pointed to by longword at + * 0x48 or 0x20 (256 bytes of ACPI registers) +@@ -1039,6 +1079,10 @@ static struct pci_fixup pci_fixups[] __d + #endif /* CONFIG_SCSI_SATA */ + + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SMCH, quirk_pciehp_msi }, ++#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) ++ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance }, ++ { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance }, ++#endif + + { 0 } + }; +diff 
-uprN linux-2.6.8.1.orig/drivers/pcmcia/cs.c linux-2.6.8.1-ve022stab078/drivers/pcmcia/cs.c +--- linux-2.6.8.1.orig/drivers/pcmcia/cs.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/pcmcia/cs.c 2006-05-11 13:05:25.000000000 +0400 +@@ -724,8 +724,8 @@ static int pccardd(void *__skt) + } + + schedule(); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + if (!skt->thread) + break; +diff -uprN linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c linux-2.6.8.1-ve022stab078/drivers/sbus/char/bbc_envctrl.c +--- linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/sbus/char/bbc_envctrl.c 2006-05-11 13:05:40.000000000 +0400 +@@ -614,7 +614,7 @@ void bbc_envctrl_cleanup(void) + int found = 0; + + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_all(p) { + if (p == kenvctrld_task) { + found = 1; + break; +diff -uprN linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c linux-2.6.8.1-ve022stab078/drivers/sbus/char/envctrl.c +--- linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/sbus/char/envctrl.c 2006-05-11 13:05:40.000000000 +0400 +@@ -1170,7 +1170,7 @@ static void __exit envctrl_cleanup(void) + int found = 0; + + read_lock(&tasklist_lock); +- for_each_process(p) { ++ for_each_process_all(p) { + if (p == kenvctrld_task) { + found = 1; + break; +diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.8.1-ve022stab078/drivers/scsi/aic7xxx/aic79xx_osm.c +--- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/scsi/aic7xxx/aic79xx_osm.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2591,7 +2591,6 @@ ahd_linux_dv_thread(void *data) + sprintf(current->comm, "ahd_dv_%d", ahd->unit); + #else + daemonize("ahd_dv_%d", ahd->unit); +- current->flags |= PF_FREEZE; + #endif + unlock_kernel(); + +diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.8.1-ve022stab078/drivers/scsi/aic7xxx/aic7xxx_osm.c +--- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/scsi/aic7xxx/aic7xxx_osm.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2295,7 +2295,6 @@ ahc_linux_dv_thread(void *data) + sprintf(current->comm, "ahc_dv_%d", ahc->unit); + #else + daemonize("ahc_dv_%d", ahc->unit); +- current->flags |= PF_FREEZE; + #endif + unlock_kernel(); + +diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_error.c linux-2.6.8.1-ve022stab078/drivers/scsi/scsi_error.c +--- linux-2.6.8.1.orig/drivers/scsi/scsi_error.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/scsi/scsi_error.c 2006-05-11 13:05:25.000000000 +0400 +@@ -558,7 +558,7 @@ static int scsi_request_sense(struct scs + + memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense)); + +- scsi_result = kmalloc(252, GFP_ATOMIC | (scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0); ++ scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? 
__GFP_DMA : 0)); + + + if (unlikely(!scsi_result)) { +diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c linux-2.6.8.1-ve022stab078/drivers/scsi/scsi_scan.c +--- linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/scsi/scsi_scan.c 2006-05-11 13:05:25.000000000 +0400 +@@ -733,7 +733,7 @@ static int scsi_probe_and_add_lun(struct + if (!sreq) + goto out_free_sdev; + result = kmalloc(256, GFP_ATOMIC | +- (host->unchecked_isa_dma) ? __GFP_DMA : 0); ++ ((host->unchecked_isa_dma) ? __GFP_DMA : 0)); + if (!result) + goto out_free_sreq; + +diff -uprN linux-2.6.8.1.orig/drivers/scsi/sg.c linux-2.6.8.1-ve022stab078/drivers/scsi/sg.c +--- linux-2.6.8.1.orig/drivers/scsi/sg.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/scsi/sg.c 2006-05-11 13:05:34.000000000 +0400 +@@ -2877,23 +2877,22 @@ static void * dev_seq_start(struct seq_f + { + struct sg_proc_deviter * it = kmalloc(sizeof(*it), GFP_KERNEL); + ++ s->private = it; + if (! it) + return NULL; ++ + if (NULL == sg_dev_arr) +- goto err1; ++ return NULL; + it->index = *pos; + it->max = sg_last_dev(); + if (it->index >= it->max) +- goto err1; ++ return NULL; + return it; +-err1: +- kfree(it); +- return NULL; + } + + static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos) + { +- struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; ++ struct sg_proc_deviter * it = s->private; + + *pos = ++it->index; + return (it->index < it->max) ? it : NULL; +@@ -2901,7 +2900,7 @@ static void * dev_seq_next(struct seq_fi + + static void dev_seq_stop(struct seq_file *s, void *v) + { +- kfree (v); ++ kfree(s->private); + } + + static int sg_proc_open_dev(struct inode *inode, struct file *file) +diff -uprN linux-2.6.8.1.orig/drivers/serial/8250.c linux-2.6.8.1-ve022stab078/drivers/serial/8250.c +--- linux-2.6.8.1.orig/drivers/serial/8250.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/serial/8250.c 2006-05-11 13:05:28.000000000 +0400 +@@ -20,27 +20,28 @@ + * membase is an 'ioremapped' cookie. + */ + #include <linux/config.h> ++#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) ++#define SUPPORT_SYSRQ ++#endif ++ + #include <linux/module.h> + #include <linux/moduleparam.h> +-#include <linux/tty.h> + #include <linux/ioport.h> + #include <linux/init.h> + #include <linux/console.h> + #include <linux/sysrq.h> ++#include <linux/delay.h> ++#include <linux/device.h> ++#include <linux/tty.h> ++#include <linux/tty_flip.h> + #include <linux/serial_reg.h> ++#include <linux/serial_core.h> + #include <linux/serial.h> + #include <linux/serialP.h> +-#include <linux/delay.h> +-#include <linux/device.h> + + #include <asm/io.h> + #include <asm/irq.h> + +-#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +-#define SUPPORT_SYSRQ +-#endif +- +-#include <linux/serial_core.h> + #include "8250.h" + + /* +@@ -827,16 +828,22 @@ receive_chars(struct uart_8250_port *up, + struct tty_struct *tty = up->port.info->tty; + unsigned char ch; + int max_count = 256; ++ char flag; + + do { ++ /* The following is not allowed by the tty layer and ++ unsafe. 
It should be fixed ASAP */ + if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { +- tty->flip.work.func((void *)tty); +- if (tty->flip.count >= TTY_FLIPBUF_SIZE) +- return; // if TTY_DONT_FLIP is set ++ if(tty->low_latency) { ++ spin_unlock(&up->port.lock); ++ tty_flip_buffer_push(tty); ++ spin_lock(&up->port.lock); ++ } ++ /* If this failed then we will throw away the ++ bytes but must do so to clear interrupts */ + } + ch = serial_inp(up, UART_RX); +- *tty->flip.char_buf_ptr = ch; +- *tty->flip.flag_buf_ptr = TTY_NORMAL; ++ flag = TTY_NORMAL; + up->port.icount.rx++; + + if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | +@@ -876,35 +883,30 @@ receive_chars(struct uart_8250_port *up, + #endif + if (*status & UART_LSR_BI) { + DEBUG_INTR("handling break...."); +- *tty->flip.flag_buf_ptr = TTY_BREAK; ++ flag = TTY_BREAK; + } else if (*status & UART_LSR_PE) +- *tty->flip.flag_buf_ptr = TTY_PARITY; ++ flag = TTY_PARITY; + else if (*status & UART_LSR_FE) +- *tty->flip.flag_buf_ptr = TTY_FRAME; ++ flag = TTY_FRAME; + } + if (uart_handle_sysrq_char(&up->port, ch, regs)) + goto ignore_char; +- if ((*status & up->port.ignore_status_mask) == 0) { +- tty->flip.flag_buf_ptr++; +- tty->flip.char_buf_ptr++; +- tty->flip.count++; +- } ++ if ((*status & up->port.ignore_status_mask) == 0) ++ tty_insert_flip_char(tty, ch, flag); + if ((*status & UART_LSR_OE) && +- tty->flip.count < TTY_FLIPBUF_SIZE) { ++ tty->flip.count < TTY_FLIPBUF_SIZE) + /* + * Overrun is special, since it's reported + * immediately, and doesn't affect the current + * character. + */ +- *tty->flip.flag_buf_ptr = TTY_OVERRUN; +- tty->flip.flag_buf_ptr++; +- tty->flip.char_buf_ptr++; +- tty->flip.count++; +- } ++ tty_insert_flip_char(tty, 0, TTY_OVERRUN); + ignore_char: + *status = serial_inp(up, UART_LSR); + } while ((*status & UART_LSR_DR) && (max_count-- > 0)); ++ spin_unlock(&up->port.lock); + tty_flip_buffer_push(tty); ++ spin_lock(&up->port.lock); + } + + static _INLINE_ void transmit_chars(struct uart_8250_port *up) +diff -uprN linux-2.6.8.1.orig/drivers/usb/core/hub.c linux-2.6.8.1-ve022stab078/drivers/usb/core/hub.c +--- linux-2.6.8.1.orig/drivers/usb/core/hub.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/usb/core/hub.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1922,8 +1922,8 @@ static int hub_thread(void *__unused) + do { + hub_events(); + wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list)); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + } while (!signal_pending(current)); + + pr_debug ("%s: khubd exiting\n", usbcore_name); +diff -uprN linux-2.6.8.1.orig/drivers/w1/w1.c linux-2.6.8.1-ve022stab078/drivers/w1/w1.c +--- linux-2.6.8.1.orig/drivers/w1/w1.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/drivers/w1/w1.c 2006-05-11 13:05:25.000000000 +0400 +@@ -465,8 +465,8 @@ int w1_control(void *data) + timeout = w1_timeout; + do { + timeout = interruptible_sleep_on_timeout(&w1_control_wait, timeout); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + } while (!signal_pending(current) && (timeout > 0)); + + if (signal_pending(current)) +@@ -536,8 +536,8 @@ int w1_process(void *data) + timeout = w1_timeout; + do { + timeout = interruptible_sleep_on_timeout(&dev->kwait, timeout); +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + } while 
(!signal_pending(current) && (timeout > 0)); + + if (signal_pending(current)) +diff -uprN linux-2.6.8.1.orig/fs/adfs/adfs.h linux-2.6.8.1-ve022stab078/fs/adfs/adfs.h +--- linux-2.6.8.1.orig/fs/adfs/adfs.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/adfs/adfs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -72,7 +72,7 @@ int adfs_get_block(struct inode *inode, + struct buffer_head *bh, int create); + struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); + void adfs_read_inode(struct inode *inode); +-void adfs_write_inode(struct inode *inode,int unused); ++int adfs_write_inode(struct inode *inode,int unused); + int adfs_notify_change(struct dentry *dentry, struct iattr *attr); + + /* map.c */ +diff -uprN linux-2.6.8.1.orig/fs/adfs/inode.c linux-2.6.8.1-ve022stab078/fs/adfs/inode.c +--- linux-2.6.8.1.orig/fs/adfs/inode.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/adfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -372,10 +372,11 @@ out: + * The adfs-specific inode data has already been updated by + * adfs_notify_change() + */ +-void adfs_write_inode(struct inode *inode, int unused) ++int adfs_write_inode(struct inode *inode, int unused) + { + struct super_block *sb = inode->i_sb; + struct object_info obj; ++ int ret; + + lock_kernel(); + obj.file_id = inode->i_ino; +@@ -386,7 +387,8 @@ void adfs_write_inode(struct inode *inod + obj.attr = ADFS_I(inode)->attr; + obj.size = inode->i_size; + +- adfs_dir_update(sb, &obj); ++ ret = adfs_dir_update(sb, &obj); + unlock_kernel(); ++ return ret; + } + MODULE_LICENSE("GPL"); +diff -uprN linux-2.6.8.1.orig/fs/affs/inode.c linux-2.6.8.1-ve022stab078/fs/affs/inode.c +--- linux-2.6.8.1.orig/fs/affs/inode.c 2004-08-14 14:55:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/affs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -181,7 +181,7 @@ bad_inode: + return; + } + +-void ++int + affs_write_inode(struct inode *inode, int unused) + { + struct super_block *sb = inode->i_sb; +@@ -194,11 +194,11 @@ affs_write_inode(struct inode *inode, in + + if (!inode->i_nlink) + // possibly free block +- return; ++ return 0; + bh = affs_bread(sb, inode->i_ino); + if (!bh) { + affs_error(sb,"write_inode","Cannot read block %lu",inode->i_ino); +- return; ++ return -EIO; + } + tail = AFFS_TAIL(sb, bh); + if (tail->stype == be32_to_cpu(ST_ROOT)) { +@@ -226,6 +226,7 @@ affs_write_inode(struct inode *inode, in + mark_buffer_dirty_inode(bh, inode); + affs_brelse(bh); + affs_free_prealloc(inode); ++ return 0; + } + + int +diff -uprN linux-2.6.8.1.orig/fs/afs/mntpt.c linux-2.6.8.1-ve022stab078/fs/afs/mntpt.c +--- linux-2.6.8.1.orig/fs/afs/mntpt.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/afs/mntpt.c 2006-05-11 13:05:40.000000000 +0400 +@@ -162,6 +162,7 @@ static struct vfsmount *afs_mntpt_do_aut + char *buf, *devname = NULL, *options = NULL; + filler_t *filler; + int ret; ++ struct file_system_type *fstype; + + kenter("{%s}", mntpt->d_name.name); + +@@ -210,7 +211,12 @@ static struct vfsmount *afs_mntpt_do_aut + + /* try and do the mount */ + kdebug("--- attempting mount %s -o %s ---", devname, options); +- mnt = do_kern_mount("afs", 0, devname, options); ++ fstype = get_fs_type("afs"); ++ ret = -ENODEV; ++ if (!fstype) ++ goto error; ++ mnt = do_kern_mount(fstype, 0, devname, options); ++ put_filesystem(fstype); + kdebug("--- mount result %p ---", mnt); + + free_page((unsigned long) devname); +diff -uprN linux-2.6.8.1.orig/fs/attr.c linux-2.6.8.1-ve022stab078/fs/attr.c +--- 
linux-2.6.8.1.orig/fs/attr.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/attr.c 2006-05-11 13:05:32.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/fcntl.h> + #include <linux/quotaops.h> + #include <linux/security.h> ++#include <linux/time.h> + + /* Taken over from the old code... */ + +@@ -87,11 +88,14 @@ int inode_setattr(struct inode * inode, + if (ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) +- inode->i_atime = attr->ia_atime; ++ inode->i_atime = timespec_trunc(attr->ia_atime, ++ get_sb_time_gran(inode->i_sb)); + if (ia_valid & ATTR_MTIME) +- inode->i_mtime = attr->ia_mtime; ++ inode->i_mtime = timespec_trunc(attr->ia_mtime, ++ get_sb_time_gran(inode->i_sb)); + if (ia_valid & ATTR_CTIME) +- inode->i_ctime = attr->ia_ctime; ++ inode->i_ctime = timespec_trunc(attr->ia_ctime, ++ get_sb_time_gran(inode->i_sb)); + if (ia_valid & ATTR_MODE) { + umode_t mode = attr->ia_mode; + +@@ -131,14 +135,17 @@ int setattr_mask(unsigned int ia_valid) + int notify_change(struct dentry * dentry, struct iattr * attr) + { + struct inode *inode = dentry->d_inode; +- mode_t mode = inode->i_mode; ++ mode_t mode; + int error; +- struct timespec now = CURRENT_TIME; ++ struct timespec now; + unsigned int ia_valid = attr->ia_valid; + + if (!inode) + BUG(); + ++ mode = inode->i_mode; ++ now = current_fs_time(inode->i_sb); ++ + attr->ia_ctime = now; + if (!(ia_valid & ATTR_ATIME_SET)) + attr->ia_atime = now; +diff -uprN linux-2.6.8.1.orig/fs/autofs/autofs_i.h linux-2.6.8.1-ve022stab078/fs/autofs/autofs_i.h +--- linux-2.6.8.1.orig/fs/autofs/autofs_i.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs/autofs_i.h 2006-05-11 13:05:42.000000000 +0400 +@@ -123,7 +123,7 @@ static inline struct autofs_sb_info *aut + filesystem without "magic".) 
*/ + + static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { +- return sbi->catatonic || process_group(current) == sbi->oz_pgrp; ++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp; + } + + /* Hash operations */ +diff -uprN linux-2.6.8.1.orig/fs/autofs/init.c linux-2.6.8.1-ve022stab078/fs/autofs/init.c +--- linux-2.6.8.1.orig/fs/autofs/init.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs/init.c 2006-05-11 13:05:42.000000000 +0400 +@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs + .name = "autofs", + .get_sb = autofs_get_sb, + .kill_sb = kill_anon_super, ++ .fs_flags = FS_VIRTUALIZED, + }; + + static int __init init_autofs_fs(void) +diff -uprN linux-2.6.8.1.orig/fs/autofs/inode.c linux-2.6.8.1-ve022stab078/fs/autofs/inode.c +--- linux-2.6.8.1.orig/fs/autofs/inode.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs/inode.c 2006-05-11 13:05:42.000000000 +0400 +@@ -66,7 +66,7 @@ static int parse_options(char *options, + + *uid = current->uid; + *gid = current->gid; +- *pgrp = process_group(current); ++ *pgrp = virt_pgid(current); + + *minproto = *maxproto = AUTOFS_PROTO_VERSION; + +@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block + sbi->magic = AUTOFS_SBI_MAGIC; + sbi->catatonic = 0; + sbi->exp_timeout = 0; +- sbi->oz_pgrp = process_group(current); ++ sbi->oz_pgrp = virt_pgid(current); + autofs_initialize_hash(&sbi->dirhash); + sbi->queues = NULL; + memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN); +diff -uprN linux-2.6.8.1.orig/fs/autofs/root.c linux-2.6.8.1-ve022stab078/fs/autofs/root.c +--- linux-2.6.8.1.orig/fs/autofs/root.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs/root.c 2006-05-11 13:05:42.000000000 +0400 +@@ -347,7 +347,7 @@ static int autofs_root_unlink(struct ino + + /* This allows root to remove symlinks */ + lock_kernel(); +- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) { ++ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) { + unlock_kernel(); + return -EACCES; + } +@@ -534,7 +534,7 @@ static int autofs_root_ioctl(struct inod + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + return -ENOTTY; + +- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EPERM; + + switch(cmd) { +diff -uprN linux-2.6.8.1.orig/fs/autofs4/autofs_i.h linux-2.6.8.1-ve022stab078/fs/autofs4/autofs_i.h +--- linux-2.6.8.1.orig/fs/autofs4/autofs_i.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs4/autofs_i.h 2006-05-11 13:05:42.000000000 +0400 +@@ -91,6 +91,7 @@ struct autofs_wait_queue { + + struct autofs_sb_info { + u32 magic; ++ struct dentry *root; + struct file *pipe; + pid_t oz_pgrp; + int catatonic; +@@ -119,7 +120,7 @@ static inline struct autofs_info *autofs + filesystem without "magic".) */ + + static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { +- return sbi->catatonic || process_group(current) == sbi->oz_pgrp; ++ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp; + } + + /* Does a dentry have some pending activity? 
*/ +diff -uprN linux-2.6.8.1.orig/fs/autofs4/init.c linux-2.6.8.1-ve022stab078/fs/autofs4/init.c +--- linux-2.6.8.1.orig/fs/autofs4/init.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs4/init.c 2006-05-11 13:05:42.000000000 +0400 +@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs + .name = "autofs", + .get_sb = autofs_get_sb, + .kill_sb = kill_anon_super, ++ .fs_flags = FS_VIRTUALIZED, + }; + + static int __init init_autofs4_fs(void) +diff -uprN linux-2.6.8.1.orig/fs/autofs4/inode.c linux-2.6.8.1-ve022stab078/fs/autofs4/inode.c +--- linux-2.6.8.1.orig/fs/autofs4/inode.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs4/inode.c 2006-05-11 13:05:42.000000000 +0400 +@@ -16,6 +16,7 @@ + #include <linux/pagemap.h> + #include <linux/parser.h> + #include <asm/bitops.h> ++#include <linux/smp_lock.h> + #include "autofs_i.h" + #include <linux/module.h> + +@@ -76,6 +77,66 @@ void autofs4_free_ino(struct autofs_info + kfree(ino); + } + ++/* ++ * Deal with the infamous "Busy inodes after umount ..." message. ++ * ++ * Clean up the dentry tree. This happens with autofs if the user ++ * space program goes away due to a SIGKILL, SIGSEGV etc. ++ */ ++static void autofs4_force_release(struct autofs_sb_info *sbi) ++{ ++ struct dentry *this_parent = sbi->root; ++ struct list_head *next; ++ ++ spin_lock(&dcache_lock); ++repeat: ++ next = this_parent->d_subdirs.next; ++resume: ++ while (next != &this_parent->d_subdirs) { ++ struct dentry *dentry = list_entry(next, struct dentry, d_child); ++ ++ /* Negative dentry - don`t care */ ++ if (!simple_positive(dentry)) { ++ next = next->next; ++ continue; ++ } ++ ++ if (!list_empty(&dentry->d_subdirs)) { ++ this_parent = dentry; ++ goto repeat; ++ } ++ ++ next = next->next; ++ spin_unlock(&dcache_lock); ++ ++ DPRINTK("dentry %p %.*s", ++ dentry, (int)dentry->d_name.len, dentry->d_name.name); ++ ++ dput(dentry); ++ spin_lock(&dcache_lock); ++ } ++ ++ if (this_parent != sbi->root) { ++ struct dentry *dentry = this_parent; ++ ++ next = this_parent->d_child.next; ++ this_parent = this_parent->d_parent; ++ spin_unlock(&dcache_lock); ++ DPRINTK("parent dentry %p %.*s", ++ dentry, (int)dentry->d_name.len, dentry->d_name.name); ++ dput(dentry); ++ spin_lock(&dcache_lock); ++ goto resume; ++ } ++ spin_unlock(&dcache_lock); ++ ++ dput(sbi->root); ++ sbi->root = NULL; ++ shrink_dcache_sb(sbi->sb); ++ ++ return; ++} ++ + static void autofs4_put_super(struct super_block *sb) + { + struct autofs_sb_info *sbi = autofs4_sbi(sb); +@@ -85,6 +146,10 @@ static void autofs4_put_super(struct sup + if ( !sbi->catatonic ) + autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ + ++ /* Clean up and release dangling references */ ++ if (sbi) ++ autofs4_force_release(sbi); ++ + kfree(sbi); + + DPRINTK("shutting down"); +@@ -116,7 +181,7 @@ static int parse_options(char *options, + + *uid = current->uid; + *gid = current->gid; +- *pgrp = process_group(current); ++ *pgrp = virt_pgid(current); + + *minproto = AUTOFS_MIN_PROTO_VERSION; + *maxproto = AUTOFS_MAX_PROTO_VERSION; +@@ -199,9 +264,10 @@ int autofs4_fill_super(struct super_bloc + + s->s_fs_info = sbi; + sbi->magic = AUTOFS_SBI_MAGIC; ++ sbi->root = NULL; + sbi->catatonic = 0; + sbi->exp_timeout = 0; +- sbi->oz_pgrp = process_group(current); ++ sbi->oz_pgrp = virt_pgid(current); + sbi->sb = s; + sbi->version = 0; + sbi->sub_version = 0; +@@ -265,6 +331,13 @@ int autofs4_fill_super(struct super_bloc + sbi->pipe = pipe; + + /* ++ * Take a reference to the root 
dentry so we get a chance to ++ * clean up the dentry tree on umount. ++ * See autofs4_force_release. ++ */ ++ sbi->root = dget(root); ++ ++ /* + * Success! Install the root dentry now to indicate completion. + */ + s->s_root = root; +diff -uprN linux-2.6.8.1.orig/fs/autofs4/root.c linux-2.6.8.1-ve022stab078/fs/autofs4/root.c +--- linux-2.6.8.1.orig/fs/autofs4/root.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/autofs4/root.c 2006-05-11 13:05:42.000000000 +0400 +@@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct ino + struct autofs_info *ino = autofs4_dentry_ino(dentry); + + /* This allows root to remove symlinks */ +- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EACCES; + + dput(ino->dentry); +@@ -621,7 +621,9 @@ static int autofs4_dir_rmdir(struct inod + spin_unlock(&dcache_lock); + return -ENOTEMPTY; + } ++ spin_lock(&dentry->d_lock); + __d_drop(dentry); ++ spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + + dput(ino->dentry); +@@ -783,7 +785,7 @@ static int autofs4_root_ioctl(struct ino + _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT ) + return -ENOTTY; + +- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) ++ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) + return -EPERM; + + switch(cmd) { +diff -uprN linux-2.6.8.1.orig/fs/bad_inode.c linux-2.6.8.1-ve022stab078/fs/bad_inode.c +--- linux-2.6.8.1.orig/fs/bad_inode.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/bad_inode.c 2006-05-11 13:05:32.000000000 +0400 +@@ -105,7 +105,8 @@ void make_bad_inode(struct inode * inode + remove_inode_hash(inode); + + inode->i_mode = S_IFREG; +- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode->i_atime = inode->i_mtime = inode->i_ctime = ++ current_fs_time(inode->i_sb); + inode->i_op = &bad_inode_ops; + inode->i_fop = &bad_file_ops; + } +diff -uprN linux-2.6.8.1.orig/fs/bfs/inode.c linux-2.6.8.1-ve022stab078/fs/bfs/inode.c +--- linux-2.6.8.1.orig/fs/bfs/inode.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/bfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -85,7 +85,7 @@ static void bfs_read_inode(struct inode + brelse(bh); + } + +-static void bfs_write_inode(struct inode * inode, int unused) ++static int bfs_write_inode(struct inode * inode, int unused) + { + unsigned long ino = inode->i_ino; + struct bfs_inode * di; +@@ -94,7 +94,7 @@ static void bfs_write_inode(struct inode + + if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { + printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); +- return; ++ return -EIO; + } + + lock_kernel(); +@@ -103,7 +103,7 @@ static void bfs_write_inode(struct inode + if (!bh) { + printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + unlock_kernel(); +- return; ++ return -EIO; + } + + off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; +@@ -129,6 +129,7 @@ static void bfs_write_inode(struct inode + mark_buffer_dirty(bh); + brelse(bh); + unlock_kernel(); ++ return 0; + } + + static void bfs_delete_inode(struct inode * inode) +diff -uprN linux-2.6.8.1.orig/fs/binfmt_aout.c linux-2.6.8.1-ve022stab078/fs/binfmt_aout.c +--- linux-2.6.8.1.orig/fs/binfmt_aout.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_aout.c 2006-05-11 13:05:45.000000000 +0400 +@@ -43,13 +43,21 @@ static struct linux_binfmt aout_format = + .min_coredump = 
PAGE_SIZE + }; + +-static void set_brk(unsigned long start, unsigned long end) ++#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) ++ ++static int set_brk(unsigned long start, unsigned long end) + { + start = PAGE_ALIGN(start); + end = PAGE_ALIGN(end); +- if (end <= start) +- return; +- do_brk(start, end - start); ++ if (end > start) { ++ unsigned long addr; ++ down_write(¤t->mm->mmap_sem); ++ addr = do_brk(start, end - start); ++ up_write(¤t->mm->mmap_sem); ++ if (BAD_ADDR(addr)) ++ return addr; ++ } ++ return 0; + } + + /* +@@ -318,10 +326,14 @@ static int load_aout_binary(struct linux + loff_t pos = fd_offset; + /* Fuck me plenty... */ + /* <AOL></AOL> */ ++ down_write(¤t->mm->mmap_sem); + error = do_brk(N_TXTADDR(ex), ex.a_text); ++ up_write(¤t->mm->mmap_sem); + bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), + ex.a_text, &pos); ++ down_write(¤t->mm->mmap_sem); + error = do_brk(N_DATADDR(ex), ex.a_data); ++ up_write(¤t->mm->mmap_sem); + bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), + ex.a_data, &pos); + goto beyond_if; +@@ -341,8 +353,9 @@ static int load_aout_binary(struct linux + pos = 32; + map_size = ex.a_text+ex.a_data; + #endif +- ++ down_write(¤t->mm->mmap_sem); + error = do_brk(text_addr & PAGE_MASK, map_size); ++ up_write(¤t->mm->mmap_sem); + if (error != (text_addr & PAGE_MASK)) { + send_sig(SIGKILL, current, 0); + return error; +@@ -377,7 +390,9 @@ static int load_aout_binary(struct linux + + if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + loff_t pos = fd_offset; ++ down_write(¤t->mm->mmap_sem); + do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); ++ up_write(¤t->mm->mmap_sem); + bprm->file->f_op->read(bprm->file, + (char __user *)N_TXTADDR(ex), + ex.a_text+ex.a_data, &pos); +@@ -413,7 +428,11 @@ static int load_aout_binary(struct linux + beyond_if: + set_binfmt(&aout_format); + +- set_brk(current->mm->start_brk, current->mm->brk); ++ retval = set_brk(current->mm->start_brk, current->mm->brk); ++ if (retval < 0) { ++ send_sig(SIGKILL, current, 0); ++ return retval; ++ } + + retval = setup_arg_pages(bprm, EXSTACK_DEFAULT); + if (retval < 0) { +@@ -429,9 +448,11 @@ beyond_if: + #endif + start_thread(regs, ex.a_entry, current->mm->start_stack); + if (unlikely(current->ptrace & PT_PTRACED)) { +- if (current->ptrace & PT_TRACE_EXEC) ++ if (current->ptrace & PT_TRACE_EXEC) { ++ set_pn_state(current, PN_STOP_EXEC); + ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); +- else ++ clear_pn_state(current); ++ } else + send_sig(SIGTRAP, current, 0); + } + return 0; +@@ -478,8 +499,9 @@ static int load_aout_library(struct file + file->f_dentry->d_name.name); + error_time = jiffies; + } +- ++ down_write(¤t->mm->mmap_sem); + do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); ++ up_write(¤t->mm->mmap_sem); + + file->f_op->read(file, (char __user *)start_addr, + ex.a_text + ex.a_data, &pos); +@@ -503,7 +525,9 @@ static int load_aout_library(struct file + len = PAGE_ALIGN(ex.a_text + ex.a_data); + bss = ex.a_text + ex.a_data + ex.a_bss; + if (bss > len) { ++ down_write(¤t->mm->mmap_sem); + error = do_brk(start_addr + len, bss - len); ++ up_write(¤t->mm->mmap_sem); + retval = error; + if (error != start_addr + len) + goto out; +diff -uprN linux-2.6.8.1.orig/fs/binfmt_elf.c linux-2.6.8.1-ve022stab078/fs/binfmt_elf.c +--- linux-2.6.8.1.orig/fs/binfmt_elf.c 2004-08-14 14:55:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_elf.c 2006-05-11 13:05:45.000000000 +0400 +@@ -87,7 +87,10 @@ static int set_brk(unsigned long start, + start = 
ELF_PAGEALIGN(start); + end = ELF_PAGEALIGN(end); + if (end > start) { +- unsigned long addr = do_brk(start, end - start); ++ unsigned long addr; ++ down_write(¤t->mm->mmap_sem); ++ addr = do_brk(start, end - start); ++ up_write(¤t->mm->mmap_sem); + if (BAD_ADDR(addr)) + return addr; + } +@@ -102,15 +105,17 @@ static int set_brk(unsigned long start, + be in memory */ + + +-static void padzero(unsigned long elf_bss) ++static int padzero(unsigned long elf_bss) + { + unsigned long nbyte; + + nbyte = ELF_PAGEOFFSET(elf_bss); + if (nbyte) { + nbyte = ELF_MIN_ALIGN - nbyte; +- clear_user((void __user *) elf_bss, nbyte); ++ if (clear_user((void __user *) elf_bss, nbyte)) ++ return -EFAULT; + } ++ return 0; + } + + /* Let's use some macros to make this stack manipulation a litle clearer */ +@@ -126,7 +131,7 @@ static void padzero(unsigned long elf_bs + #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) + #endif + +-static void ++static int + create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, + int interp_aout, unsigned long load_addr, + unsigned long interp_load_addr) +@@ -171,7 +176,8 @@ create_elf_tables(struct linux_binprm *b + STACK_ALLOC(p, ((current->pid % 64) << 7)); + #endif + u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); +- __copy_to_user(u_platform, k_platform, len); ++ if (__copy_to_user(u_platform, k_platform, len)) ++ return -EFAULT; + } + + /* Create the ELF interpreter info */ +@@ -233,7 +239,8 @@ create_elf_tables(struct linux_binprm *b + #endif + + /* Now, let's put argc (and argv, envp if appropriate) on the stack */ +- __put_user(argc, sp++); ++ if (__put_user(argc, sp++)) ++ return -EFAULT; + if (interp_aout) { + argv = sp + 2; + envp = argv + argc + 1; +@@ -245,31 +252,35 @@ create_elf_tables(struct linux_binprm *b + } + + /* Populate argv and envp */ +- p = current->mm->arg_start; ++ p = current->mm->arg_end = current->mm->arg_start; + while (argc-- > 0) { + size_t len; + __put_user((elf_addr_t)p, argv++); + len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) +- return; ++ return 0; + p += len; + } +- __put_user(0, argv); ++ if (__put_user(0, argv)) ++ return -EFAULT; + current->mm->arg_end = current->mm->env_start = p; + while (envc-- > 0) { + size_t len; + __put_user((elf_addr_t)p, envp++); + len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) +- return; ++ return 0; + p += len; + } +- __put_user(0, envp); ++ if (__put_user(0, envp)) ++ return -EFAULT; + current->mm->env_end = p; + + /* Put the elf_info on the stack in the right place. 
*/ + sp = (elf_addr_t __user *)envp + 1; +- copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)); ++ if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t))) ++ return -EFAULT; ++ return 0; + } + + #ifndef elf_map +@@ -334,14 +345,17 @@ static unsigned long load_elf_interp(str + goto out; + + retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size); +- error = retval; +- if (retval < 0) ++ error = -EIO; ++ if (retval != size) { ++ if (retval < 0) ++ error = retval; + goto out_close; ++ } + + eppnt = elf_phdata; + for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_type == PT_LOAD) { +- int elf_type = MAP_PRIVATE | MAP_DENYWRITE; ++ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO; + int elf_prot = 0; + unsigned long vaddr = 0; + unsigned long k, map_addr; +@@ -399,12 +413,18 @@ static unsigned long load_elf_interp(str + * that there are zero-mapped pages up to and including the + * last bss page. + */ +- padzero(elf_bss); ++ if (padzero(elf_bss)) { ++ error = -EFAULT; ++ goto out_close; ++ } ++ + elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */ + + /* Map the last of the bss segment */ + if (last_bss > elf_bss) { ++ down_write(¤t->mm->mmap_sem); + error = do_brk(elf_bss, last_bss - elf_bss); ++ up_write(¤t->mm->mmap_sem); + if (BAD_ADDR(error)) + goto out_close; + } +@@ -444,7 +464,9 @@ static unsigned long load_aout_interp(st + goto out; + } + ++ down_write(¤t->mm->mmap_sem); + do_brk(0, text_data); ++ up_write(¤t->mm->mmap_sem); + if (!interpreter->f_op || !interpreter->f_op->read) + goto out; + if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0) +@@ -452,8 +474,11 @@ static unsigned long load_aout_interp(st + flush_icache_range((unsigned long)addr, + (unsigned long)addr + text_data); + ++ ++ down_write(¤t->mm->mmap_sem); + do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), + interp_ex->a_bss); ++ up_write(¤t->mm->mmap_sem); + elf_entry = interp_ex->a_entry; + + out: +@@ -487,25 +512,33 @@ static int load_elf_binary(struct linux_ + unsigned long elf_entry, interp_load_addr = 0; + unsigned long start_code, end_code, start_data, end_data; + unsigned long reloc_func_desc = 0; +- struct elfhdr elf_ex; +- struct elfhdr interp_elf_ex; +- struct exec interp_ex; + char passed_fileno[6]; + struct files_struct *files; + int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT; + unsigned long def_flags = 0; ++ struct { ++ struct elfhdr elf_ex; ++ struct elfhdr interp_elf_ex; ++ struct exec interp_ex; ++ } *loc; ++ ++ loc = kmalloc(sizeof(*loc), GFP_KERNEL); ++ if (!loc) { ++ retval = -ENOMEM; ++ goto out_ret; ++ } + + /* Get the exec-header */ +- elf_ex = *((struct elfhdr *) bprm->buf); ++ loc->elf_ex = *((struct elfhdr *) bprm->buf); + + retval = -ENOEXEC; + /* First of all, some simple consistency checks */ +- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) ++ if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0) + goto out; + +- if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ++ if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) + goto out; +- if (!elf_check_arch(&elf_ex)) ++ if (!elf_check_arch(&loc->elf_ex)) + goto out; + if (!bprm->file->f_op||!bprm->file->f_op->mmap) + goto out; +@@ -513,18 +546,21 @@ static int load_elf_binary(struct linux_ + /* Now read in all of the header information */ + + retval = -ENOMEM; +- if (elf_ex.e_phentsize != sizeof(struct elf_phdr)) ++ if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) + goto out; +- if 
(elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) ++ if (loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) + goto out; +- size = elf_ex.e_phnum * sizeof(struct elf_phdr); ++ size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); + elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); + if (!elf_phdata) + goto out; + +- retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size); +- if (retval < 0) ++ retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size); ++ if (retval != size) { ++ if (retval >= 0) ++ retval = -EIO; + goto out_free_ph; ++ } + + files = current->files; /* Refcounted so ok */ + retval = unshare_files(); +@@ -553,7 +589,7 @@ static int load_elf_binary(struct linux_ + start_data = 0; + end_data = 0; + +- for (i = 0; i < elf_ex.e_phnum; i++) { ++ for (i = 0; i < loc->elf_ex.e_phnum; i++) { + if (elf_ppnt->p_type == PT_INTERP) { + /* This is the program interpreter used for + * shared libraries - for now assume that this +@@ -561,7 +597,8 @@ static int load_elf_binary(struct linux_ + */ + + retval = -ENOMEM; +- if (elf_ppnt->p_filesz > PATH_MAX) ++ if (elf_ppnt->p_filesz > PATH_MAX || ++ elf_ppnt->p_filesz == 0) + goto out_free_file; + elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, + GFP_KERNEL); +@@ -571,8 +608,16 @@ static int load_elf_binary(struct linux_ + retval = kernel_read(bprm->file, elf_ppnt->p_offset, + elf_interpreter, + elf_ppnt->p_filesz); +- if (retval < 0) ++ if (retval != elf_ppnt->p_filesz) { ++ if (retval >= 0) ++ retval = -EIO; ++ goto out_free_interp; ++ } ++ /* make sure path is NULL terminated */ ++ retval = -EINVAL; ++ if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') + goto out_free_interp; ++ + /* If the program interpreter is one of these two, + * then assume an iBCS2 image. Otherwise assume + * a native linux image. +@@ -600,26 +645,29 @@ static int load_elf_binary(struct linux_ + * switch really is going to happen - do this in + * flush_thread(). 
- akpm + */ +- SET_PERSONALITY(elf_ex, ibcs2_interpreter); ++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + +- interpreter = open_exec(elf_interpreter); ++ interpreter = open_exec(elf_interpreter, NULL); + retval = PTR_ERR(interpreter); + if (IS_ERR(interpreter)) + goto out_free_interp; + retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); +- if (retval < 0) ++ if (retval != BINPRM_BUF_SIZE) { ++ if (retval >= 0) ++ retval = -EIO; + goto out_free_dentry; ++ } + + /* Get the exec headers */ +- interp_ex = *((struct exec *) bprm->buf); +- interp_elf_ex = *((struct elfhdr *) bprm->buf); ++ loc->interp_ex = *((struct exec *) bprm->buf); ++ loc->interp_elf_ex = *((struct elfhdr *) bprm->buf); + break; + } + elf_ppnt++; + } + + elf_ppnt = elf_phdata; +- for (i = 0; i < elf_ex.e_phnum; i++, elf_ppnt++) ++ for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) + if (elf_ppnt->p_type == PT_GNU_STACK) { + if (elf_ppnt->p_flags & PF_X) + executable_stack = EXSTACK_ENABLE_X; +@@ -627,19 +675,19 @@ static int load_elf_binary(struct linux_ + executable_stack = EXSTACK_DISABLE_X; + break; + } +- have_pt_gnu_stack = (i < elf_ex.e_phnum); ++ have_pt_gnu_stack = (i < loc->elf_ex.e_phnum); + + /* Some simple consistency checks for the interpreter */ + if (elf_interpreter) { + interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; + + /* Now figure out which format our binary is */ +- if ((N_MAGIC(interp_ex) != OMAGIC) && +- (N_MAGIC(interp_ex) != ZMAGIC) && +- (N_MAGIC(interp_ex) != QMAGIC)) ++ if ((N_MAGIC(loc->interp_ex) != OMAGIC) && ++ (N_MAGIC(loc->interp_ex) != ZMAGIC) && ++ (N_MAGIC(loc->interp_ex) != QMAGIC)) + interpreter_type = INTERPRETER_ELF; + +- if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) ++ if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) + interpreter_type &= ~INTERPRETER_ELF; + + retval = -ELIBBAD; +@@ -655,11 +703,11 @@ static int load_elf_binary(struct linux_ + } + /* Verify the interpreter has a valid arch */ + if ((interpreter_type == INTERPRETER_ELF) && +- !elf_check_arch(&interp_elf_ex)) ++ !elf_check_arch(&loc->interp_elf_ex)) + goto out_free_dentry; + } else { + /* Executables without an interpreter also need a personality */ +- SET_PERSONALITY(elf_ex, ibcs2_interpreter); ++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); + } + + /* OK, we are done with that, now set up the arg stuff, +@@ -699,8 +747,8 @@ static int load_elf_binary(struct linux_ + + /* Do this immediately, since STACK_TOP as used in setup_arg_pages + may depend on the personality. */ +- SET_PERSONALITY(elf_ex, ibcs2_interpreter); +- if (elf_read_implies_exec(elf_ex, have_pt_gnu_stack)) ++ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); ++ if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack)) + current->personality |= READ_IMPLIES_EXEC; + + /* Do this so that we can load the interpreter, if need be. We will +@@ -720,7 +768,7 @@ static int load_elf_binary(struct linux_ + the image should be loaded at fixed address, not at a variable + address. */ + +- for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { ++ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { + int elf_prot = 0, elf_flags; + unsigned long k, vaddr; + +@@ -744,7 +792,13 @@ static int load_elf_binary(struct linux_ + nbyte = ELF_MIN_ALIGN - nbyte; + if (nbyte > elf_brk - elf_bss) + nbyte = elf_brk - elf_bss; +- clear_user((void __user *) elf_bss + load_bias, nbyte); ++ /* ++ * This bss-zeroing can fail if the ELF file ++ * specifies odd protections. 
So we don't check ++ * the return value ++ */ ++ (void)clear_user((void __user *)elf_bss + ++ load_bias, nbyte); + } + } + +@@ -752,12 +806,13 @@ static int load_elf_binary(struct linux_ + if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; + if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; + +- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; ++ elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE| ++ MAP_EXECPRIO; + + vaddr = elf_ppnt->p_vaddr; +- if (elf_ex.e_type == ET_EXEC || load_addr_set) { ++ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { + elf_flags |= MAP_FIXED; +- } else if (elf_ex.e_type == ET_DYN) { ++ } else if (loc->elf_ex.e_type == ET_DYN) { + /* Try and get dynamic programs out of the way of the default mmap + base, as well as whatever program they might try to exec. This + is because the brk will follow the loader, and is not movable. */ +@@ -765,13 +820,15 @@ static int load_elf_binary(struct linux_ + } + + error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); +- if (BAD_ADDR(error)) +- continue; ++ if (BAD_ADDR(error)) { ++ send_sig(SIGKILL, current, 0); ++ goto out_free_dentry; ++ } + + if (!load_addr_set) { + load_addr_set = 1; + load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); +- if (elf_ex.e_type == ET_DYN) { ++ if (loc->elf_ex.e_type == ET_DYN) { + load_bias += error - + ELF_PAGESTART(load_bias + vaddr); + load_addr += load_bias; +@@ -808,7 +865,7 @@ static int load_elf_binary(struct linux_ + elf_brk = k; + } + +- elf_ex.e_entry += load_bias; ++ loc->elf_ex.e_entry += load_bias; + elf_bss += load_bias; + elf_brk += load_bias; + start_code += load_bias; +@@ -826,14 +883,18 @@ static int load_elf_binary(struct linux_ + send_sig(SIGKILL, current, 0); + goto out_free_dentry; + } +- padzero(elf_bss); ++ if (padzero(elf_bss)) { ++ send_sig(SIGSEGV, current, 0); ++ retval = -EFAULT; /* Nobody gets to see this, but.. */ ++ goto out_free_dentry; ++ } + + if (elf_interpreter) { + if (interpreter_type == INTERPRETER_AOUT) +- elf_entry = load_aout_interp(&interp_ex, ++ elf_entry = load_aout_interp(&loc->interp_ex, + interpreter); + else +- elf_entry = load_elf_interp(&interp_elf_ex, ++ elf_entry = load_elf_interp(&loc->interp_elf_ex, + interpreter, + &interp_load_addr); + if (BAD_ADDR(elf_entry)) { +@@ -848,7 +909,12 @@ static int load_elf_binary(struct linux_ + fput(interpreter); + kfree(elf_interpreter); + } else { +- elf_entry = elf_ex.e_entry; ++ elf_entry = loc->elf_ex.e_entry; ++ if (BAD_ADDR(elf_entry)) { ++ send_sig(SIGSEGV, current, 0); ++ retval = -ENOEXEC; /* Nobody gets to see this, but.. */ ++ goto out_free_dentry; ++ } + } + + kfree(elf_phdata); +@@ -858,9 +924,17 @@ static int load_elf_binary(struct linux_ + + set_binfmt(&elf_format); + ++#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES ++ retval = arch_setup_additional_pages(bprm, executable_stack); ++ if (retval < 0) { ++ send_sig(SIGKILL, current, 0); ++ goto out; ++ } ++#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ ++ + compute_creds(bprm); + current->flags &= ~PF_FORKNOEXEC; +- create_elf_tables(bprm, &elf_ex, (interpreter_type == INTERPRETER_AOUT), ++ create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), + load_addr, interp_load_addr); + /* N.B. passed_fileno might not be initialized? 
*/ + if (interpreter_type == INTERPRETER_AOUT) +@@ -898,13 +972,17 @@ static int load_elf_binary(struct linux_ + + start_thread(regs, elf_entry, bprm->p); + if (unlikely(current->ptrace & PT_PTRACED)) { +- if (current->ptrace & PT_TRACE_EXEC) ++ if (current->ptrace & PT_TRACE_EXEC) { ++ set_pn_state(current, PN_STOP_EXEC); + ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); +- else ++ clear_pn_state(current); ++ } else + send_sig(SIGTRAP, current, 0); + } + retval = 0; + out: ++ kfree(loc); ++out_ret: + return retval; + + /* error cleanup */ +@@ -933,6 +1011,7 @@ out_free_ph: + static int load_elf_library(struct file *file) + { + struct elf_phdr *elf_phdata; ++ struct elf_phdr *eppnt; + unsigned long elf_bss, bss, len; + int retval, error, i, j; + struct elfhdr elf_ex; +@@ -956,43 +1035,52 @@ static int load_elf_library(struct file + /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */ + + error = -ENOMEM; +- elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL); ++ elf_phdata = kmalloc(j, GFP_KERNEL); + if (!elf_phdata) + goto out; + ++ eppnt = elf_phdata; + error = -ENOEXEC; +- retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j); ++ retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j); + if (retval != j) + goto out_free_ph; + + for (j = 0, i = 0; i<elf_ex.e_phnum; i++) +- if ((elf_phdata + i)->p_type == PT_LOAD) j++; ++ if ((eppnt + i)->p_type == PT_LOAD) ++ j++; + if (j != 1) + goto out_free_ph; + +- while (elf_phdata->p_type != PT_LOAD) elf_phdata++; ++ while (eppnt->p_type != PT_LOAD) ++ eppnt++; + + /* Now use mmap to map the library into memory. */ + down_write(¤t->mm->mmap_sem); + error = do_mmap(file, +- ELF_PAGESTART(elf_phdata->p_vaddr), +- (elf_phdata->p_filesz + +- ELF_PAGEOFFSET(elf_phdata->p_vaddr)), ++ ELF_PAGESTART(eppnt->p_vaddr), ++ (eppnt->p_filesz + ++ ELF_PAGEOFFSET(eppnt->p_vaddr)), + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, +- (elf_phdata->p_offset - +- ELF_PAGEOFFSET(elf_phdata->p_vaddr))); ++ (eppnt->p_offset - ++ ELF_PAGEOFFSET(eppnt->p_vaddr))); + up_write(¤t->mm->mmap_sem); +- if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) ++ if (error != ELF_PAGESTART(eppnt->p_vaddr)) + goto out_free_ph; + +- elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz; +- padzero(elf_bss); ++ elf_bss = eppnt->p_vaddr + eppnt->p_filesz; ++ if (padzero(elf_bss)) { ++ error = -EFAULT; ++ goto out_free_ph; ++ } + +- len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1); +- bss = elf_phdata->p_memsz + elf_phdata->p_vaddr; +- if (bss > len) ++ len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); ++ bss = eppnt->p_memsz + eppnt->p_vaddr; ++ if (bss > len) { ++ down_write(¤t->mm->mmap_sem); + do_brk(len, bss - len); ++ up_write(¤t->mm->mmap_sem); ++ } + error = 0; + + out_free_ph: +@@ -1172,20 +1260,20 @@ static void fill_prstatus(struct elf_prs + prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; + prstatus->pr_sigpend = p->pending.signal.sig[0]; + prstatus->pr_sighold = p->blocked.sig[0]; +- prstatus->pr_pid = p->pid; +- prstatus->pr_ppid = p->parent->pid; +- prstatus->pr_pgrp = process_group(p); +- prstatus->pr_sid = p->signal->session; ++ prstatus->pr_pid = virt_pid(p); ++ prstatus->pr_ppid = virt_pid(p->parent); ++ prstatus->pr_pgrp = virt_pgid(p); ++ prstatus->pr_sid = virt_sid(p); + jiffies_to_timeval(p->utime, &prstatus->pr_utime); + jiffies_to_timeval(p->stime, &prstatus->pr_stime); + jiffies_to_timeval(p->cutime, &prstatus->pr_cutime); + 
jiffies_to_timeval(p->cstime, &prstatus->pr_cstime); + } + +-static void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, +- struct mm_struct *mm) ++static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, ++ struct mm_struct *mm) + { +- int i, len; ++ unsigned int i, len; + + /* first copy the parameters from user space */ + memset(psinfo, 0, sizeof(struct elf_prpsinfo)); +@@ -1193,17 +1281,18 @@ static void fill_psinfo(struct elf_prpsi + len = mm->arg_end - mm->arg_start; + if (len >= ELF_PRARGSZ) + len = ELF_PRARGSZ-1; +- copy_from_user(&psinfo->pr_psargs, +- (const char __user *)mm->arg_start, len); ++ if (copy_from_user(&psinfo->pr_psargs, ++ (const char __user *)mm->arg_start, len)) ++ return -EFAULT; + for(i = 0; i < len; i++) + if (psinfo->pr_psargs[i] == 0) + psinfo->pr_psargs[i] = ' '; + psinfo->pr_psargs[len] = 0; + +- psinfo->pr_pid = p->pid; +- psinfo->pr_ppid = p->parent->pid; +- psinfo->pr_pgrp = process_group(p); +- psinfo->pr_sid = p->signal->session; ++ psinfo->pr_pid = virt_pid(p); ++ psinfo->pr_ppid = virt_pid(p->parent); ++ psinfo->pr_pgrp = virt_pgid(p); ++ psinfo->pr_sid = virt_sid(p); + + i = p->state ? ffz(~p->state) + 1 : 0; + psinfo->pr_state = i; +@@ -1215,7 +1304,7 @@ static void fill_psinfo(struct elf_prpsi + SET_GID(psinfo->pr_gid, p->gid); + strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); + +- return; ++ return 0; + } + + /* Here is the structure in which status of each thread is captured. */ +@@ -1344,7 +1433,7 @@ static int elf_core_dump(long signr, str + /* capture the status of all other threads */ + if (signr) { + read_lock(&tasklist_lock); +- do_each_thread(g,p) ++ do_each_thread_ve(g,p) + if (current->mm == p->mm && current != p) { + int sz = elf_dump_thread_status(signr, p, &thread_list); + if (!sz) { +@@ -1353,7 +1442,7 @@ static int elf_core_dump(long signr, str + } else + thread_status_size += sz; + } +- while_each_thread(g,p); ++ while_each_thread_ve(g,p); + read_unlock(&tasklist_lock); + } + +diff -uprN linux-2.6.8.1.orig/fs/binfmt_em86.c linux-2.6.8.1-ve022stab078/fs/binfmt_em86.c +--- linux-2.6.8.1.orig/fs/binfmt_em86.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_em86.c 2006-05-11 13:05:35.000000000 +0400 +@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm + * Note that we use open_exec() as the name is now in kernel + * space, and we don't need to copy it. 
+ */ +- file = open_exec(interp); ++ file = open_exec(interp, bprm); + if (IS_ERR(file)) + return PTR_ERR(file); + +diff -uprN linux-2.6.8.1.orig/fs/binfmt_flat.c linux-2.6.8.1-ve022stab078/fs/binfmt_flat.c +--- linux-2.6.8.1.orig/fs/binfmt_flat.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_flat.c 2006-05-11 13:05:35.000000000 +0400 +@@ -774,7 +774,7 @@ static int load_flat_shared_library(int + + /* Open the file up */ + bprm.filename = buf; +- bprm.file = open_exec(bprm.filename); ++ bprm.file = open_exec(bprm.filename, &bprm); + res = PTR_ERR(bprm.file); + if (IS_ERR(bprm.file)) + return res; +diff -uprN linux-2.6.8.1.orig/fs/binfmt_misc.c linux-2.6.8.1-ve022stab078/fs/binfmt_misc.c +--- linux-2.6.8.1.orig/fs/binfmt_misc.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_misc.c 2006-05-11 13:05:35.000000000 +0400 +@@ -150,7 +150,8 @@ static int load_misc_binary(struct linux + + /* if the binary is not readable than enforce mm->dumpable=0 + regardless of the interpreter's permissions */ +- if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL)) ++ if (permission(bprm->file->f_dentry->d_inode, MAY_READ, ++ NULL, NULL)) + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + allow_write_access(bprm->file); +@@ -179,7 +180,7 @@ static int load_misc_binary(struct linux + + bprm->interp = iname; /* for binfmt_script */ + +- interp_file = open_exec (iname); ++ interp_file = open_exec (iname, bprm); + retval = PTR_ERR (interp_file); + if (IS_ERR (interp_file)) + goto _error; +@@ -509,7 +510,8 @@ static struct inode *bm_get_inode(struct + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; +- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode->i_atime = inode->i_mtime = inode->i_ctime = ++ current_fs_time(inode->i_sb); + } + return inode; + } +diff -uprN linux-2.6.8.1.orig/fs/binfmt_script.c linux-2.6.8.1-ve022stab078/fs/binfmt_script.c +--- linux-2.6.8.1.orig/fs/binfmt_script.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/binfmt_script.c 2006-05-11 13:05:35.000000000 +0400 +@@ -85,7 +85,7 @@ static int load_script(struct linux_binp + /* + * OK, now restart the process with the interpreter's dentry. 
+ */ +- file = open_exec(interp); ++ file = open_exec(interp, bprm); + if (IS_ERR(file)) + return PTR_ERR(file); + +diff -uprN linux-2.6.8.1.orig/fs/bio.c linux-2.6.8.1-ve022stab078/fs/bio.c +--- linux-2.6.8.1.orig/fs/bio.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/bio.c 2006-05-11 13:05:28.000000000 +0400 +@@ -388,20 +388,17 @@ int bio_uncopy_user(struct bio *bio) + struct bio_vec *bvec; + int i, ret = 0; + +- if (bio_data_dir(bio) == READ) { +- char *uaddr = bio->bi_private; ++ char *uaddr = bio->bi_private; + +- __bio_for_each_segment(bvec, bio, i, 0) { +- char *addr = page_address(bvec->bv_page); +- +- if (!ret && copy_to_user(uaddr, addr, bvec->bv_len)) +- ret = -EFAULT; ++ __bio_for_each_segment(bvec, bio, i, 0) { ++ char *addr = page_address(bvec->bv_page); ++ if (bio_data_dir(bio) == READ && !ret && ++ copy_to_user(uaddr, addr, bvec->bv_len)) ++ ret = -EFAULT; + +- __free_page(bvec->bv_page); +- uaddr += bvec->bv_len; +- } ++ __free_page(bvec->bv_page); ++ uaddr += bvec->bv_len; + } +- + bio_put(bio); + return ret; + } +@@ -457,6 +454,7 @@ struct bio *bio_copy_user(request_queue_ + */ + if (!ret) { + if (!write_to_vm) { ++ unsigned long p = uaddr; + bio->bi_rw |= (1 << BIO_RW); + /* + * for a write, copy in data to kernel pages +@@ -465,8 +463,9 @@ struct bio *bio_copy_user(request_queue_ + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + +- if (copy_from_user(addr, (char *) uaddr, bvec->bv_len)) ++ if (copy_from_user(addr, (char *) p, bvec->bv_len)) + goto cleanup; ++ p += bvec->bv_len; + } + } + +diff -uprN linux-2.6.8.1.orig/fs/block_dev.c linux-2.6.8.1-ve022stab078/fs/block_dev.c +--- linux-2.6.8.1.orig/fs/block_dev.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/block_dev.c 2006-05-11 13:05:40.000000000 +0400 +@@ -548,9 +548,16 @@ static int do_open(struct block_device * + { + struct module *owner = NULL; + struct gendisk *disk; +- int ret = -ENXIO; ++ int ret; + int part; + ++#ifdef CONFIG_VE ++ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev, ++ file->f_mode&(FMODE_READ|FMODE_WRITE)); ++ if (ret) ++ return ret; ++#endif ++ ret = -ENXIO; + file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); + disk = get_gendisk(bdev->bd_dev, &part); +@@ -821,7 +828,7 @@ EXPORT_SYMBOL(ioctl_by_bdev); + * namespace if possible and return it. Return ERR_PTR(error) + * otherwise. 
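The fs/bio.c hunk in the chunk above fixes bio_copy_user() by advancing a separate cursor (p) through the user buffer instead of re-reading from the start for every segment. Below is a standalone sketch of the same scatter-copy pattern, with plain memcpy standing in for copy_from_user() and invented segment sizes; it is only an illustration of the cursor discipline, not the kernel code.

    #include <stdio.h>
    #include <string.h>

    /* Scatter one contiguous source buffer into several destination segments.
     * The cursor 'p' must advance by each segment's length; reusing 'src' for
     * every segment is exactly the bug class the hunk above fixes. */
    static void scatter_copy(char *const segs[], const size_t seglen[],
                             int nsegs, const char *src)
    {
        const char *p = src;
        int i;

        for (i = 0; i < nsegs; i++) {
            memcpy(segs[i], p, seglen[i]);
            p += seglen[i];          /* advance, do not restart at src */
        }
    }

    int main(void)
    {
        char a[3], b[4], c[5];
        char *segs[] = { a, b, c };
        size_t lens[] = { sizeof(a), sizeof(b), sizeof(c) };

        scatter_copy(segs, lens, 3, "abcdefghijkl");
        printf("%.3s|%.4s|%.5s\n", a, b, c);   /* abc|defg|hijkl */
        return 0;
    }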
+ */ +-struct block_device *lookup_bdev(const char *path) ++struct block_device *lookup_bdev(const char *path, int mode) + { + struct block_device *bdev; + struct inode *inode; +@@ -839,6 +846,11 @@ struct block_device *lookup_bdev(const c + error = -ENOTBLK; + if (!S_ISBLK(inode->i_mode)) + goto fail; ++#ifdef CONFIG_VE ++ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode); ++ if (error) ++ goto fail; ++#endif + error = -EACCES; + if (nd.mnt->mnt_flags & MNT_NODEV) + goto fail; +@@ -870,12 +882,13 @@ struct block_device *open_bdev_excl(cons + mode_t mode = FMODE_READ; + int error = 0; + +- bdev = lookup_bdev(path); ++ if (!(flags & MS_RDONLY)) ++ mode |= FMODE_WRITE; ++ ++ bdev = lookup_bdev(path, mode); + if (IS_ERR(bdev)) + return bdev; + +- if (!(flags & MS_RDONLY)) +- mode |= FMODE_WRITE; + error = blkdev_get(bdev, mode, 0); + if (error) + return ERR_PTR(error); +diff -uprN linux-2.6.8.1.orig/fs/buffer.c linux-2.6.8.1-ve022stab078/fs/buffer.c +--- linux-2.6.8.1.orig/fs/buffer.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/buffer.c 2006-05-11 13:05:35.000000000 +0400 +@@ -505,6 +505,7 @@ __find_get_block_slow(struct block_devic + struct buffer_head *bh; + struct buffer_head *head; + struct page *page; ++ int all_mapped = 1; + + index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); + page = find_get_page(bd_mapping, index); +@@ -522,14 +523,23 @@ __find_get_block_slow(struct block_devic + get_bh(bh); + goto out_unlock; + } ++ if (!buffer_mapped(bh)) ++ all_mapped = 0; + bh = bh->b_this_page; + } while (bh != head); + +- printk("__find_get_block_slow() failed. " +- "block=%llu, b_blocknr=%llu\n", +- (unsigned long long)block, (unsigned long long)bh->b_blocknr); +- printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size); +- printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); ++ /* we might be here because some of the buffers on this page are ++ * not mapped. This is due to various races between ++ * file io on the block device and getblk. It gets dealt with ++ * elsewhere, don't buffer_error if we had some unmapped buffers ++ */ ++ if (all_mapped) { ++ printk("__find_get_block_slow() failed. 
" ++ "block=%llu, b_blocknr=%llu\n", ++ (unsigned long long)block, (unsigned long long)bh->b_blocknr); ++ printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size); ++ printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); ++ } + out_unlock: + spin_unlock(&bd_mapping->private_lock); + page_cache_release(page); +@@ -1177,18 +1187,16 @@ init_page_buffers(struct page *page, str + { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; +- unsigned int b_state; +- +- b_state = 1 << BH_Mapped; +- if (PageUptodate(page)) +- b_state |= 1 << BH_Uptodate; ++ int uptodate = PageUptodate(page); + + do { +- if (!(bh->b_state & (1 << BH_Mapped))) { ++ if (!buffer_mapped(bh)) { + init_buffer(bh, NULL, NULL); + bh->b_bdev = bdev; + bh->b_blocknr = block; +- bh->b_state = b_state; ++ if (uptodate) ++ set_buffer_uptodate(bh); ++ set_buffer_mapped(bh); + } + block++; + bh = bh->b_this_page; +@@ -1217,8 +1225,10 @@ grow_dev_page(struct block_device *bdev, + + if (page_has_buffers(page)) { + bh = page_buffers(page); +- if (bh->b_size == size) ++ if (bh->b_size == size) { ++ init_page_buffers(page, bdev, block, size); + return page; ++ } + if (!try_to_free_buffers(page)) + goto failed; + } +@@ -2022,8 +2032,9 @@ static int __block_prepare_write(struct + goto out; + if (buffer_new(bh)) { + clear_buffer_new(bh); +- unmap_underlying_metadata(bh->b_bdev, +- bh->b_blocknr); ++ if (buffer_mapped(bh)) ++ unmap_underlying_metadata(bh->b_bdev, ++ bh->b_blocknr); + if (PageUptodate(page)) { + set_buffer_uptodate(bh); + continue; +@@ -2756,21 +2767,31 @@ static int end_bio_bh_io_sync(struct bio + if (bio->bi_size) + return 1; + ++ if (err == -EOPNOTSUPP) ++ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); ++ + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bio_put(bio); + return 0; + } + +-void submit_bh(int rw, struct buffer_head * bh) ++int submit_bh(int rw, struct buffer_head * bh) + { + struct bio *bio; ++ int ret = 0; + + BUG_ON(!buffer_locked(bh)); + BUG_ON(!buffer_mapped(bh)); + BUG_ON(!bh->b_end_io); + +- /* Only clear out a write error when rewriting */ +- if (test_set_buffer_req(bh) && rw == WRITE) ++ if (buffer_ordered(bh) && (rw == WRITE)) ++ rw = WRITE_BARRIER; ++ ++ /* ++ * Only clear out a write error when rewriting, should this ++ * include WRITE_SYNC as well? 
++ */ ++ if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) + clear_buffer_write_io_error(bh); + + /* +@@ -2792,7 +2813,14 @@ void submit_bh(int rw, struct buffer_hea + bio->bi_end_io = end_bio_bh_io_sync; + bio->bi_private = bh; + ++ bio_get(bio); + submit_bio(rw, bio); ++ ++ if (bio_flagged(bio, BIO_EOPNOTSUPP)) ++ ret = -EOPNOTSUPP; ++ ++ bio_put(bio); ++ return ret; + } + + /** +@@ -2901,7 +2929,7 @@ drop_buffers(struct page *page, struct b + + bh = head; + do { +- if (buffer_write_io_error(bh)) ++ if (buffer_write_io_error(bh) && page->mapping) + set_bit(AS_EIO, &page->mapping->flags); + if (buffer_busy(bh)) + goto failed; +@@ -3100,7 +3128,7 @@ void __init buffer_init(void) + + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), 0, +- SLAB_PANIC, init_buffer_head, NULL); ++ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL); + for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++) + init_waitqueue_head(&bh_wait_queue_heads[i].wqh); + +diff -uprN linux-2.6.8.1.orig/fs/char_dev.c linux-2.6.8.1-ve022stab078/fs/char_dev.c +--- linux-2.6.8.1.orig/fs/char_dev.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/char_dev.c 2006-05-11 13:05:40.000000000 +0400 +@@ -257,6 +257,13 @@ int chrdev_open(struct inode * inode, st + struct cdev *new = NULL; + int ret = 0; + ++#ifdef CONFIG_VE ++ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev, ++ filp->f_mode&(FMODE_READ|FMODE_WRITE)); ++ if (ret) ++ return ret; ++#endif ++ + spin_lock(&cdev_lock); + p = inode->i_cdev; + if (!p) { +diff -uprN linux-2.6.8.1.orig/fs/cifs/cifsfs.c linux-2.6.8.1-ve022stab078/fs/cifs/cifsfs.c +--- linux-2.6.8.1.orig/fs/cifs/cifsfs.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/cifs/cifsfs.c 2006-05-11 13:05:35.000000000 +0400 +@@ -188,7 +188,8 @@ cifs_statfs(struct super_block *sb, stru + return 0; /* always return success? what if volume is no longer available? 
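The submit_bh() rework in the previous chunk takes an extra reference on the bio (bio_get()/bio_put()) around submit_bio() so the flags can still be examined safely after the request has been handed to the block layer, and it turns the BIO_EOPNOTSUPP completion flag into a return value. Here is a standalone C11 sketch of that "pin, hand off, inspect, unpin" pattern; the request struct, the submit() helper and the flag name are made up for illustration and the "completion" runs synchronously for simplicity.

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define REQ_UNSUPPORTED (1u << 0)   /* invented stand-in for BIO_EOPNOTSUPP */

    struct request {
        atomic_int  refs;
        atomic_uint flags;
    };

    static struct request *req_alloc(void)
    {
        struct request *r = calloc(1, sizeof(*r));
        if (!r)
            return NULL;
        atomic_init(&r->refs, 1);
        atomic_init(&r->flags, 0);
        return r;
    }

    static void req_get(struct request *r) { atomic_fetch_add(&r->refs, 1); }

    static void req_put(struct request *r)
    {
        if (atomic_fetch_sub(&r->refs, 1) == 1)
            free(r);
    }

    /* Pretend the consumer finishes immediately and rejects the request;
     * in the kernel this happens asynchronously from the completion path. */
    static void submit(struct request *r)
    {
        atomic_fetch_or(&r->flags, REQ_UNSUPPORTED);
        req_put(r);                  /* consumer drops its reference */
    }

    int main(void)
    {
        struct request *r = req_alloc();
        int ret = 0;

        if (!r)
            return 1;
        req_get(r);                  /* pin: keep r valid past submit() */
        submit(r);
        if (atomic_load(&r->flags) & REQ_UNSUPPORTED)
            ret = -1;                /* would be -EOPNOTSUPP in the patch */
        req_put(r);                  /* unpin: last reference frees r */

        printf("ret = %d\n", ret);
        return 0;
    }

Without the extra reference the object could already be freed by the time the submitter looks at its flags, which is the race the bio_get()/bio_put() pair closes.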
*/ + } + +-static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) ++static int cifs_permission(struct inode * inode, int mask, ++ struct nameidata *nd, struct exec_perm *exec_perm) + { + struct cifs_sb_info *cifs_sb; + +@@ -200,7 +201,7 @@ static int cifs_permission(struct inode + on the client (above and beyond ACL on servers) for + servers which do not support setting and viewing mode bits, + so allowing client to check permissions is useful */ +- return vfs_permission(inode, mask); ++ return vfs_permission(inode, mask, exec_perm); + } + + static kmem_cache_t *cifs_inode_cachep; +diff -uprN linux-2.6.8.1.orig/fs/coda/dir.c linux-2.6.8.1-ve022stab078/fs/coda/dir.c +--- linux-2.6.8.1.orig/fs/coda/dir.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/coda/dir.c 2006-05-11 13:05:35.000000000 +0400 +@@ -147,7 +147,8 @@ exit: + } + + +-int coda_permission(struct inode *inode, int mask, struct nameidata *nd) ++int coda_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *perm) + { + int error = 0; + +diff -uprN linux-2.6.8.1.orig/fs/coda/pioctl.c linux-2.6.8.1-ve022stab078/fs/coda/pioctl.c +--- linux-2.6.8.1.orig/fs/coda/pioctl.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/coda/pioctl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -25,7 +25,7 @@ + + /* pioctl ops */ + static int coda_ioctl_permission(struct inode *inode, int mask, +- struct nameidata *nd); ++ struct nameidata *nd, struct exec_perm *); + static int coda_pioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long user_data); + +@@ -43,7 +43,8 @@ struct file_operations coda_ioctl_operat + + /* the coda pioctl inode ops */ + static int coda_ioctl_permission(struct inode *inode, int mask, +- struct nameidata *nd) ++ struct nameidata *nd, ++ struct exec_perm *exec_perm) + { + return 0; + } +diff -uprN linux-2.6.8.1.orig/fs/compat.c linux-2.6.8.1-ve022stab078/fs/compat.c +--- linux-2.6.8.1.orig/fs/compat.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/compat.c 2006-05-11 13:05:49.000000000 +0400 +@@ -25,6 +25,7 @@ + #include <linux/file.h> + #include <linux/vfs.h> + #include <linux/ioctl32.h> ++#include <linux/virtinfo.h> + #include <linux/init.h> + #include <linux/sockios.h> /* for SIOCDEVPRIVATE */ + #include <linux/smb.h> +@@ -155,6 +156,8 @@ asmlinkage long compat_sys_statfs(const + if (!error) { + struct kstatfs tmp; + error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); ++ if (!error) ++ error = faudit_statfs(nd.mnt->mnt_sb, &tmp); + if (!error && put_compat_statfs(buf, &tmp)) + error = -EFAULT; + path_release(&nd); +@@ -173,6 +176,8 @@ asmlinkage long compat_sys_fstatfs(unsig + if (!file) + goto out; + error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); ++ if (!error) ++ error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp); + if (!error && put_compat_statfs(buf, &tmp)) + error = -EFAULT; + fput(file); +@@ -216,6 +221,8 @@ asmlinkage long compat_statfs64(const ch + if (!error) { + struct kstatfs tmp; + error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); ++ if (!error) ++ error = faudit_statfs(nd.mnt->mnt_sb, &tmp); + if (!error && put_compat_statfs64(buf, &tmp)) + error = -EFAULT; + path_release(&nd); +@@ -237,6 +244,8 @@ asmlinkage long compat_fstatfs64(unsigne + if (!file) + goto out; + error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); ++ if (!error) ++ error = faudit_statfs(file->f_vfsmnt->mnt_sb, &tmp); + if (!error && put_compat_statfs64(buf, &tmp)) + error = 
-EFAULT; + fput(file); +@@ -429,6 +438,8 @@ asmlinkage long compat_sys_ioctl(unsigne + fn = d_path(filp->f_dentry, + filp->f_vfsmnt, path, + PAGE_SIZE); ++ if (IS_ERR(fn)) ++ fn = "(err)"; + } + + sprintf(buf,"'%c'", (cmd>>24) & 0x3f); +@@ -1375,7 +1386,11 @@ int compat_do_execve(char * filename, + + sched_balance_exec(); + +- file = open_exec(filename); ++ retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL); ++ if (retval) ++ return retval; ++ ++ file = open_exec(filename, &bprm); + + retval = PTR_ERR(file); + if (IS_ERR(file)) +diff -uprN linux-2.6.8.1.orig/fs/compat_ioctl.c linux-2.6.8.1-ve022stab078/fs/compat_ioctl.c +--- linux-2.6.8.1.orig/fs/compat_ioctl.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/compat_ioctl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -640,8 +640,11 @@ int siocdevprivate_ioctl(unsigned int fd + /* Don't check these user accesses, just let that get trapped + * in the ioctl handler instead. + */ +- copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ); +- __put_user(data64, &u_ifreq64->ifr_ifru.ifru_data); ++ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], ++ IFNAMSIZ)) ++ return -EFAULT; ++ if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) ++ return -EFAULT; + + return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64); + } +@@ -679,6 +682,11 @@ static int dev_ifsioc(unsigned int fd, u + set_fs (old_fs); + if (!err) { + switch (cmd) { ++ /* TUNSETIFF is defined as _IOW, it should be _IORW ++ * as the data is copied back to user space, but that ++ * cannot be fixed without breaking all existing apps. ++ */ ++ case TUNSETIFF: + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: +@@ -785,13 +793,16 @@ static int routing_ioctl(unsigned int fd + r = (void *) &r4; + } + +- if (ret) +- return -EFAULT; ++ if (ret) { ++ ret = -EFAULT; ++ goto out; ++ } + + set_fs (KERNEL_DS); + ret = sys_ioctl (fd, cmd, (unsigned long) r); + set_fs (old_fs); + ++out: + if (mysock) + sockfd_put(mysock); + +@@ -2336,7 +2347,9 @@ put_dirent32 (struct dirent *d, struct c + __put_user(d->d_ino, &d32->d_ino); + __put_user(d->d_off, &d32->d_off); + __put_user(d->d_reclen, &d32->d_reclen); +- __copy_to_user(d32->d_name, d->d_name, d->d_reclen); ++ if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) ++ return -EFAULT; ++ + return ret; + } + +@@ -2479,7 +2492,8 @@ static int serial_struct_ioctl(unsigned + if (cmd == TIOCSSERIAL) { + if (verify_area(VERIFY_READ, ss32, sizeof(SS32))) + return -EFAULT; +- __copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)); ++ if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base))) ++ return -EFAULT; + __get_user(udata, &ss32->iomem_base); + ss.iomem_base = compat_ptr(udata); + __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift); +@@ -2492,7 +2506,8 @@ static int serial_struct_ioctl(unsigned + if (cmd == TIOCGSERIAL && err >= 0) { + if (verify_area(VERIFY_WRITE, ss32, sizeof(SS32))) + return -EFAULT; +- __copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)); ++ if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base))) ++ return -EFAULT; + __put_user((unsigned long)ss.iomem_base >> 32 ? 
+ 0xffffffff : (unsigned)(unsigned long)ss.iomem_base, + &ss32->iomem_base); +diff -uprN linux-2.6.8.1.orig/fs/dcache.c linux-2.6.8.1-ve022stab078/fs/dcache.c +--- linux-2.6.8.1.orig/fs/dcache.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/dcache.c 2006-05-11 13:05:40.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <linux/mm.h> + #include <linux/fs.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/init.h> + #include <linux/smp_lock.h> + #include <linux/hash.h> +@@ -26,11 +27,15 @@ + #include <linux/module.h> + #include <linux/mount.h> + #include <linux/file.h> ++#include <linux/namei.h> + #include <asm/uaccess.h> + #include <linux/security.h> + #include <linux/seqlock.h> + #include <linux/swap.h> + #include <linux/bootmem.h> ++#include <linux/kernel_stat.h> ++ ++#include <ub/ub_dcache.h> + + /* #define DCACHE_DEBUG 1 */ + +@@ -43,7 +48,10 @@ EXPORT_SYMBOL(dcache_lock); + + static kmem_cache_t *dentry_cache; + +-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) ++unsigned int dentry_memusage(void) ++{ ++ return kmem_cache_memusage(dentry_cache); ++} + + /* + * This is the single most critical data structure when it comes +@@ -70,6 +78,7 @@ static void d_callback(struct rcu_head * + { + struct dentry * dentry = container_of(head, struct dentry, d_rcu); + ++ ub_dentry_free(dentry); + if (dname_external(dentry)) + kfree(dentry->d_name.name); + kmem_cache_free(dentry_cache, dentry); +@@ -109,6 +118,75 @@ static inline void dentry_iput(struct de + } + } + ++struct dcache_shrinker { ++ struct list_head list; ++ struct dentry *dentry; ++}; ++ ++DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq); ++ ++/* called under dcache_lock */ ++static void dcache_shrinker_add(struct dcache_shrinker *ds, ++ struct dentry *parent, struct dentry *dentry) ++{ ++ struct super_block *sb; ++ ++ sb = parent->d_sb; ++ ds->dentry = parent; ++ list_add(&ds->list, &sb->s_dshrinkers); ++} ++ ++/* called under dcache_lock */ ++static void dcache_shrinker_del(struct dcache_shrinker *ds) ++{ ++ if (ds == NULL || list_empty(&ds->list)) ++ return; ++ ++ list_del_init(&ds->list); ++ wake_up_all(&dcache_shrinker_wq); ++} ++ ++/* called under dcache_lock, drops inside */ ++static void dcache_shrinker_wait(struct super_block *sb) ++{ ++ DECLARE_WAITQUEUE(wq, current); ++ ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ add_wait_queue(&dcache_shrinker_wq, &wq); ++ spin_unlock(&dcache_lock); ++ ++ schedule(); ++ remove_wait_queue(&dcache_shrinker_wq, &wq); ++ __set_current_state(TASK_RUNNING); ++} ++ ++void dcache_shrinker_wait_sb(struct super_block *sb) ++{ ++ /* the root dentry can be held in dput_recursive */ ++ spin_lock(&dcache_lock); ++ while (!list_empty(&sb->s_dshrinkers)) { ++ dcache_shrinker_wait(sb); ++ spin_lock(&dcache_lock); ++ } ++ spin_unlock(&dcache_lock); ++} ++ ++/* dcache_lock protects shrinker's list */ ++static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck) ++{ ++ struct super_block *sb; ++ struct dcache_shrinker *ds; ++ ++ sb = parent->d_sb; ++ list_for_each_entry(ds, &sb->s_dshrinkers, list) { ++ /* is one of dcache shrinkers working on the dentry? 
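A detail worth noting from the compat_sys_ioctl hunk a little above: d_path() can return an encoded error pointer, and the added "if (IS_ERR(fn)) fn = \"(err)\";" guards against using it as a string. The sketch below re-implements the ERR_PTR()/IS_ERR()/PTR_ERR() convention in plain userspace C to show how a single pointer return value can carry either a valid pointer or a small negative errno; the lookup() helper is invented for the example.

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    /* Encode small negative errno values at the top of the pointer range. */
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static void *lookup(int ok)
    {
        static char name[] = "/tmp/demo";
        return ok ? (void *)name : ERR_PTR(-ENOENT);
    }

    int main(void)
    {
        void *p = lookup(0);

        if (IS_ERR(p))
            printf("lookup failed: %ld\n", PTR_ERR(p));
        else
            printf("found %s\n", (char *)p);
        return 0;
    }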
*/ ++ if (ds->dentry == parent) { ++ *racecheck = 1; ++ break; ++ } ++ } ++} ++ + /* + * This is dput + * +@@ -127,26 +205,26 @@ static inline void dentry_iput(struct de + */ + + /* +- * dput - release a dentry +- * @dentry: dentry to release ++ * dput_recursive - go upward through the dentry tree and release dentries ++ * @dentry: starting dentry ++ * @ds: shrinker to be added to active list (see shrink_dcache_parent) + * + * Release a dentry. This will drop the usage count and if appropriate + * call the dentry unlink method as well as removing it from the queues and + * releasing its resources. If the parent dentries were scheduled for release + * they too may now get deleted. + * ++ * This traverse upward doesn't change d_inuse of any dentry ++ * + * no dcache lock, please. + */ +- +-void dput(struct dentry *dentry) ++static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds) + { +- if (!dentry) +- return; +- +-repeat: + if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) + return; ++ dcache_shrinker_del(ds); + ++repeat: + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); +@@ -178,6 +256,7 @@ unhash_it: + + kill_it: { + struct dentry *parent; ++ struct dcache_shrinker lds; + + /* If dentry was on d_lru list + * delete it from there +@@ -187,18 +266,50 @@ kill_it: { + dentry_stat.nr_unused--; + } + list_del(&dentry->d_child); ++ parent = dentry->d_parent; ++ dcache_shrinker_add(&lds, parent, dentry); + dentry_stat.nr_dentry--; /* For d_free, below */ + /*drops the locks, at that point nobody can reach this dentry */ + dentry_iput(dentry); +- parent = dentry->d_parent; + d_free(dentry); +- if (dentry == parent) ++ if (unlikely(dentry == parent)) { ++ spin_lock(&dcache_lock); ++ dcache_shrinker_del(&lds); ++ spin_unlock(&dcache_lock); + return; ++ } + dentry = parent; +- goto repeat; ++ spin_lock(&dcache_lock); ++ dcache_shrinker_del(&lds); ++ if (atomic_dec_and_test(&dentry->d_count)) ++ goto repeat; ++ spin_unlock(&dcache_lock); + } + } + ++/* ++ * dput - release a dentry ++ * @dentry: dentry to release ++ * ++ * Release a dentry. This will drop the usage count and if appropriate ++ * call the dentry unlink method as well as removing it from the queues and ++ * releasing its resources. If the parent dentries were scheduled for release ++ * they too may now get deleted. ++ * ++ * no dcache lock, please. ++ */ ++ ++void dput(struct dentry *dentry) ++{ ++ if (!dentry) ++ return; ++ ++ spin_lock(&dcache_lock); ++ ub_dentry_uncharge(dentry); ++ spin_unlock(&dcache_lock); ++ dput_recursive(dentry, NULL); ++} ++ + /** + * d_invalidate - invalidate a dentry + * @dentry: dentry to invalidate +@@ -265,6 +376,8 @@ static inline struct dentry * __dget_loc + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } ++ ++ ub_dentry_charge_nofail(dentry); + return dentry; + } + +@@ -327,13 +440,16 @@ restart: + tmp = head; + while ((tmp = tmp->next) != head) { + struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); ++ spin_lock(&dentry->d_lock); + if (!atomic_read(&dentry->d_count)) { + __dget_locked(dentry); + __d_drop(dentry); ++ spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + dput(dentry); + goto restart; + } ++ spin_unlock(&dentry->d_lock); + } + spin_unlock(&dcache_lock); + } +@@ -344,19 +460,27 @@ restart: + * removed. + * Called with dcache_lock, drops it and then regains. 
+ */ +-static inline void prune_one_dentry(struct dentry * dentry) ++static void prune_one_dentry(struct dentry * dentry) + { + struct dentry * parent; ++ struct dcache_shrinker ds; + + __d_drop(dentry); + list_del(&dentry->d_child); ++ parent = dentry->d_parent; ++ dcache_shrinker_add(&ds, parent, dentry); + dentry_stat.nr_dentry--; /* For d_free, below */ + dentry_iput(dentry); + parent = dentry->d_parent; + d_free(dentry); + if (parent != dentry) +- dput(parent); ++ /* ++ * dentry is not in use, only child (not outside) ++ * references change, so parent->d_inuse does not change ++ */ ++ dput_recursive(parent, &ds); + spin_lock(&dcache_lock); ++ dcache_shrinker_del(&ds); + } + + /** +@@ -379,6 +503,8 @@ static void prune_dcache(int count) + struct dentry *dentry; + struct list_head *tmp; + ++ cond_resched_lock(&dcache_lock); ++ + tmp = dentry_unused.prev; + if (tmp == &dentry_unused) + break; +@@ -472,6 +598,7 @@ repeat: + continue; + } + prune_one_dentry(dentry); ++ cond_resched_lock(&dcache_lock); + goto repeat; + } + spin_unlock(&dcache_lock); +@@ -536,13 +663,12 @@ positive: + * whenever the d_subdirs list is non-empty and continue + * searching. + */ +-static int select_parent(struct dentry * parent) ++static int select_parent(struct dentry * parent, int * racecheck) + { + struct dentry *this_parent = parent; + struct list_head *next; + int found = 0; + +- spin_lock(&dcache_lock); + repeat: + next = this_parent->d_subdirs.next; + resume: +@@ -564,6 +690,15 @@ resume: + dentry_stat.nr_unused++; + found++; + } ++ ++ /* ++ * We can return to the caller if we have found some (this ++ * ensures forward progress). We'll be coming back to find ++ * the rest. ++ */ ++ if (found && need_resched()) ++ goto out; ++ + /* + * Descend a level if the d_subdirs list is non-empty. + */ +@@ -575,6 +710,9 @@ dentry->d_parent->d_name.name, dentry->d + #endif + goto repeat; + } ++ ++ if (!found && racecheck != NULL) ++ shrink_dcache_racecheck(dentry, racecheck); + } + /* + * All done at this level ... ascend and resume the search. +@@ -588,7 +726,7 @@ this_parent->d_parent->d_name.name, this + #endif + goto resume; + } +- spin_unlock(&dcache_lock); ++out: + return found; + } + +@@ -601,10 +739,66 @@ this_parent->d_parent->d_name.name, this + + void shrink_dcache_parent(struct dentry * parent) + { +- int found; ++ int found, r; ++ ++ while (1) { ++ spin_lock(&dcache_lock); ++ found = select_parent(parent, NULL); ++ if (found) ++ goto found; + +- while ((found = select_parent(parent)) != 0) ++ /* ++ * try again with a dput_recursive() race check. ++ * it returns quickly if everything was really shrinked ++ */ ++ r = 0; ++ found = select_parent(parent, &r); ++ if (found) ++ goto found; ++ if (!r) ++ break; ++ ++ /* drops the lock inside */ ++ dcache_shrinker_wait(parent->d_sb); ++ continue; ++ ++found: ++ spin_unlock(&dcache_lock); + prune_dcache(found); ++ } ++ spin_unlock(&dcache_lock); ++} ++ ++/* ++ * Move any unused anon dentries to the end of the unused list. 
++ * called under dcache_lock ++ */ ++static int select_anon(struct hlist_head *head, int *racecheck) ++{ ++ struct hlist_node *lp; ++ int found = 0; ++ ++ hlist_for_each(lp, head) { ++ struct dentry *this = hlist_entry(lp, struct dentry, d_hash); ++ if (!list_empty(&this->d_lru)) { ++ dentry_stat.nr_unused--; ++ list_del_init(&this->d_lru); ++ } ++ ++ /* ++ * move only zero ref count dentries to the end ++ * of the unused list for prune_dcache ++ */ ++ if (!atomic_read(&this->d_count)) { ++ list_add_tail(&this->d_lru, &dentry_unused); ++ dentry_stat.nr_unused++; ++ found++; ++ } ++ ++ if (!found && racecheck != NULL) ++ shrink_dcache_racecheck(this, racecheck); ++ } ++ return found; + } + + /** +@@ -617,33 +811,36 @@ void shrink_dcache_parent(struct dentry + * done under dcache_lock. + * + */ +-void shrink_dcache_anon(struct hlist_head *head) ++void shrink_dcache_anon(struct super_block *sb) + { +- struct hlist_node *lp; +- int found; +- do { +- found = 0; ++ int found, r; ++ ++ while (1) { + spin_lock(&dcache_lock); +- hlist_for_each(lp, head) { +- struct dentry *this = hlist_entry(lp, struct dentry, d_hash); +- if (!list_empty(&this->d_lru)) { +- dentry_stat.nr_unused--; +- list_del_init(&this->d_lru); +- } ++ found = select_anon(&sb->s_anon, NULL); ++ if (found) ++ goto found; + +- /* +- * move only zero ref count dentries to the end +- * of the unused list for prune_dcache +- */ +- if (!atomic_read(&this->d_count)) { +- list_add_tail(&this->d_lru, &dentry_unused); +- dentry_stat.nr_unused++; +- found++; +- } +- } ++ /* ++ * try again with a dput_recursive() race check. ++ * it returns quickly if everything was really shrinked ++ */ ++ r = 0; ++ found = select_anon(&sb->s_anon, &r); ++ if (found) ++ goto found; ++ if (!r) ++ break; ++ ++ /* drops the lock inside */ ++ dcache_shrinker_wait(sb); ++ continue; ++ ++found: + spin_unlock(&dcache_lock); + prune_dcache(found); +- } while(found); ++ } ++ spin_unlock(&dcache_lock); + } + + /* +@@ -660,12 +857,18 @@ void shrink_dcache_anon(struct hlist_hea + */ + static int shrink_dcache_memory(int nr, unsigned int gfp_mask) + { ++ int res = -1; ++ ++ KSTAT_PERF_ENTER(shrink_dcache) + if (nr) { + if (!(gfp_mask & __GFP_FS)) +- return -1; ++ goto out; + prune_dcache(nr); + } +- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; ++ res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; ++out: ++ KSTAT_PERF_LEAVE(shrink_dcache) ++ return res; + } + + /** +@@ -685,19 +888,20 @@ struct dentry *d_alloc(struct dentry * p + + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + if (!dentry) +- return NULL; ++ goto err_dentry; + + if (name->len > DNAME_INLINE_LEN-1) { + dname = kmalloc(name->len + 1, GFP_KERNEL); +- if (!dname) { +- kmem_cache_free(dentry_cache, dentry); +- return NULL; +- } ++ if (!dname) ++ goto err_name; + } else { + dname = dentry->d_iname; + } + dentry->d_name.name = dname; + ++ if (ub_dentry_alloc(dentry)) ++ goto err_charge; ++ + dentry->d_name.len = name->len; + dentry->d_name.hash = name->hash; + memcpy(dname, name->name, name->len); +@@ -727,12 +931,23 @@ struct dentry *d_alloc(struct dentry * p + } + + spin_lock(&dcache_lock); +- if (parent) ++ if (parent) { + list_add(&dentry->d_child, &parent->d_subdirs); ++ if (parent->d_flags & DCACHE_VIRTUAL) ++ dentry->d_flags |= DCACHE_VIRTUAL; ++ } + dentry_stat.nr_dentry++; + spin_unlock(&dcache_lock); + + return dentry; ++ ++err_charge: ++ if (name->len > DNAME_INLINE_LEN - 1) ++ kfree(dname); ++err_name: ++ kmem_cache_free(dentry_cache, dentry); 
++err_dentry: ++ return NULL; + } + + /** +@@ -1016,6 +1231,7 @@ struct dentry * __d_lookup(struct dentry + if (!d_unhashed(dentry)) { + atomic_inc(&dentry->d_count); + found = dentry; ++ goto found; + } + terminate: + spin_unlock(&dentry->d_lock); +@@ -1026,6 +1242,17 @@ next: + rcu_read_unlock(); + + return found; ++ ++found: ++ /* ++ * d_lock and rcu_read_lock ++ * are dropped in ub_dentry_charge() ++ */ ++ if (!ub_dentry_charge(found)) ++ return found; ++ ++ dput(found); ++ return NULL; + } + + /** +@@ -1262,6 +1489,32 @@ already_unhashed: + } + + /** ++ * __d_path_add_deleted - prepend "(deleted) " text ++ * @end: a pointer to the character after free space at the beginning of the ++ * buffer ++ * @buflen: remaining free space ++ */ ++static inline char * __d_path_add_deleted(char * end, int buflen) ++{ ++ buflen -= 10; ++ if (buflen < 0) ++ return ERR_PTR(-ENAMETOOLONG); ++ end -= 10; ++ memcpy(end, "(deleted) ", 10); ++ return end; ++} ++ ++/** ++ * d_root_check - checks if dentry is accessible from current's fs root ++ * @dentry: dentry to be verified ++ * @vfsmnt: vfsmnt to which the dentry belongs ++ */ ++int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt) ++{ ++ return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0)); ++} ++ ++/** + * d_path - return the path of a dentry + * @dentry: dentry to report + * @vfsmnt: vfsmnt to which the dentry belongs +@@ -1282,36 +1535,35 @@ static char * __d_path( struct dentry *d + char *buffer, int buflen) + { + char * end = buffer+buflen; +- char * retval; ++ char * retval = NULL; + int namelen; ++ int deleted; ++ struct vfsmount *oldvfsmnt; + +- *--end = '\0'; +- buflen--; +- if (!IS_ROOT(dentry) && d_unhashed(dentry)) { +- buflen -= 10; +- end -= 10; +- if (buflen < 0) ++ oldvfsmnt = vfsmnt; ++ deleted = (!IS_ROOT(dentry) && d_unhashed(dentry)); ++ if (buffer != NULL) { ++ *--end = '\0'; ++ buflen--; ++ ++ if (buflen < 1) + goto Elong; +- memcpy(end, " (deleted)", 10); ++ /* Get '/' right */ ++ retval = end-1; ++ *retval = '/'; + } + +- if (buflen < 1) +- goto Elong; +- /* Get '/' right */ +- retval = end-1; +- *retval = '/'; +- + for (;;) { + struct dentry * parent; + + if (dentry == root && vfsmnt == rootmnt) + break; + if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { +- /* Global root? */ ++ /* root of a tree? */ + spin_lock(&vfsmount_lock); + if (vfsmnt->mnt_parent == vfsmnt) { + spin_unlock(&vfsmount_lock); +- goto global_root; ++ goto other_root; + } + dentry = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; +@@ -1320,27 +1572,51 @@ static char * __d_path( struct dentry *d + } + parent = dentry->d_parent; + prefetch(parent); ++ if (buffer != NULL) { ++ namelen = dentry->d_name.len; ++ buflen -= namelen + 1; ++ if (buflen < 0) ++ goto Elong; ++ end -= namelen; ++ memcpy(end, dentry->d_name.name, namelen); ++ *--end = '/'; ++ retval = end; ++ } ++ dentry = parent; ++ } ++ /* the given root point is reached */ ++finish: ++ if (buffer != NULL && deleted) ++ retval = __d_path_add_deleted(end, buflen); ++ return retval; ++ ++other_root: ++ /* ++ * We traversed the tree upward and reached a root, but the given ++ * lookup terminal point wasn't encountered. It means either that the ++ * dentry is out of our scope or belongs to an abstract space like ++ * sock_mnt or pipe_mnt. Check for it. ++ * ++ * There are different options to check it. 
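The d_alloc() rework spanning this and the previous chunk converts its failure handling to the usual goto-unwind layout (err_charge/err_name/err_dentry), releasing each resource in reverse order of acquisition. A self-contained sketch of that layout, with invented resource names and plain malloc in place of the slab and UBC calls:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct item {
        char *name;
        void *payload;
    };

    /* Acquire resources one by one; on failure, jump to the label that frees
     * everything acquired so far, in reverse order. */
    static struct item *item_alloc(const char *name)
    {
        struct item *it;

        it = malloc(sizeof(*it));
        if (!it)
            goto err_item;

        it->name = malloc(strlen(name) + 1);
        if (!it->name)
            goto err_name;
        strcpy(it->name, name);

        it->payload = malloc(64);
        if (!it->payload)
            goto err_payload;

        return it;

    err_payload:
        free(it->name);
    err_name:
        free(it);
    err_item:
        return NULL;
    }

    int main(void)
    {
        struct item *it = item_alloc("demo");

        if (it) {
            printf("allocated %s\n", it->name);
            free(it->payload);
            free(it->name);
            free(it);
        }
        return 0;
    }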
++ * We may assume that any dentry tree is unreachable unless it's ++ * connected to `root' (defined as fs root of init aka child reaper) ++ * and expose all paths that are not connected to it. ++ * The other option is to allow exposing of known abstract spaces ++ * explicitly and hide the path information for other cases. ++ * This approach is more safe, let's take it. 2001/04/22 SAW ++ */ ++ if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER)) ++ return ERR_PTR(-EINVAL); ++ if (buffer != NULL) { + namelen = dentry->d_name.len; +- buflen -= namelen + 1; ++ buflen -= namelen; + if (buflen < 0) + goto Elong; +- end -= namelen; +- memcpy(end, dentry->d_name.name, namelen); +- *--end = '/'; +- retval = end; +- dentry = parent; ++ retval -= namelen-1; /* hit the slash */ ++ memcpy(retval, dentry->d_name.name, namelen); + } ++ goto finish; + +- return retval; +- +-global_root: +- namelen = dentry->d_name.len; +- buflen -= namelen; +- if (buflen < 0) +- goto Elong; +- retval -= namelen-1; /* hit the slash */ +- memcpy(retval, dentry->d_name.name, namelen); +- return retval; + Elong: + return ERR_PTR(-ENAMETOOLONG); + } +@@ -1365,6 +1641,226 @@ char * d_path(struct dentry *dentry, str + return res; + } + ++#ifdef CONFIG_VE ++#include <net/sock.h> ++#include <linux/ip.h> ++#include <linux/file.h> ++#include <linux/namespace.h> ++#include <linux/vzratelimit.h> ++ ++static void mark_sub_tree_virtual(struct dentry *d) ++{ ++ struct dentry *orig_root; ++ ++ orig_root = d; ++ while (1) { ++ spin_lock(&d->d_lock); ++ d->d_flags |= DCACHE_VIRTUAL; ++ spin_unlock(&d->d_lock); ++ ++ if (!list_empty(&d->d_subdirs)) { ++ d = list_entry(d->d_subdirs.next, ++ struct dentry, d_child); ++ continue; ++ } ++ if (d == orig_root) ++ break; ++ while (d == list_entry(d->d_parent->d_subdirs.prev, ++ struct dentry, d_child)) { ++ d = d->d_parent; ++ if (d == orig_root) ++ goto out; ++ } ++ d = list_entry(d->d_child.next, ++ struct dentry, d_child); ++ } ++out: ++ return; ++} ++ ++void mark_tree_virtual(struct vfsmount *m, struct dentry *d) ++{ ++ struct vfsmount *orig_rootmnt; ++ ++ spin_lock(&dcache_lock); ++ spin_lock(&vfsmount_lock); ++ orig_rootmnt = m; ++ while (1) { ++ mark_sub_tree_virtual(d); ++ if (!list_empty(&m->mnt_mounts)) { ++ m = list_entry(m->mnt_mounts.next, ++ struct vfsmount, mnt_child); ++ d = m->mnt_root; ++ continue; ++ } ++ if (m == orig_rootmnt) ++ break; ++ while (m == list_entry(m->mnt_parent->mnt_mounts.prev, ++ struct vfsmount, mnt_child)) { ++ m = m->mnt_parent; ++ if (m == orig_rootmnt) ++ goto out; ++ } ++ m = list_entry(m->mnt_child.next, ++ struct vfsmount, mnt_child); ++ d = m->mnt_root; ++ } ++out: ++ spin_unlock(&vfsmount_lock); ++ spin_unlock(&dcache_lock); ++} ++EXPORT_SYMBOL(mark_tree_virtual); ++ ++static struct vz_rate_info area_ri = { 20, 10*HZ }; ++#define VE_AREA_ACC_CHECK 0x0001 ++#define VE_AREA_ACC_DENY 0x0002 ++#define VE_AREA_EXEC_CHECK 0x0010 ++#define VE_AREA_EXEC_DENY 0x0020 ++#define VE0_AREA_ACC_CHECK 0x0100 ++#define VE0_AREA_ACC_DENY 0x0200 ++#define VE0_AREA_EXEC_CHECK 0x1000 ++#define VE0_AREA_EXEC_DENY 0x2000 ++int ve_area_access_check = 0; ++ ++static void print_connection_info(struct task_struct *tsk) ++{ ++ struct files_struct *files; ++ int fd; ++ ++ files = get_files_struct(tsk); ++ if (!files) ++ return; ++ ++ spin_lock(&files->file_lock); ++ for (fd = 0; fd < files->max_fds; fd++) { ++ struct file *file; ++ struct inode *inode; ++ struct socket *socket; ++ struct sock *sk; ++ struct inet_opt *inet; ++ ++ file = files->fd[fd]; ++ if (file == NULL) ++ continue; 
++ ++ inode = file->f_dentry->d_inode; ++ if (!inode->i_sock) ++ continue; ++ ++ socket = SOCKET_I(inode); ++ if (socket == NULL) ++ continue; ++ ++ sk = socket->sk; ++ if (sk->sk_family != PF_INET || sk->sk_type != SOCK_STREAM) ++ continue; ++ ++ inet = inet_sk(sk); ++ printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n", ++ NIPQUAD(inet->daddr), ntohs(inet->dport), ++ inet->num); ++ } ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++} ++ ++static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry, ++ char *str) ++{ ++ struct task_struct *tsk; ++ unsigned long page; ++ struct super_block *sb; ++ char *p; ++ ++ if (!vz_ratelimit(&area_ri)) ++ return; ++ ++ tsk = current; ++ p = ERR_PTR(-ENOMEM); ++ page = __get_free_page(GFP_KERNEL); ++ if (page) { ++ spin_lock(&dcache_lock); ++ p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt, ++ (char *)page, PAGE_SIZE); ++ spin_unlock(&dcache_lock); ++ } ++ if (IS_ERR(p)) ++ p = "(undefined)"; ++ ++ sb = dentry->d_sb; ++ printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n" ++ "Task %d/%d[%s] from VE%d, execenv %d\n", ++ str, p, VE_OWNER_FSTYPE(sb->s_type)->veid, ++ sb->s_type->name, sb->s_dev, ++ tsk->pid, virt_pid(tsk), tsk->comm, ++ VE_TASK_INFO(tsk)->owner_env->veid, ++ get_exec_env()->veid); ++ ++ free_page(page); ++ ++ print_connection_info(tsk); ++ ++ read_lock(&tasklist_lock); ++ tsk = tsk->real_parent; ++ get_task_struct(tsk); ++ read_unlock(&tasklist_lock); ++ ++ printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n", ++ tsk->pid, virt_pid(tsk), tsk->comm, ++ VE_TASK_INFO(tsk)->owner_env->veid); ++ ++ print_connection_info(tsk); ++ put_task_struct(tsk); ++ dump_stack(); ++} ++#endif ++ ++int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt) ++{ ++#ifdef CONFIG_VE ++ int check, alert, deny; ++ ++ if (ve_is_super(get_exec_env())) { ++ check = ve_area_access_check & VE0_AREA_ACC_CHECK; ++ alert = dentry->d_flags & DCACHE_VIRTUAL; ++ deny = ve_area_access_check & VE0_AREA_ACC_DENY; ++ } else { ++ check = ve_area_access_check & VE_AREA_ACC_CHECK; ++ alert = !(dentry->d_flags & DCACHE_VIRTUAL); ++ deny = ve_area_access_check & VE_AREA_ACC_DENY; ++ } ++ ++ if (check && alert) ++ check_alert(mnt, dentry, "Access"); ++ if (deny && alert) ++ return -EACCES; ++#endif ++ return 0; ++} ++ ++int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt) ++{ ++#ifdef CONFIG_VE ++ int check, alert, deny; ++ ++ if (ve_is_super(get_exec_env())) { ++ check = ve_area_access_check & VE0_AREA_EXEC_CHECK; ++ alert = dentry->d_flags & DCACHE_VIRTUAL; ++ deny = ve_area_access_check & VE0_AREA_EXEC_DENY; ++ } else { ++ check = ve_area_access_check & VE_AREA_EXEC_CHECK; ++ alert = !(dentry->d_flags & DCACHE_VIRTUAL); ++ deny = ve_area_access_check & VE_AREA_EXEC_DENY; ++ } ++ ++ if (check && alert) ++ check_alert(mnt, dentry, "Exec"); ++ if (deny && alert) ++ return -EACCES; ++#endif ++ return 0; ++} ++ + /* + * NOTE! The user-level library version returns a + * character pointer. 
The kernel system call just +@@ -1501,10 +1997,12 @@ resume: + goto repeat; + } + atomic_dec(&dentry->d_count); ++ ub_dentry_uncharge(dentry); + } + if (this_parent != root) { + next = this_parent->d_child.next; + atomic_dec(&this_parent->d_count); ++ ub_dentry_uncharge(this_parent); + this_parent = this_parent->d_parent; + goto resume; + } +@@ -1627,7 +2125,7 @@ void __init vfs_caches_init(unsigned lon + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, filp_ctor, filp_dtor); + + dcache_init(mempages); + inode_init(mempages); +diff -uprN linux-2.6.8.1.orig/fs/dcookies.c linux-2.6.8.1-ve022stab078/fs/dcookies.c +--- linux-2.6.8.1.orig/fs/dcookies.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/dcookies.c 2006-05-11 13:05:37.000000000 +0400 +@@ -93,12 +93,10 @@ static struct dcookie_struct * alloc_dco + if (!dcs) + return NULL; + +- atomic_inc(&dentry->d_count); +- atomic_inc(&vfsmnt->mnt_count); + dentry->d_cookie = dcs; + +- dcs->dentry = dentry; +- dcs->vfsmnt = vfsmnt; ++ dcs->dentry = dget(dentry); ++ dcs->vfsmnt = mntget(vfsmnt); + hash_dcookie(dcs); + + return dcs; +diff -uprN linux-2.6.8.1.orig/fs/devpts/inode.c linux-2.6.8.1-ve022stab078/fs/devpts/inode.c +--- linux-2.6.8.1.orig/fs/devpts/inode.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/devpts/inode.c 2006-05-11 13:05:42.000000000 +0400 +@@ -12,6 +12,7 @@ + + #include <linux/module.h> + #include <linux/init.h> ++#include <linux/ve.h> + #include <linux/fs.h> + #include <linux/sched.h> + #include <linux/namei.h> +@@ -25,13 +26,29 @@ + static struct vfsmount *devpts_mnt; + static struct dentry *devpts_root; + +-static struct { +- int setuid; +- int setgid; +- uid_t uid; +- gid_t gid; +- umode_t mode; +-} config = {.mode = 0600}; ++void prepare_devpts(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->devpts_mnt = devpts_mnt; ++ devpts_mnt = (struct vfsmount *)0x11121314; ++ ++ /* ve0.devpts_root should be filled inside fill_super() */ ++ BUG_ON(devpts_root != NULL); ++ devpts_root = (struct dentry *)0x12131415; ++#endif ++} ++ ++#ifndef CONFIG_VE ++#define visible_devpts_mnt devpts_mnt ++#define visible_devpts_root devpts_root ++#define visible_devpts_config config ++#else ++#define visible_devpts_mnt (get_exec_env()->devpts_mnt) ++#define visible_devpts_root (get_exec_env()->devpts_root) ++#define visible_devpts_config (*(get_exec_env()->devpts_config)) ++#endif ++ ++static struct devpts_config config = {.mode = 0600}; + + static int devpts_remount(struct super_block *sb, int *flags, char *data) + { +@@ -57,15 +74,16 @@ static int devpts_remount(struct super_b + } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1) + mode = n & ~S_IFMT; + else { +- printk("devpts: called with bogus options\n"); ++ ve_printk(VE_LOG, ++ "devpts: called with bogus options\n"); + return -EINVAL; + } + } +- config.setuid = setuid; +- config.setgid = setgid; +- config.uid = uid; +- config.gid = gid; +- config.mode = mode; ++ visible_devpts_config.setuid = setuid; ++ visible_devpts_config.setgid = setgid; ++ visible_devpts_config.uid = uid; ++ visible_devpts_config.gid = gid; ++ visible_devpts_config.mode = mode; + + return 0; + } +@@ -98,10 +116,10 @@ devpts_fill_super(struct super_block *s, + inode->i_fop = &simple_dir_operations; + inode->i_nlink = 2; + +- devpts_root = s->s_root = d_alloc_root(inode); ++ visible_devpts_root = s->s_root = 
d_alloc_root(inode); + if (s->s_root) + return 0; +- ++ + printk("devpts: get root dentry failed\n"); + iput(inode); + fail: +@@ -114,13 +132,15 @@ static struct super_block *devpts_get_sb + return get_sb_single(fs_type, flags, data, devpts_fill_super); + } + +-static struct file_system_type devpts_fs_type = { ++struct file_system_type devpts_fs_type = { + .owner = THIS_MODULE, + .name = "devpts", + .get_sb = devpts_get_sb, + .kill_sb = kill_anon_super, + }; + ++EXPORT_SYMBOL(devpts_fs_type); ++ + /* + * The normal naming convention is simply /dev/pts/<number>; this conforms + * to the System V naming convention +@@ -129,7 +149,7 @@ static struct file_system_type devpts_fs + static struct dentry *get_node(int num) + { + char s[12]; +- struct dentry *root = devpts_root; ++ struct dentry *root = visible_devpts_root; + down(&root->d_inode->i_sem); + return lookup_one_len(s, root, sprintf(s, "%d", num)); + } +@@ -147,7 +167,7 @@ int devpts_pty_new(struct tty_struct *tt + struct tty_driver *driver = tty->driver; + dev_t device = MKDEV(driver->major, driver->minor_start+number); + struct dentry *dentry; +- struct inode *inode = new_inode(devpts_mnt->mnt_sb); ++ struct inode *inode = new_inode(visible_devpts_mnt->mnt_sb); + + /* We're supposed to be given the slave end of a pty */ + BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); +@@ -158,10 +178,12 @@ int devpts_pty_new(struct tty_struct *tt + + inode->i_ino = number+2; + inode->i_blksize = 1024; +- inode->i_uid = config.setuid ? config.uid : current->fsuid; +- inode->i_gid = config.setgid ? config.gid : current->fsgid; ++ inode->i_uid = visible_devpts_config.setuid ? ++ visible_devpts_config.uid : current->fsuid; ++ inode->i_gid = visible_devpts_config.setgid ? ++ visible_devpts_config.gid : current->fsgid; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +- init_special_inode(inode, S_IFCHR|config.mode, device); ++ init_special_inode(inode, S_IFCHR|visible_devpts_config.mode, device); + inode->i_op = &devpts_file_inode_operations; + inode->u.generic_ip = tty; + +@@ -169,7 +191,7 @@ int devpts_pty_new(struct tty_struct *tt + if (!IS_ERR(dentry) && !dentry->d_inode) + d_instantiate(dentry, inode); + +- up(&devpts_root->d_inode->i_sem); ++ up(&visible_devpts_root->d_inode->i_sem); + + return 0; + } +@@ -179,10 +201,14 @@ struct tty_struct *devpts_get_tty(int nu + struct dentry *dentry = get_node(number); + struct tty_struct *tty; + +- tty = (IS_ERR(dentry) || !dentry->d_inode) ? 
NULL : +- dentry->d_inode->u.generic_ip; ++ tty = NULL; ++ if (!IS_ERR(dentry)) { ++ if (dentry->d_inode) ++ tty = dentry->d_inode->u.generic_ip; ++ dput(dentry); ++ } + +- up(&devpts_root->d_inode->i_sem); ++ up(&visible_devpts_root->d_inode->i_sem); + + return tty; + } +@@ -200,7 +226,7 @@ void devpts_pty_kill(int number) + } + dput(dentry); + } +- up(&devpts_root->d_inode->i_sem); ++ up(&visible_devpts_root->d_inode->i_sem); + } + + static int __init init_devpts_fs(void) +@@ -208,17 +234,22 @@ static int __init init_devpts_fs(void) + int err = init_devpts_xattr(); + if (err) + return err; ++#ifdef CONFIG_VE ++ get_ve0()->devpts_config = &config; ++#endif + err = register_filesystem(&devpts_fs_type); + if (!err) { + devpts_mnt = kern_mount(&devpts_fs_type); + if (IS_ERR(devpts_mnt)) + err = PTR_ERR(devpts_mnt); + } ++ prepare_devpts(); + return err; + } + + static void __exit exit_devpts_fs(void) + { ++ /* the code is never called, the argument is irrelevant */ + unregister_filesystem(&devpts_fs_type); + mntput(devpts_mnt); + exit_devpts_xattr(); +diff -uprN linux-2.6.8.1.orig/fs/direct-io.c linux-2.6.8.1-ve022stab078/fs/direct-io.c +--- linux-2.6.8.1.orig/fs/direct-io.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/direct-io.c 2006-05-11 13:05:25.000000000 +0400 +@@ -833,8 +833,10 @@ do_holes: + char *kaddr; + + /* AKPM: eargh, -ENOTBLK is a hack */ +- if (dio->rw == WRITE) ++ if (dio->rw == WRITE) { ++ page_cache_release(page); + return -ENOTBLK; ++ } + + if (dio->block_in_file >= + i_size_read(dio->inode)>>blkbits) { +diff -uprN linux-2.6.8.1.orig/fs/eventpoll.c linux-2.6.8.1-ve022stab078/fs/eventpoll.c +--- linux-2.6.8.1.orig/fs/eventpoll.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/eventpoll.c 2006-05-11 13:05:48.000000000 +0400 +@@ -149,10 +149,9 @@ + #define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi) + + +-struct epoll_filefd { +- struct file *file; +- int fd; +-}; ++/* Maximum msec timeout value storeable in a long int */ ++#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) ++ + + /* + * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". +@@ -176,36 +175,6 @@ struct poll_safewake { + spinlock_t lock; + }; + +-/* +- * This structure is stored inside the "private_data" member of the file +- * structure and rapresent the main data sructure for the eventpoll +- * interface. +- */ +-struct eventpoll { +- /* Protect the this structure access */ +- rwlock_t lock; +- +- /* +- * This semaphore is used to ensure that files are not removed +- * while epoll is using them. This is read-held during the event +- * collection loop and it is write-held during the file cleanup +- * path, the epoll file exit code and the ctl operations. +- */ +- struct rw_semaphore sem; +- +- /* Wait queue used by sys_epoll_wait() */ +- wait_queue_head_t wq; +- +- /* Wait queue used by file->poll() */ +- wait_queue_head_t poll_wait; +- +- /* List of ready file descriptors */ +- struct list_head rdllist; +- +- /* RB-Tree root used to store monitored fd structs */ +- struct rb_root rbr; +-}; +- + /* Wait structure used by the poll hooks */ + struct eppoll_entry { + /* List header used to link this structure to the "struct epitem" */ +@@ -224,50 +193,6 @@ struct eppoll_entry { + wait_queue_head_t *whead; + }; + +-/* +- * Each file descriptor added to the eventpoll interface will +- * have an entry of this type linked to the hash. 
+- */ +-struct epitem { +- /* RB-Tree node used to link this structure to the eventpoll rb-tree */ +- struct rb_node rbn; +- +- /* List header used to link this structure to the eventpoll ready list */ +- struct list_head rdllink; +- +- /* The file descriptor information this item refers to */ +- struct epoll_filefd ffd; +- +- /* Number of active wait queue attached to poll operations */ +- int nwait; +- +- /* List containing poll wait queues */ +- struct list_head pwqlist; +- +- /* The "container" of this item */ +- struct eventpoll *ep; +- +- /* The structure that describe the interested events and the source fd */ +- struct epoll_event event; +- +- /* +- * Used to keep track of the usage count of the structure. This avoids +- * that the structure will desappear from underneath our processing. +- */ +- atomic_t usecnt; +- +- /* List header used to link this item to the "struct file" items list */ +- struct list_head fllink; +- +- /* List header used to link the item to the transfer list */ +- struct list_head txlink; +- +- /* +- * This is used during the collection/transfer of events to userspace +- * to pin items empty events set. +- */ +- unsigned int revents; +-}; + + /* Wrapper struct used by poll queueing */ + struct ep_pqueue { +@@ -282,13 +207,13 @@ static void ep_poll_safewake(struct poll + static int ep_getfd(int *efd, struct inode **einode, struct file **efile); + static int ep_file_init(struct file *file); + static void ep_free(struct eventpoll *ep); +-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); ++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd); + static void ep_use_epitem(struct epitem *epi); + static void ep_release_epitem(struct epitem *epi); + static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, + poll_table *pt); + static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi); +-static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ++int ep_insert(struct eventpoll *ep, struct epoll_event *event, + struct file *tfile, int fd); + static int ep_modify(struct eventpoll *ep, struct epitem *epi, + struct epoll_event *event); +@@ -615,6 +540,7 @@ eexit_1: + return error; + } + ++#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) + + /* + * Implement the event wait interface for the eventpoll file. It is the kernel +@@ -631,7 +557,7 @@ asmlinkage long sys_epoll_wait(int epfd, + current, epfd, events, maxevents, timeout)); + + /* The maximum number of event must be greater than zero */ +- if (maxevents <= 0) ++ if (maxevents <= 0 || maxevents > MAX_EVENTS) + return -EINVAL; + + /* Verify that the area passed by the user is writeable */ +@@ -816,7 +742,7 @@ static void ep_free(struct eventpoll *ep + * the returned item, so the caller must call ep_release_epitem() + * after finished using the "struct epitem". + */ +-static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) ++struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) + { + int kcmp; + unsigned long flags; +@@ -916,7 +842,7 @@ static void ep_rbtree_insert(struct even + } + + +-static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ++int ep_insert(struct eventpoll *ep, struct epoll_event *event, + struct file *tfile, int fd) + { + int error, revents, pwake = 0; +@@ -1474,8 +1400,8 @@ static int ep_poll(struct eventpoll *ep, + * and the overflow condition. The passed timeout is in milliseconds, + * that why (t * HZ) / 1000. 
+ */ +- jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ? +- MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000; ++ jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? ++ MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; + + retry: + write_lock_irqsave(&ep->lock, flags); +diff -uprN linux-2.6.8.1.orig/fs/exec.c linux-2.6.8.1-ve022stab078/fs/exec.c +--- linux-2.6.8.1.orig/fs/exec.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/exec.c 2006-05-11 13:05:49.000000000 +0400 +@@ -26,6 +26,7 @@ + #include <linux/slab.h> + #include <linux/file.h> + #include <linux/mman.h> ++#include <linux/virtinfo.h> + #include <linux/a.out.h> + #include <linux/stat.h> + #include <linux/fcntl.h> +@@ -50,6 +51,8 @@ + #include <asm/uaccess.h> + #include <asm/mmu_context.h> + ++#include <ub/ub_vmpages.h> ++ + #ifdef CONFIG_KMOD + #include <linux/kmod.h> + #endif +@@ -58,6 +61,8 @@ int core_uses_pid; + char core_pattern[65] = "core"; + /* The maximal length of core_pattern is also specified in sysctl.c */ + ++int sysctl_at_vsyscall; ++ + static struct linux_binfmt *formats; + static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED; + +@@ -130,7 +135,7 @@ asmlinkage long sys_uselib(const char __ + if (!S_ISREG(nd.dentry->d_inode->i_mode)) + goto exit; + +- error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd); ++ error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC, &nd, NULL); + if (error) + goto exit; + +@@ -298,10 +303,14 @@ void install_arg_page(struct vm_area_str + struct page *page, unsigned long address) + { + struct mm_struct *mm = vma->vm_mm; ++ struct page_beancounter *pbc; + pgd_t * pgd; + pmd_t * pmd; + pte_t * pte; + ++ if (pb_alloc(&pbc)) ++ return; ++ + if (unlikely(anon_vma_prepare(vma))) + goto out_sig; + +@@ -320,9 +329,14 @@ void install_arg_page(struct vm_area_str + goto out; + } + mm->rss++; ++ vma->vm_rss++; + lru_cache_add_active(page); + set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte( + page, vma->vm_page_prot)))); ++ ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); ++ pb_add_ref(page, mm_ub(mm), &pbc); ++ + page_add_anon_rmap(page, vma, address); + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); +@@ -334,6 +348,31 @@ out: + out_sig: + __free_page(page); + force_sig(SIGKILL, current); ++ pb_free(&pbc); ++} ++ ++static inline void get_stack_vma_params(struct mm_struct *mm, int exec_stack, ++ unsigned long stack_base, struct linux_binprm *bprm, ++ unsigned long *start, unsigned long *end, unsigned long *flags) ++{ ++#ifdef CONFIG_STACK_GROWSUP ++ *start = stack_base; ++ *end = PAGE_MASK & ++ (PAGE_SIZE - 1 + (unsigned long) bprm->p); ++#else ++ *start = PAGE_MASK & (unsigned long) bprm->p; ++ *end = STACK_TOP; ++#endif ++ /* Adjust stack execute permissions; explicitly enable ++ * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X ++ * and leave alone (arch default) otherwise. 
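The epoll hunk just above introduces EP_MAX_MSTIMEO and clamps the requested timeout against it so that the conversion (timeout * HZ + 999) / 1000 can no longer overflow a long. The standalone sketch below mirrors that overflow-safe millisecond-to-tick conversion; the HZ value and the "wait forever" sentinel are chosen arbitrarily for the example, and the extra bound against the scheduler's maximum timeout in the patch is folded into the single clamp.

    #include <stdio.h>
    #include <limits.h>

    #define HZ 1000L                 /* assumed tick rate for the example */
    #define MAX_TIMEOUT LONG_MAX     /* stand-in for MAX_SCHEDULE_TIMEOUT */

    /* Largest millisecond value for which ms*HZ+999 still fits in a long;
     * mirrors the EP_MAX_MSTIMEO idea. */
    #define MAX_MSTIMEO ((LONG_MAX - 999L) / HZ)

    /* Convert a poll-style timeout in milliseconds to ticks, rounding up.
     * Negative or over-large values mean "wait forever". */
    static long ms_to_ticks(long ms)
    {
        if (ms < 0 || ms >= MAX_MSTIMEO)
            return MAX_TIMEOUT;
        return (ms * HZ + 999) / 1000;
    }

    int main(void)
    {
        printf("%ld\n", ms_to_ticks(1));        /* 1 ms   -> 1 tick        */
        printf("%ld\n", ms_to_ticks(1500));     /* 1.5 s  -> 1500 ticks    */
        printf("%ld\n", ms_to_ticks(-1));       /* block forever           */
        printf("%ld\n", ms_to_ticks(LONG_MAX)); /* clamped, no overflow    */
        return 0;
    }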
*/ ++ if (unlikely(exec_stack == EXSTACK_ENABLE_X)) ++ *flags = VM_STACK_FLAGS | VM_EXEC; ++ else if (exec_stack == EXSTACK_DISABLE_X) ++ *flags = VM_STACK_FLAGS & ~VM_EXEC; ++ else ++ *flags = VM_STACK_FLAGS; ++ *flags |= mm->def_flags; + } + + int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) +@@ -341,9 +380,13 @@ int setup_arg_pages(struct linux_binprm + unsigned long stack_base; + struct vm_area_struct *mpnt; + struct mm_struct *mm = current->mm; +- int i; ++ int i, ret; + long arg_size; + ++ unsigned long vm_start; ++ unsigned long vm_end; ++ unsigned long vm_flags; ++ + #ifdef CONFIG_STACK_GROWSUP + /* Move the argument and environment strings to the bottom of the + * stack space. +@@ -399,40 +442,32 @@ int setup_arg_pages(struct linux_binprm + bprm->loader += stack_base; + bprm->exec += stack_base; + +- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ get_stack_vma_params(mm, executable_stack, stack_base, bprm, ++ &vm_start, &vm_end, &vm_flags); ++ ++ ret = -ENOMEM; ++ if (ub_memory_charge(mm_ub(mm), vm_end - vm_start, vm_flags, ++ NULL, UB_SOFT)) ++ goto out; ++ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC); + if (!mpnt) +- return -ENOMEM; ++ goto out_uncharge; + +- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) { +- kmem_cache_free(vm_area_cachep, mpnt); +- return -ENOMEM; +- } ++ if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) ++ goto out_free; + + memset(mpnt, 0, sizeof(*mpnt)); + + down_write(&mm->mmap_sem); + { + mpnt->vm_mm = mm; +-#ifdef CONFIG_STACK_GROWSUP +- mpnt->vm_start = stack_base; +- mpnt->vm_end = PAGE_MASK & +- (PAGE_SIZE - 1 + (unsigned long) bprm->p); +-#else +- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; +- mpnt->vm_end = STACK_TOP; +-#endif +- /* Adjust stack execute permissions; explicitly enable +- * for EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X +- * and leave alone (arch default) otherwise. 
*/ +- if (unlikely(executable_stack == EXSTACK_ENABLE_X)) +- mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; +- else if (executable_stack == EXSTACK_DISABLE_X) +- mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC; +- else +- mpnt->vm_flags = VM_STACK_FLAGS; +- mpnt->vm_flags |= mm->def_flags; ++ mpnt->vm_start = vm_start; ++ mpnt->vm_end = vm_end; ++ mpnt->vm_flags = vm_flags; ++ mpnt->vm_rss = 0; + mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7]; +- insert_vm_struct(mm, mpnt); ++ if ((ret = insert_vm_struct(mm, mpnt))) ++ goto out_up; + mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + } + +@@ -447,6 +482,16 @@ int setup_arg_pages(struct linux_binprm + up_write(&mm->mmap_sem); + + return 0; ++ ++out_up: ++ up_write(&mm->mmap_sem); ++ vm_unacct_memory(arg_size >> PAGE_SHIFT); ++out_free: ++ kmem_cache_free(vm_area_cachep, mpnt); ++out_uncharge: ++ ub_memory_uncharge(mm_ub(mm), vm_end - vm_start, vm_flags, NULL); ++out: ++ return ret; + } + + EXPORT_SYMBOL(setup_arg_pages); +@@ -468,7 +513,7 @@ static inline void free_arg_pages(struct + + #endif /* CONFIG_MMU */ + +-struct file *open_exec(const char *name) ++struct file *open_exec(const char *name, struct linux_binprm *bprm) + { + struct nameidata nd; + int err; +@@ -483,9 +528,13 @@ struct file *open_exec(const char *name) + file = ERR_PTR(-EACCES); + if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && + S_ISREG(inode->i_mode)) { +- int err = permission(inode, MAY_EXEC, &nd); +- if (!err && !(inode->i_mode & 0111)) +- err = -EACCES; ++ int err; ++ if (bprm != NULL) { ++ bprm->perm.set = 0; ++ err = permission(inode, MAY_EXEC, &nd, ++ &bprm->perm); ++ } else ++ err = permission(inode, MAY_EXEC, &nd, NULL); + file = ERR_PTR(err); + if (!err) { + file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); +@@ -524,35 +573,65 @@ int kernel_read(struct file *file, unsig + + EXPORT_SYMBOL(kernel_read); + +-static int exec_mmap(struct mm_struct *mm) ++static int exec_mmap(struct linux_binprm *bprm) + { + struct task_struct *tsk; +- struct mm_struct * old_mm, *active_mm; +- +- /* Add it to the list of mm's */ +- spin_lock(&mmlist_lock); +- list_add(&mm->mmlist, &init_mm.mmlist); +- mmlist_nr++; +- spin_unlock(&mmlist_lock); ++ struct mm_struct *mm, *old_mm, *active_mm; ++ int ret; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; + old_mm = current->mm; + mm_release(tsk, old_mm); + ++ if (old_mm) { ++ /* ++ * Make sure that if there is a core dump in progress ++ * for the old mm, we get out and die instead of going ++ * through with the exec. We must hold mmap_sem around ++ * checking core_waiters and changing tsk->mm. The ++ * core-inducing thread will increment core_waiters for ++ * each thread whose ->mm == old_mm. 
++ */ ++ down_read(&old_mm->mmap_sem); ++ if (unlikely(old_mm->core_waiters)) { ++ up_read(&old_mm->mmap_sem); ++ return -EINTR; ++ } ++ } ++ ++ ret = 0; + task_lock(tsk); ++ mm = bprm->mm; + active_mm = tsk->active_mm; + tsk->mm = mm; + tsk->active_mm = mm; + activate_mm(active_mm, mm); + task_unlock(tsk); ++ ++ /* Add it to the list of mm's */ ++ spin_lock(&mmlist_lock); ++ list_add(&mm->mmlist, &init_mm.mmlist); ++ mmlist_nr++; ++ spin_unlock(&mmlist_lock); ++ bprm->mm = NULL; /* We're using it now */ ++ ++#ifdef CONFIG_VZ_GENCALLS ++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_EXECMMAP, ++ bprm) & NOTIFY_FAIL) { ++ /* similar to binfmt_elf */ ++ send_sig(SIGKILL, current, 0); ++ ret = -ENOMEM; ++ } ++#endif + if (old_mm) { ++ up_read(&old_mm->mmap_sem); + if (active_mm != old_mm) BUG(); + mmput(old_mm); +- return 0; ++ return ret; + } + mmdrop(active_mm); +- return 0; ++ return ret; + } + + /* +@@ -563,52 +642,26 @@ static int exec_mmap(struct mm_struct *m + */ + static inline int de_thread(struct task_struct *tsk) + { +- struct signal_struct *newsig, *oldsig = tsk->signal; ++ struct signal_struct *sig = tsk->signal; + struct sighand_struct *newsighand, *oldsighand = tsk->sighand; + spinlock_t *lock = &oldsighand->siglock; ++ struct task_struct *leader = NULL; + int count; + + /* + * If we don't share sighandlers, then we aren't sharing anything + * and we can just re-use it all. + */ +- if (atomic_read(&oldsighand->count) <= 1) ++ if (atomic_read(&oldsighand->count) <= 1) { ++ BUG_ON(atomic_read(&sig->count) != 1); ++ exit_itimers(sig); + return 0; ++ } + + newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); + if (!newsighand) + return -ENOMEM; + +- spin_lock_init(&newsighand->siglock); +- atomic_set(&newsighand->count, 1); +- memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); +- +- /* +- * See if we need to allocate a new signal structure +- */ +- newsig = NULL; +- if (atomic_read(&oldsig->count) > 1) { +- newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); +- if (!newsig) { +- kmem_cache_free(sighand_cachep, newsighand); +- return -ENOMEM; +- } +- atomic_set(&newsig->count, 1); +- newsig->group_exit = 0; +- newsig->group_exit_code = 0; +- newsig->group_exit_task = NULL; +- newsig->group_stop_count = 0; +- newsig->curr_target = NULL; +- init_sigpending(&newsig->shared_pending); +- INIT_LIST_HEAD(&newsig->posix_timers); +- +- newsig->tty = oldsig->tty; +- newsig->pgrp = oldsig->pgrp; +- newsig->session = oldsig->session; +- newsig->leader = oldsig->leader; +- newsig->tty_old_pgrp = oldsig->tty_old_pgrp; +- } +- + if (thread_group_empty(current)) + goto no_thread_group; + +@@ -618,7 +671,7 @@ static inline int de_thread(struct task_ + */ + read_lock(&tasklist_lock); + spin_lock_irq(lock); +- if (oldsig->group_exit) { ++ if (sig->group_exit) { + /* + * Another group action in progress, just + * return so that the signal is processed. 
+@@ -626,11 +679,9 @@ static inline int de_thread(struct task_ + spin_unlock_irq(lock); + read_unlock(&tasklist_lock); + kmem_cache_free(sighand_cachep, newsighand); +- if (newsig) +- kmem_cache_free(signal_cachep, newsig); + return -EAGAIN; + } +- oldsig->group_exit = 1; ++ sig->group_exit = 1; + zap_other_threads(current); + read_unlock(&tasklist_lock); + +@@ -640,14 +691,16 @@ static inline int de_thread(struct task_ + count = 2; + if (current->pid == current->tgid) + count = 1; +- while (atomic_read(&oldsig->count) > count) { +- oldsig->group_exit_task = current; +- oldsig->notify_count = count; ++ while (atomic_read(&sig->count) > count) { ++ sig->group_exit_task = current; ++ sig->notify_count = count; + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(lock); + schedule(); + spin_lock_irq(lock); + } ++ sig->group_exit_task = NULL; ++ sig->notify_count = 0; + spin_unlock_irq(lock); + + /* +@@ -656,22 +709,23 @@ static inline int de_thread(struct task_ + * and to assume its PID: + */ + if (current->pid != current->tgid) { +- struct task_struct *leader = current->group_leader, *parent; +- struct dentry *proc_dentry1, *proc_dentry2; +- unsigned long state, ptrace; ++ struct task_struct *parent; ++ struct dentry *proc_dentry1[2], *proc_dentry2[2]; ++ unsigned long exit_state, ptrace; + + /* + * Wait for the thread group leader to be a zombie. + * It should already be zombie at this point, most + * of the time. + */ +- while (leader->state != TASK_ZOMBIE) ++ leader = current->group_leader; ++ while (leader->exit_state != EXIT_ZOMBIE) + yield(); + + spin_lock(&leader->proc_lock); + spin_lock(¤t->proc_lock); +- proc_dentry1 = proc_pid_unhash(current); +- proc_dentry2 = proc_pid_unhash(leader); ++ proc_pid_unhash(current, proc_dentry1); ++ proc_pid_unhash(leader, proc_dentry2); + write_lock_irq(&tasklist_lock); + + if (leader->tgid != current->tgid) +@@ -709,7 +763,7 @@ static inline int de_thread(struct task_ + list_del(¤t->tasks); + list_add_tail(¤t->tasks, &init_task.tasks); + current->exit_signal = SIGCHLD; +- state = leader->state; ++ exit_state = leader->exit_state; + + write_unlock_irq(&tasklist_lock); + spin_unlock(&leader->proc_lock); +@@ -717,37 +771,53 @@ static inline int de_thread(struct task_ + proc_pid_flush(proc_dentry1); + proc_pid_flush(proc_dentry2); + +- if (state != TASK_ZOMBIE) ++ if (exit_state != EXIT_ZOMBIE) + BUG(); +- release_task(leader); + } + ++ /* ++ * Now there are really no other threads at all, ++ * so it's safe to stop telling them to kill themselves. ++ */ ++ sig->group_exit = 0; ++ + no_thread_group: ++ exit_itimers(sig); ++ if (leader) ++ release_task(leader); ++ BUG_ON(atomic_read(&sig->count) != 1); + +- write_lock_irq(&tasklist_lock); +- spin_lock(&oldsighand->siglock); +- spin_lock(&newsighand->siglock); +- +- if (current == oldsig->curr_target) +- oldsig->curr_target = next_thread(current); +- if (newsig) +- current->signal = newsig; +- current->sighand = newsighand; +- init_sigpending(¤t->pending); +- recalc_sigpending(); +- +- spin_unlock(&newsighand->siglock); +- spin_unlock(&oldsighand->siglock); +- write_unlock_irq(&tasklist_lock); ++ if (atomic_read(&oldsighand->count) == 1) { ++ /* ++ * Now that we nuked the rest of the thread group, ++ * it turns out we are not sharing sighand any more either. ++ * So we can just keep it. ++ */ ++ kmem_cache_free(sighand_cachep, newsighand); ++ } else { ++ /* ++ * Move our state over to newsighand and switch it in. 
++ */ ++ spin_lock_init(&newsighand->siglock); ++ atomic_set(&newsighand->count, 1); ++ memcpy(newsighand->action, oldsighand->action, ++ sizeof(newsighand->action)); + +- if (newsig && atomic_dec_and_test(&oldsig->count)) +- kmem_cache_free(signal_cachep, oldsig); ++ write_lock_irq(&tasklist_lock); ++ spin_lock(&oldsighand->siglock); ++ spin_lock(&newsighand->siglock); + +- if (atomic_dec_and_test(&oldsighand->count)) +- kmem_cache_free(sighand_cachep, oldsighand); ++ current->sighand = newsighand; ++ recalc_sigpending(); ++ ++ spin_unlock(&newsighand->siglock); ++ spin_unlock(&oldsighand->siglock); ++ write_unlock_irq(&tasklist_lock); ++ ++ if (atomic_dec_and_test(&oldsighand->count)) ++ kmem_cache_free(sighand_cachep, oldsighand); ++ } + +- if (!thread_group_empty(current)) +- BUG(); + if (current->tgid != current->pid) + BUG(); + return 0; +@@ -786,11 +856,27 @@ static inline void flush_old_files(struc + spin_unlock(&files->file_lock); + } + ++void get_task_comm(char *buf, struct task_struct *tsk) ++{ ++ /* buf must be at least sizeof(tsk->comm) in size */ ++ task_lock(tsk); ++ strncpy(buf, tsk->comm, sizeof(tsk->comm)); ++ task_unlock(tsk); ++} ++ ++void set_task_comm(struct task_struct *tsk, char *buf) ++{ ++ task_lock(tsk); ++ strlcpy(tsk->comm, buf, sizeof(tsk->comm)); ++ task_unlock(tsk); ++} ++ + int flush_old_exec(struct linux_binprm * bprm) + { + char * name; + int i, ch, retval; + struct files_struct *files; ++ char tcomm[sizeof(current->comm)]; + + /* + * Make sure we have a private signal table and that +@@ -812,12 +898,10 @@ int flush_old_exec(struct linux_binprm * + /* + * Release all of the old mmap stuff + */ +- retval = exec_mmap(bprm->mm); ++ retval = exec_mmap(bprm); + if (retval) + goto mmap_failed; + +- bprm->mm = NULL; /* We're using it now */ +- + /* This is the point of no return */ + steal_locks(files); + put_files_struct(files); +@@ -831,17 +915,19 @@ int flush_old_exec(struct linux_binprm * + if (ch == '/') + i = 0; + else +- if (i < 15) +- current->comm[i++] = ch; ++ if (i < (sizeof(tcomm) - 1)) ++ tcomm[i++] = ch; + } +- current->comm[i] = '\0'; ++ tcomm[i] = '\0'; ++ set_task_comm(current, tcomm); + + flush_thread(); + + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || +- permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || ++ permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL, NULL) || + (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) + current->mm->dumpable = 0; ++ current->mm->vps_dumpable = 1; + + /* An exec changes our domain. We are no longer part of the thread + group */ +@@ -872,13 +958,6 @@ int prepare_binprm(struct linux_binprm * + struct inode * inode = bprm->file->f_dentry->d_inode; + int retval; + +- mode = inode->i_mode; +- /* +- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, +- * vfs_permission lets a non-executable through +- */ +- if (!(mode & 0111)) /* with at least _one_ execute bit set */ +- return -EACCES; + if (bprm->file->f_op == NULL) + return -EACCES; + +@@ -886,10 +965,24 @@ int prepare_binprm(struct linux_binprm * + bprm->e_gid = current->egid; + + if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) { ++ if (!bprm->perm.set) { ++ /* ++ * This piece of code creates a time window between ++ * MAY_EXEC permission check and setuid/setgid ++ * operations and may be considered as a security hole. ++ * This code is here for compatibility reasons, ++ * if the filesystem is unable to return info now. 
++ */ ++ bprm->perm.mode = inode->i_mode; ++ bprm->perm.uid = inode->i_uid; ++ bprm->perm.gid = inode->i_gid; ++ } ++ mode = bprm->perm.mode; ++ + /* Set-uid? */ + if (mode & S_ISUID) { + current->personality &= ~PER_CLEAR_ON_SETID; +- bprm->e_uid = inode->i_uid; ++ bprm->e_uid = bprm->perm.uid; + } + + /* Set-gid? */ +@@ -900,7 +993,7 @@ int prepare_binprm(struct linux_binprm * + */ + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + current->personality &= ~PER_CLEAR_ON_SETID; +- bprm->e_gid = inode->i_gid; ++ bprm->e_gid = bprm->perm.gid; + } + } + +@@ -993,7 +1086,7 @@ int search_binary_handler(struct linux_b + + loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + +- file = open_exec("/sbin/loader"); ++ file = open_exec("/sbin/loader", bprm); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; +@@ -1079,7 +1172,11 @@ int do_execve(char * filename, + int retval; + int i; + +- file = open_exec(filename); ++ retval = virtinfo_gencall(VIRTINFO_DOEXECVE, NULL); ++ if (retval) ++ return retval; ++ ++ file = open_exec(filename, &bprm); + + retval = PTR_ERR(file); + if (IS_ERR(file)) +@@ -1222,7 +1319,7 @@ void format_corename(char *corename, con + case 'p': + pid_in_pattern = 1; + rc = snprintf(out_ptr, out_end - out_ptr, +- "%d", current->tgid); ++ "%d", virt_tgid(current)); + if (rc > out_end - out_ptr) + goto out; + out_ptr += rc; +@@ -1266,7 +1363,7 @@ void format_corename(char *corename, con + case 'h': + down_read(&uts_sem); + rc = snprintf(out_ptr, out_end - out_ptr, +- "%s", system_utsname.nodename); ++ "%s", ve_utsname.nodename); + up_read(&uts_sem); + if (rc > out_end - out_ptr) + goto out; +@@ -1294,7 +1391,7 @@ void format_corename(char *corename, con + if (!pid_in_pattern + && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { + rc = snprintf(out_ptr, out_end - out_ptr, +- ".%d", current->tgid); ++ ".%d", virt_tgid(current)); + if (rc > out_end - out_ptr) + goto out; + out_ptr += rc; +@@ -1308,6 +1405,7 @@ static void zap_threads (struct mm_struc + struct task_struct *g, *p; + struct task_struct *tsk = current; + struct completion *vfork_done = tsk->vfork_done; ++ int traced = 0; + + /* + * Make sure nobody is waiting for us to release the VM, +@@ -1319,14 +1417,34 @@ static void zap_threads (struct mm_struc + } + + read_lock(&tasklist_lock); +- do_each_thread(g,p) ++ do_each_thread_ve(g,p) + if (mm == p->mm && p != tsk) { + force_sig_specific(SIGKILL, p); + mm->core_waiters++; ++ if (unlikely(p->ptrace) && ++ unlikely(p->parent->mm == mm)) ++ traced = 1; + } +- while_each_thread(g,p); ++ while_each_thread_ve(g,p); + + read_unlock(&tasklist_lock); ++ ++ if (unlikely(traced)) { ++ /* ++ * We are zapping a thread and the thread it ptraces. ++ * If the tracee went into a ptrace stop for exit tracing, ++ * we could deadlock since the tracer is waiting for this ++ * coredump to finish. Detach them so they can both die. 
++ */ ++ write_lock_irq(&tasklist_lock); ++ do_each_thread_ve(g,p) { ++ if (mm == p->mm && p != tsk && ++ p->ptrace && p->parent->mm == mm) { ++ __ptrace_detach(p, 0); ++ } ++ } while_each_thread_ve(g,p); ++ write_unlock_irq(&tasklist_lock); ++ } + } + + static void coredump_wait(struct mm_struct *mm) +@@ -1362,7 +1480,8 @@ int do_coredump(long signr, int exit_cod + if (!binfmt || !binfmt->core_dump) + goto fail; + down_write(&mm->mmap_sem); +- if (!mm->dumpable) { ++ if (!mm->dumpable || ++ (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) { + up_write(&mm->mmap_sem); + goto fail; + } +diff -uprN linux-2.6.8.1.orig/fs/ext2/acl.c linux-2.6.8.1-ve022stab078/fs/ext2/acl.c +--- linux-2.6.8.1.orig/fs/ext2/acl.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/acl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -286,7 +286,7 @@ ext2_set_acl(struct inode *inode, int ty + * inode->i_sem: don't care + */ + int +-ext2_permission(struct inode *inode, int mask, struct nameidata *nd) ++__ext2_permission(struct inode *inode, int mask) + { + int mode = inode->i_mode; + +@@ -336,6 +336,29 @@ check_capabilities: + return -EACCES; + } + ++int ++ext2_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) ++{ ++ int ret; ++ ++ if (exec_perm != NULL) ++ down(&inode->i_sem); ++ ++ ret = __ext2_permission(inode, mask); ++ ++ if (exec_perm != NULL) { ++ if (!ret) { ++ exec_perm->set = 1; ++ exec_perm->mode = inode->i_mode; ++ exec_perm->uid = inode->i_uid; ++ exec_perm->gid = inode->i_gid; ++ } ++ up(&inode->i_sem); ++ } ++ return ret; ++} ++ + /* + * Initialize the ACLs of a new inode. Called from ext2_new_inode. + * +diff -uprN linux-2.6.8.1.orig/fs/ext2/acl.h linux-2.6.8.1-ve022stab078/fs/ext2/acl.h +--- linux-2.6.8.1.orig/fs/ext2/acl.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/acl.h 2006-05-11 13:05:35.000000000 +0400 +@@ -10,18 +10,18 @@ + #define EXT2_ACL_MAX_ENTRIES 32 + + typedef struct { +- __u16 e_tag; +- __u16 e_perm; +- __u32 e_id; ++ __le16 e_tag; ++ __le16 e_perm; ++ __le32 e_id; + } ext2_acl_entry; + + typedef struct { +- __u16 e_tag; +- __u16 e_perm; ++ __le16 e_tag; ++ __le16 e_perm; + } ext2_acl_entry_short; + + typedef struct { +- __u32 a_version; ++ __le32 a_version; + } ext2_acl_header; + + static inline size_t ext2_acl_size(int count) +@@ -59,7 +59,8 @@ static inline int ext2_acl_count(size_t + #define EXT2_ACL_NOT_CACHED ((void *)-1) + + /* acl.c */ +-extern int ext2_permission (struct inode *, int, struct nameidata *); ++extern int ext2_permission (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern int ext2_acl_chmod (struct inode *); + extern int ext2_init_acl (struct inode *, struct inode *); + +diff -uprN linux-2.6.8.1.orig/fs/ext2/balloc.c linux-2.6.8.1-ve022stab078/fs/ext2/balloc.c +--- linux-2.6.8.1.orig/fs/ext2/balloc.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/balloc.c 2006-05-11 13:05:31.000000000 +0400 +@@ -88,8 +88,8 @@ read_block_bitmap(struct super_block *sb + if (!bh) + ext2_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " +- "block_group = %d, block_bitmap = %lu", +- block_group, (unsigned long) desc->bg_block_bitmap); ++ "block_group = %d, block_bitmap = %u", ++ block_group, le32_to_cpu(desc->bg_block_bitmap)); + error_out: + return bh; + } +diff -uprN linux-2.6.8.1.orig/fs/ext2/dir.c linux-2.6.8.1-ve022stab078/fs/ext2/dir.c +--- linux-2.6.8.1.orig/fs/ext2/dir.c 2004-08-14 14:55:33.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/fs/ext2/dir.c 2006-05-11 13:05:33.000000000 +0400 +@@ -251,7 +251,7 @@ ext2_readdir (struct file * filp, void * + loff_t pos = filp->f_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; +- unsigned offset = pos & ~PAGE_CACHE_MASK; ++ unsigned int offset = pos & ~PAGE_CACHE_MASK; + unsigned long n = pos >> PAGE_CACHE_SHIFT; + unsigned long npages = dir_pages(inode); + unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); +@@ -270,8 +270,13 @@ ext2_readdir (struct file * filp, void * + ext2_dirent *de; + struct page *page = ext2_get_page(inode, n); + +- if (IS_ERR(page)) ++ if (IS_ERR(page)) { ++ ext2_error(sb, __FUNCTION__, ++ "bad page in #%lu", ++ inode->i_ino); ++ filp->f_pos += PAGE_CACHE_SIZE - offset; + continue; ++ } + kaddr = page_address(page); + if (need_revalidate) { + offset = ext2_validate_entry(kaddr, offset, chunk_mask); +@@ -303,6 +308,7 @@ ext2_readdir (struct file * filp, void * + goto success; + } + } ++ filp->f_pos += le16_to_cpu(de->rec_len); + } + ext2_put_page(page); + } +@@ -310,7 +316,6 @@ ext2_readdir (struct file * filp, void * + success: + ret = 0; + done: +- filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; + filp->f_version = inode->i_version; + return ret; + } +@@ -420,7 +425,7 @@ void ext2_set_link(struct inode *dir, st + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); + ext2_put_page(page); +- dir->i_mtime = dir->i_ctime = CURRENT_TIME; ++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + } +@@ -510,7 +515,7 @@ got_it: + de->inode = cpu_to_le32(inode->i_ino); + ext2_set_de_type (de, inode); + err = ext2_commit_chunk(page, from, to); +- dir->i_mtime = dir->i_ctime = CURRENT_TIME; ++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(dir); + /* OFFSET_CACHE */ +@@ -558,7 +563,7 @@ int ext2_delete_entry (struct ext2_dir_e + pde->rec_len = cpu_to_le16(to-from); + dir->inode = 0; + err = ext2_commit_chunk(page, from, to); +- inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; + mark_inode_dirty(inode); + out: +@@ -586,6 +591,7 @@ int ext2_make_empty(struct inode *inode, + goto fail; + } + kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr, 0, chunk_size); + de = (struct ext2_dir_entry_2 *)kaddr; + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); +diff -uprN linux-2.6.8.1.orig/fs/ext2/ext2.h linux-2.6.8.1-ve022stab078/fs/ext2/ext2.h +--- linux-2.6.8.1.orig/fs/ext2/ext2.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/ext2.h 2006-05-11 13:05:35.000000000 +0400 +@@ -5,7 +5,7 @@ + * second extended file system inode data in memory + */ + struct ext2_inode_info { +- __u32 i_data[15]; ++ __le32 i_data[15]; + __u32 i_flags; + __u32 i_faddr; + __u8 i_frag_no; +@@ -115,7 +115,7 @@ extern unsigned long ext2_count_free (st + + /* inode.c */ + extern void ext2_read_inode (struct inode *); +-extern void ext2_write_inode (struct inode *, int); ++extern int ext2_write_inode (struct inode *, int); + extern void ext2_put_inode (struct inode *); + extern void ext2_delete_inode (struct inode *); + extern int ext2_sync_inode (struct inode *); +@@ -131,9 +131,6 @@ extern int ext2_ioctl (struct inode *, s + /* super.c */ + extern void ext2_error (struct super_block *, const char *, const char *, ...) 
+ __attribute__ ((format (printf, 3, 4))); +-extern NORET_TYPE void ext2_panic (struct super_block *, const char *, +- const char *, ...) +- __attribute__ ((NORET_AND format (printf, 3, 4))); + extern void ext2_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); + extern void ext2_update_dynamic_rev (struct super_block *sb); +diff -uprN linux-2.6.8.1.orig/fs/ext2/ialloc.c linux-2.6.8.1-ve022stab078/fs/ext2/ialloc.c +--- linux-2.6.8.1.orig/fs/ext2/ialloc.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/ialloc.c 2006-05-11 13:05:32.000000000 +0400 +@@ -57,8 +57,8 @@ read_inode_bitmap(struct super_block * s + if (!bh) + ext2_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " +- "block_group = %lu, inode_bitmap = %lu", +- block_group, (unsigned long) desc->bg_inode_bitmap); ++ "block_group = %lu, inode_bitmap = %u", ++ block_group, le32_to_cpu(desc->bg_inode_bitmap)); + error_out: + return bh; + } +@@ -577,7 +577,7 @@ got: + inode->i_ino = ino; + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = 0; +- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + if (S_ISLNK(mode)) +diff -uprN linux-2.6.8.1.orig/fs/ext2/inode.c linux-2.6.8.1-ve022stab078/fs/ext2/inode.c +--- linux-2.6.8.1.orig/fs/ext2/inode.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -142,12 +142,12 @@ static int ext2_alloc_block (struct inod + } + + typedef struct { +- u32 *p; +- u32 key; ++ __le32 *p; ++ __le32 key; + struct buffer_head *bh; + } Indirect; + +-static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v) ++static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) + { + p->key = *(p->p = v); + p->bh = bh; +@@ -280,7 +280,7 @@ static Indirect *ext2_get_branch(struct + read_lock(&EXT2_I(inode)->i_meta_lock); + if (!verify_chain(chain, p)) + goto changed; +- add_chain(++p, bh, (u32*)bh->b_data + *++offsets); ++ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + read_unlock(&EXT2_I(inode)->i_meta_lock); + if (!p->key) + goto no_block; +@@ -321,8 +321,8 @@ no_block: + static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) + { + struct ext2_inode_info *ei = EXT2_I(inode); +- u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; +- u32 *p; ++ __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; ++ __le32 *p; + unsigned long bg_start; + unsigned long colour; + +@@ -440,7 +440,7 @@ static int ext2_alloc_branch(struct inod + lock_buffer(bh); + memset(bh->b_data, 0, blocksize); + branch[n].bh = bh; +- branch[n].p = (u32*) bh->b_data + offsets[n]; ++ branch[n].p = (__le32 *) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + set_buffer_uptodate(bh); + unlock_buffer(bh); +@@ -506,7 +506,7 @@ static inline int ext2_splice_branch(str + + /* We are done with atomic stuff, now do the rest of housekeeping */ + +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + + /* had we spliced it onto indirect block? */ + if (where->bh) +@@ -702,7 +702,7 @@ struct address_space_operations ext2_nob + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? 
+ */ +-static inline int all_zeroes(u32 *p, u32 *q) ++static inline int all_zeroes(__le32 *p, __le32 *q) + { + while (p < q) + if (*p++) +@@ -748,7 +748,7 @@ static Indirect *ext2_find_shared(struct + int depth, + int offsets[4], + Indirect chain[4], +- u32 *top) ++ __le32 *top) + { + Indirect *partial, *p; + int k, err; +@@ -768,7 +768,7 @@ static Indirect *ext2_find_shared(struct + write_unlock(&EXT2_I(inode)->i_meta_lock); + goto no_top; + } +- for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--) ++ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our +@@ -803,7 +803,7 @@ no_top: + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +-static inline void ext2_free_data(struct inode *inode, u32 *p, u32 *q) ++static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) + { + unsigned long block_to_free = 0, count = 0; + unsigned long nr; +@@ -843,7 +843,7 @@ static inline void ext2_free_data(struct + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +-static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth) ++static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) + { + struct buffer_head * bh; + unsigned long nr; +@@ -867,8 +867,8 @@ static void ext2_free_branches(struct in + continue; + } + ext2_free_branches(inode, +- (u32*)bh->b_data, +- (u32*)bh->b_data + addr_per_block, ++ (__le32*)bh->b_data, ++ (__le32*)bh->b_data + addr_per_block, + depth); + bforget(bh); + ext2_free_blocks(inode, nr, 1); +@@ -880,12 +880,12 @@ static void ext2_free_branches(struct in + + void ext2_truncate (struct inode * inode) + { +- u32 *i_data = EXT2_I(inode)->i_data; ++ __le32 *i_data = EXT2_I(inode)->i_data; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int offsets[4]; + Indirect chain[4]; + Indirect *partial; +- int nr = 0; ++ __le32 nr = 0; + int n; + long iblock; + unsigned blocksize; +@@ -933,7 +933,7 @@ void ext2_truncate (struct inode * inode + while (partial > chain) { + ext2_free_branches(inode, + partial->p + 1, +- (u32*)partial->bh->b_data + addr_per_block, ++ (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + mark_buffer_dirty_inode(partial->bh, inode); + brelse (partial->bh); +@@ -966,7 +966,7 @@ do_indirects: + case EXT2_TIND_BLOCK: + ; + } +- inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + if (inode_needs_sync(inode)) { + sync_mapping_buffers(inode->i_mapping); + ext2_sync_inode (inode); +@@ -1248,9 +1248,9 @@ static int ext2_update_inode(struct inod + return err; + } + +-void ext2_write_inode(struct inode *inode, int wait) ++int ext2_write_inode(struct inode *inode, int wait) + { +- ext2_update_inode(inode, wait); ++ return ext2_update_inode(inode, wait); + } + + int ext2_sync_inode(struct inode *inode) +diff -uprN linux-2.6.8.1.orig/fs/ext2/ioctl.c linux-2.6.8.1-ve022stab078/fs/ext2/ioctl.c +--- linux-2.6.8.1.orig/fs/ext2/ioctl.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/ioctl.c 2006-05-11 13:05:32.000000000 +0400 +@@ -59,7 +59,7 @@ int ext2_ioctl (struct inode * inode, st + ei->i_flags = flags; + + ext2_set_inode_flags(inode); +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + } +@@ -72,7 +72,7 @@ int ext2_ioctl (struct inode * inode, st + return -EROFS; 
+ if (get_user(inode->i_generation, (int __user *) arg)) + return -EFAULT; +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + return 0; + default: +diff -uprN linux-2.6.8.1.orig/fs/ext2/namei.c linux-2.6.8.1-ve022stab078/fs/ext2/namei.c +--- linux-2.6.8.1.orig/fs/ext2/namei.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/namei.c 2006-05-11 13:05:43.000000000 +0400 +@@ -30,6 +30,7 @@ + */ + + #include <linux/pagemap.h> ++#include <linux/quotaops.h> + #include "ext2.h" + #include "xattr.h" + #include "acl.h" +@@ -181,7 +182,7 @@ static int ext2_symlink (struct inode * + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; +- err = page_symlink(inode, symname, l); ++ err = page_symlink(inode, symname, l, GFP_KERNEL); + if (err) + goto out_fail; + } else { +@@ -210,7 +211,7 @@ static int ext2_link (struct dentry * ol + if (inode->i_nlink >= EXT2_LINK_MAX) + return -EMLINK; + +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + ext2_inc_count(inode); + atomic_inc(&inode->i_count); + +@@ -269,6 +270,8 @@ static int ext2_unlink(struct inode * di + struct page * page; + int err = -ENOENT; + ++ DQUOT_INIT(inode); ++ + de = ext2_find_entry (dir, dentry, &page); + if (!de) + goto out; +@@ -311,6 +314,9 @@ static int ext2_rename (struct inode * o + struct ext2_dir_entry_2 * old_de; + int err = -ENOENT; + ++ if (new_inode) ++ DQUOT_INIT(new_inode); ++ + old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + if (!old_de) + goto out; +@@ -336,7 +342,7 @@ static int ext2_rename (struct inode * o + goto out_dir; + ext2_inc_count(old_inode); + ext2_set_link(new_dir, new_de, new_page, old_inode); +- new_inode->i_ctime = CURRENT_TIME; ++ new_inode->i_ctime = CURRENT_TIME_SEC; + if (dir_de) + new_inode->i_nlink--; + ext2_dec_count(new_inode); +@@ -361,7 +367,7 @@ static int ext2_rename (struct inode * o + * rename. + * ext2_dec_count() will mark the inode dirty. + */ +- old_inode->i_ctime = CURRENT_TIME; ++ old_inode->i_ctime = CURRENT_TIME_SEC; + + ext2_delete_entry (old_de, old_page); + ext2_dec_count(old_inode); +diff -uprN linux-2.6.8.1.orig/fs/ext2/super.c linux-2.6.8.1-ve022stab078/fs/ext2/super.c +--- linux-2.6.8.1.orig/fs/ext2/super.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/super.c 2006-05-11 13:05:40.000000000 +0400 +@@ -37,8 +37,6 @@ static void ext2_sync_super(struct super + static int ext2_remount (struct super_block * sb, int * flags, char * data); + static int ext2_statfs (struct super_block * sb, struct kstatfs * buf); + +-static char error_buf[1024]; +- + void ext2_error (struct super_block * sb, const char * function, + const char * fmt, ...) 
+ { +@@ -52,51 +50,32 @@ void ext2_error (struct super_block * sb + cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS); + ext2_sync_super(sb, es); + } +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- if (test_opt (sb, ERRORS_PANIC)) +- panic ("EXT2-fs panic (device %s): %s: %s\n", +- sb->s_id, function, error_buf); +- printk (KERN_CRIT "EXT2-fs error (device %s): %s: %s\n", +- sb->s_id, function, error_buf); +- if (test_opt (sb, ERRORS_RO)) { +- printk ("Remounting filesystem read-only\n"); ++ ++ va_start(args, fmt); ++ printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); ++ ++ if (test_opt(sb, ERRORS_PANIC)) ++ panic("EXT2-fs panic from previous error\n"); ++ if (test_opt(sb, ERRORS_RO)) { ++ printk("Remounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + } + } + +-NORET_TYPE void ext2_panic (struct super_block * sb, const char * function, +- const char * fmt, ...) +-{ +- va_list args; +- struct ext2_sb_info *sbi = EXT2_SB(sb); +- +- if (!(sb->s_flags & MS_RDONLY)) { +- sbi->s_mount_state |= EXT2_ERROR_FS; +- sbi->s_es->s_state = +- cpu_to_le16(le16_to_cpu(sbi->s_es->s_state) | EXT2_ERROR_FS); +- mark_buffer_dirty(sbi->s_sbh); +- sb->s_dirt = 1; +- } +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- sb->s_flags |= MS_RDONLY; +- panic ("EXT2-fs panic (device %s): %s: %s\n", +- sb->s_id, function, error_buf); +-} +- + void ext2_warning (struct super_block * sb, const char * function, + const char * fmt, ...) + { + va_list args; + +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- printk (KERN_WARNING "EXT2-fs warning (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ va_start(args, fmt); ++ printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", ++ sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); + } + + void ext2_update_dynamic_rev(struct super_block *sb) +@@ -134,7 +113,7 @@ static void ext2_put_super (struct super + if (!(sb->s_flags & MS_RDONLY)) { + struct ext2_super_block *es = sbi->s_es; + +- es->s_state = le16_to_cpu(sbi->s_mount_state); ++ es->s_state = cpu_to_le16(sbi->s_mount_state); + ext2_sync_super(sb, es); + } + db_count = sbi->s_gdb_count; +@@ -143,6 +122,9 @@ static void ext2_put_super (struct super + brelse (sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); + kfree(sbi->s_debts); ++ percpu_counter_destroy(&sbi->s_freeblocks_counter); ++ percpu_counter_destroy(&sbi->s_freeinodes_counter); ++ percpu_counter_destroy(&sbi->s_dirs_counter); + brelse (sbi->s_sbh); + sb->s_fs_info = NULL; + kfree(sbi); +@@ -189,7 +171,7 @@ static int init_inodecache(void) + { + ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", + sizeof(struct ext2_inode_info), +- 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, ++ 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (ext2_inode_cachep == NULL) + return -ENOMEM; +@@ -449,8 +431,8 @@ static int ext2_setup_super (struct supe + (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) + printk ("EXT2-fs warning: checktime reached, " + "running e2fsck is recommended\n"); +- if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) +- es->s_max_mnt_count = (__s16) cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); ++ if (!le16_to_cpu(es->s_max_mnt_count)) ++ es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + ext2_write_super(sb); + if (test_opt (sb, 
DEBUG)) +@@ -529,12 +511,18 @@ static int ext2_check_descriptors (struc + static loff_t ext2_max_size(int bits) + { + loff_t res = EXT2_NDIR_BLOCKS; ++ /* This constant is calculated to be the largest file size for a ++ * dense, 4k-blocksize file such that the total number of ++ * sectors in the file, including data and all indirect blocks, ++ * does not exceed 2^32. */ ++ const loff_t upper_limit = 0x1ff7fffd000LL; ++ + res += 1LL << (bits-2); + res += 1LL << (2*(bits-2)); + res += 1LL << (3*(bits-2)); + res <<= bits; +- if (res > (512LL << 32) - (1 << bits)) +- res = (512LL << 32) - (1 << bits); ++ if (res > upper_limit) ++ res = upper_limit; + return res; + } + +@@ -572,6 +560,7 @@ static int ext2_fill_super(struct super_ + int blocksize = BLOCK_SIZE; + int db_count; + int i, j; ++ __le32 features; + + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) +@@ -614,7 +603,7 @@ static int ext2_fill_super(struct super_ + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; + sb->s_magic = le16_to_cpu(es->s_magic); +- sb->s_flags |= MS_ONE_SECOND; ++ set_sb_time_gran(sb, 1000000000U); + if (sb->s_magic != EXT2_SUPER_MAGIC) { + if (!silent) + printk ("VFS: Can't find ext2 filesystem on dev %s.\n", +@@ -661,17 +650,18 @@ static int ext2_fill_super(struct super_ + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. + */ +- if ((i = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))) { ++ features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); ++ if (features) { + printk("EXT2-fs: %s: couldn't mount because of " + "unsupported optional features (%x).\n", +- sb->s_id, i); ++ sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + if (!(sb->s_flags & MS_RDONLY) && +- (i = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ ++ (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ + printk("EXT2-fs: %s: couldn't mount RDWR because of " + "unsupported optional features (%x).\n", +- sb->s_id, i); ++ sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); +@@ -694,7 +684,7 @@ static int ext2_fill_super(struct super_ + } + es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; +- if (es->s_magic != le16_to_cpu(EXT2_SUPER_MAGIC)) { ++ if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk ("EXT2-fs: Magic mismatch, very weird !\n"); + goto failed_mount; + } +@@ -937,12 +927,12 @@ static int ext2_remount (struct super_bl + es->s_state = cpu_to_le16(sbi->s_mount_state); + es->s_mtime = cpu_to_le32(get_seconds()); + } else { +- int ret; +- if ((ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, +- ~EXT2_FEATURE_RO_COMPAT_SUPP))) { ++ __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, ++ ~EXT2_FEATURE_RO_COMPAT_SUPP); ++ if (ret) { + printk("EXT2-fs: %s: couldn't remount RDWR because of " + "unsupported optional features (%x).\n", +- sb->s_id, ret); ++ sb->s_id, le32_to_cpu(ret)); + return -EROFS; + } + /* +@@ -1018,7 +1008,7 @@ static struct file_system_type ext2_fs_t + .name = "ext2", + .get_sb = ext2_get_sb, + .kill_sb = kill_block_super, +- .fs_flags = FS_REQUIRES_DEV, ++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED, + }; + + static int __init init_ext2_fs(void) +diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr.c linux-2.6.8.1-ve022stab078/fs/ext2/xattr.c +--- linux-2.6.8.1.orig/fs/ext2/xattr.c 2004-08-14 14:55:35.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/fs/ext2/xattr.c 2006-05-11 13:05:32.000000000 +0400 +@@ -803,7 +803,7 @@ ext2_xattr_set2(struct inode *inode, str + + /* Update the inode. */ + EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + if (IS_SYNC(inode)) { + error = ext2_sync_inode (inode); + if (error) +@@ -1071,7 +1071,7 @@ static inline void ext2_xattr_hash_entry + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { +- __u32 *value = (__u32 *)((char *)header + ++ __le32 *value = (__le32 *)((char *)header + + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + + EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { +diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr.h linux-2.6.8.1-ve022stab078/fs/ext2/xattr.h +--- linux-2.6.8.1.orig/fs/ext2/xattr.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/xattr.h 2006-05-11 13:05:31.000000000 +0400 +@@ -26,20 +26,20 @@ + #define EXT2_XATTR_INDEX_SECURITY 6 + + struct ext2_xattr_header { +- __u32 h_magic; /* magic number for identification */ +- __u32 h_refcount; /* reference count */ +- __u32 h_blocks; /* number of disk blocks used */ +- __u32 h_hash; /* hash value of all attributes */ ++ __le32 h_magic; /* magic number for identification */ ++ __le32 h_refcount; /* reference count */ ++ __le32 h_blocks; /* number of disk blocks used */ ++ __le32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ + }; + + struct ext2_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ +- __u16 e_value_offs; /* offset in disk block of value */ +- __u32 e_value_block; /* disk block attribute is stored on (n/i) */ +- __u32 e_value_size; /* size of attribute value */ +- __u32 e_hash; /* hash value of name and value */ ++ __le16 e_value_offs; /* offset in disk block of value */ ++ __le32 e_value_block; /* disk block attribute is stored on (n/i) */ ++ __le32 e_value_size; /* size of attribute value */ ++ __le32 e_hash; /* hash value of name and value */ + char e_name[0]; /* attribute name */ + }; + +diff -uprN linux-2.6.8.1.orig/fs/ext2/xattr_user.c linux-2.6.8.1-ve022stab078/fs/ext2/xattr_user.c +--- linux-2.6.8.1.orig/fs/ext2/xattr_user.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext2/xattr_user.c 2006-05-11 13:05:35.000000000 +0400 +@@ -40,7 +40,7 @@ ext2_xattr_user_get(struct inode *inode, + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; +- error = permission(inode, MAY_READ, NULL); ++ error = permission(inode, MAY_READ, NULL, NULL); + if (error) + return error; + +@@ -60,7 +60,7 @@ ext2_xattr_user_set(struct inode *inode, + if ( !S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; +- error = permission(inode, MAY_WRITE, NULL); ++ error = permission(inode, MAY_WRITE, NULL, NULL); + if (error) + return error; + +diff -uprN linux-2.6.8.1.orig/fs/ext3/Makefile linux-2.6.8.1-ve022stab078/fs/ext3/Makefile +--- linux-2.6.8.1.orig/fs/ext3/Makefile 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/Makefile 2006-05-11 13:05:31.000000000 +0400 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ ioctl.o namei.o super.o symlink.o hash.o resize.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o 
xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +diff -uprN linux-2.6.8.1.orig/fs/ext3/acl.c linux-2.6.8.1-ve022stab078/fs/ext3/acl.c +--- linux-2.6.8.1.orig/fs/ext3/acl.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/acl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -291,7 +291,7 @@ ext3_set_acl(handle_t *handle, struct in + * inode->i_sem: don't care + */ + int +-ext3_permission(struct inode *inode, int mask, struct nameidata *nd) ++__ext3_permission(struct inode *inode, int mask) + { + int mode = inode->i_mode; + +@@ -341,6 +341,29 @@ check_capabilities: + return -EACCES; + } + ++int ++ext3_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) ++{ ++ int ret; ++ ++ if (exec_perm != NULL) ++ down(&inode->i_sem); ++ ++ ret = __ext3_permission(inode, mask); ++ ++ if (exec_perm != NULL) { ++ if (!ret) { ++ exec_perm->set = 1; ++ exec_perm->mode = inode->i_mode; ++ exec_perm->uid = inode->i_uid; ++ exec_perm->gid = inode->i_gid; ++ } ++ up(&inode->i_sem); ++ } ++ return ret; ++} ++ + /* + * Initialize the ACLs of a new inode. Called from ext3_new_inode. + * +diff -uprN linux-2.6.8.1.orig/fs/ext3/acl.h linux-2.6.8.1-ve022stab078/fs/ext3/acl.h +--- linux-2.6.8.1.orig/fs/ext3/acl.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/acl.h 2006-05-11 13:05:35.000000000 +0400 +@@ -10,18 +10,18 @@ + #define EXT3_ACL_MAX_ENTRIES 32 + + typedef struct { +- __u16 e_tag; +- __u16 e_perm; +- __u32 e_id; ++ __le16 e_tag; ++ __le16 e_perm; ++ __le32 e_id; + } ext3_acl_entry; + + typedef struct { +- __u16 e_tag; +- __u16 e_perm; ++ __le16 e_tag; ++ __le16 e_perm; + } ext3_acl_entry_short; + + typedef struct { +- __u32 a_version; ++ __le32 a_version; + } ext3_acl_header; + + static inline size_t ext3_acl_size(int count) +@@ -59,7 +59,8 @@ static inline int ext3_acl_count(size_t + #define EXT3_ACL_NOT_CACHED ((void *)-1) + + /* acl.c */ +-extern int ext3_permission (struct inode *, int, struct nameidata *); ++extern int ext3_permission (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern int ext3_acl_chmod (struct inode *); + extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); + +diff -uprN linux-2.6.8.1.orig/fs/ext3/balloc.c linux-2.6.8.1-ve022stab078/fs/ext3/balloc.c +--- linux-2.6.8.1.orig/fs/ext3/balloc.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/balloc.c 2006-05-11 13:05:31.000000000 +0400 +@@ -54,6 +54,7 @@ struct ext3_group_desc * ext3_get_group_ + + return NULL; + } ++ smp_rmb(); + + group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); + desc = block_group % EXT3_DESC_PER_BLOCK(sb); +@@ -91,15 +92,16 @@ read_block_bitmap(struct super_block *sb + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " +- "block_group = %d, block_bitmap = %lu", +- block_group, (unsigned long) desc->bg_block_bitmap); ++ "block_group = %d, block_bitmap = %u", ++ block_group, le32_to_cpu(desc->bg_block_bitmap)); + error_out: + return bh; + } + + /* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks (handle_t *handle, struct inode * inode, +- unsigned long block, unsigned long count) ++void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, ++ unsigned long block, unsigned long count, ++ int *pdquot_freed_blocks) + { + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gd_bh; +@@ -107,18 +109,12 @@ void ext3_free_blocks (handle_t *handle, + unsigned long bit; + unsigned long i; 
+ unsigned long overflow; +- struct super_block * sb; + struct ext3_group_desc * gdp; + struct ext3_super_block * es; + struct ext3_sb_info *sbi; + int err = 0, ret; +- int dquot_freed_blocks = 0; + +- sb = inode->i_sb; +- if (!sb) { +- printk ("ext3_free_blocks: nonexistent device"); +- return; +- } ++ *pdquot_freed_blocks = 0; + sbi = EXT3_SB(sb); + es = EXT3_SB(sb)->s_es; + if (block < le32_to_cpu(es->s_first_data_block) || +@@ -245,7 +241,7 @@ do_more: + jbd_lock_bh_state(bitmap_bh); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { +- dquot_freed_blocks++; ++ (*pdquot_freed_blocks)++; + } + } + jbd_unlock_bh_state(bitmap_bh); +@@ -253,7 +249,7 @@ do_more: + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + +- dquot_freed_blocks); ++ *pdquot_freed_blocks); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +@@ -275,6 +271,22 @@ do_more: + error_return: + brelse(bitmap_bh); + ext3_std_error(sb, err); ++ return; ++} ++ ++/* Free given blocks, update quota and i_blocks field */ ++void ext3_free_blocks(handle_t *handle, struct inode *inode, ++ unsigned long block, unsigned long count) ++{ ++ struct super_block * sb; ++ int dquot_freed_blocks; ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk ("ext3_free_blocks: nonexistent device"); ++ return; ++ } ++ ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); + if (dquot_freed_blocks) + DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + return; +@@ -523,6 +535,8 @@ ext3_new_block(handle_t *handle, struct + #ifdef EXT3FS_DEBUG + static int goal_hits, goal_attempts; + #endif ++ unsigned long ngroups; ++ + *errp = -ENOSPC; + sb = inode->i_sb; + if (!sb) { +@@ -574,13 +588,16 @@ ext3_new_block(handle_t *handle, struct + goto allocated; + } + ++ ngroups = EXT3_SB(sb)->s_groups_count; ++ smp_rmb(); ++ + /* + * Now search the rest of the groups. We assume that + * i and gdp correctly point to the last group visited. + */ +- for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) { ++ for (bgi = 0; bgi < ngroups; bgi++) { + group_no++; +- if (group_no >= EXT3_SB(sb)->s_groups_count) ++ if (group_no >= ngroups) + group_no = 0; + gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) { +@@ -715,6 +732,7 @@ unsigned long ext3_count_free_blocks(str + unsigned long desc_count; + struct ext3_group_desc *gdp; + int i; ++ unsigned long ngroups; + #ifdef EXT3FS_DEBUG + struct ext3_super_block *es; + unsigned long bitmap_count, x; +@@ -747,7 +765,9 @@ unsigned long ext3_count_free_blocks(str + return bitmap_count; + #else + desc_count = 0; +- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { ++ ngroups = EXT3_SB(sb)->s_groups_count; ++ smp_rmb(); ++ for (i = 0; i < ngroups; i++) { + gdp = ext3_get_group_desc(sb, i, NULL); + if (!gdp) + continue; +diff -uprN linux-2.6.8.1.orig/fs/ext3/fsync.c linux-2.6.8.1-ve022stab078/fs/ext3/fsync.c +--- linux-2.6.8.1.orig/fs/ext3/fsync.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/fsync.c 2006-05-11 13:05:31.000000000 +0400 +@@ -49,10 +49,6 @@ int ext3_sync_file(struct file * file, s + + J_ASSERT(ext3_journal_current_handle() == 0); + +- smp_mb(); /* prepare for lockless i_state read */ +- if (!(inode->i_state & I_DIRTY)) +- goto out; +- + /* + * data=writeback: + * The caller's filemap_fdatawrite()/wait will sync the data. 
+diff -uprN linux-2.6.8.1.orig/fs/ext3/ialloc.c linux-2.6.8.1-ve022stab078/fs/ext3/ialloc.c +--- linux-2.6.8.1.orig/fs/ext3/ialloc.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/ialloc.c 2006-05-11 13:05:32.000000000 +0400 +@@ -64,8 +64,8 @@ read_inode_bitmap(struct super_block * s + if (!bh) + ext3_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " +- "block_group = %lu, inode_bitmap = %lu", +- block_group, (unsigned long) desc->bg_inode_bitmap); ++ "block_group = %lu, inode_bitmap = %u", ++ block_group, le32_to_cpu(desc->bg_inode_bitmap)); + error_out: + return bh; + } +@@ -97,7 +97,7 @@ void ext3_free_inode (handle_t *handle, + unsigned long bit; + struct ext3_group_desc * gdp; + struct ext3_super_block * es; +- struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_sb_info *sbi; + int fatal = 0, err; + + if (atomic_read(&inode->i_count) > 1) { +@@ -114,6 +114,7 @@ void ext3_free_inode (handle_t *handle, + printk("ext3_free_inode: inode on nonexistent device\n"); + return; + } ++ sbi = EXT3_SB(sb); + + ino = inode->i_ino; + ext3_debug ("freeing inode %lu\n", ino); +@@ -319,8 +320,6 @@ static int find_group_orlov(struct super + desc = ext3_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; +- if (sbi->s_debts[group] >= max_debt) +- continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) +@@ -559,7 +558,7 @@ got: + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = 0; +- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_next_alloc_block = 0; +diff -uprN linux-2.6.8.1.orig/fs/ext3/inode.c linux-2.6.8.1-ve022stab078/fs/ext3/inode.c +--- linux-2.6.8.1.orig/fs/ext3/inode.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/inode.c 2006-05-11 13:05:39.000000000 +0400 +@@ -66,6 +66,8 @@ int ext3_forget(handle_t *handle, int is + { + int err; + ++ might_sleep(); ++ + BUFFER_TRACE(bh, "enter"); + + jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " +@@ -82,7 +84,7 @@ int ext3_forget(handle_t *handle, int is + (!is_metadata && !ext3_should_journal_data(inode))) { + if (bh) { + BUFFER_TRACE(bh, "call journal_forget"); +- ext3_journal_forget(handle, bh); ++ return ext3_journal_forget(handle, bh); + } + return 0; + } +@@ -303,12 +305,12 @@ static int ext3_alloc_block (handle_t *h + + + typedef struct { +- u32 *p; +- u32 key; ++ __le32 *p; ++ __le32 key; + struct buffer_head *bh; + } Indirect; + +-static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v) ++static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) + { + p->key = *(p->p = v); + p->bh = bh; +@@ -439,7 +441,7 @@ static Indirect *ext3_get_branch(struct + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; +- add_chain(++p, bh, (u32*)bh->b_data + *++offsets); ++ add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; +@@ -480,8 +482,8 @@ no_block: + static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) + { + struct ext3_inode_info *ei = EXT3_I(inode); +- u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; +- u32 *p; ++ __le32 *start = ind->bh ? 
(__le32*) ind->bh->b_data : ei->i_data; ++ __le32 *p; + unsigned long bg_start; + unsigned long colour; + +@@ -609,7 +611,7 @@ static int ext3_alloc_branch(handle_t *h + } + + memset(bh->b_data, 0, blocksize); +- branch[n].p = (u32*) bh->b_data + offsets[n]; ++ branch[n].p = (__le32*) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); +@@ -687,7 +689,7 @@ static int ext3_splice_branch(handle_t * + + /* We are done with atomic stuff, now do the rest of housekeeping */ + +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + + /* had we spliced it onto indirect block? */ +@@ -780,6 +782,7 @@ reread: + if (!partial) { + clear_buffer_new(bh_result); + got_it: ++ clear_buffer_delay(bh_result); + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (boundary) + set_buffer_boundary(bh_result); +@@ -1063,11 +1066,13 @@ static int walk_page_buffers( handle_t * + * and the commit_write(). So doing the journal_start at the start of + * prepare_write() is the right place. + * +- * Also, this function can nest inside ext3_writepage() -> +- * block_write_full_page(). In that case, we *know* that ext3_writepage() +- * has generated enough buffer credits to do the whole page. So we won't +- * block on the journal in that case, which is good, because the caller may +- * be PF_MEMALLOC. ++ * [2004/09/04 SAW] journal_start() in prepare_write() causes different ranking ++ * violations if copy_from_user() triggers a page fault (mmap_sem, may be page ++ * lock, plus __GFP_FS allocations). ++ * Now we read in not up-to-date buffers in prepare_write(), and do the rest ++ * including hole instantiation and inode extension in commit_write(). ++ * ++ * Other notes. + * + * By accident, ext3 can be reentered when a transaction is open via + * quota file writes. If we were to commit the transaction while thus +@@ -1082,6 +1087,27 @@ static int walk_page_buffers( handle_t * + * write. + */ + ++static int ext3_get_block_delay(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh, int create) ++{ ++ int ret; ++ ++ ret = ext3_get_block_handle(NULL, inode, iblock, bh, 0, 0); ++ if (ret) ++ return ret; ++ if (!buffer_mapped(bh)) { ++ set_buffer_delay(bh); ++ set_buffer_new(bh); ++ } ++ return ret; ++} ++ ++static int ext3_prepare_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ return block_prepare_write(page, from, to, ext3_get_block_delay); ++} ++ + static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) + { +@@ -1090,8 +1116,52 @@ static int do_journal_get_write_access(h + return ext3_journal_get_write_access(handle, bh); + } + +-static int ext3_prepare_write(struct file *file, struct page *page, +- unsigned from, unsigned to) ++/* ++ * This function zeroes buffers not mapped to disk. ++ * We do it similarly to the error path in __block_prepare_write() to avoid ++ * keeping garbage in the page cache. ++ * Here we check BH_delay state. We know that if the buffer appears ++ * !buffer_mapped then ++ * - it was !buffer_mapped at the moment of ext3_prepare_write, and ++ * - ext3_get_block failed to map this buffer (e.g., ENOSPC). ++ * If this !mapped buffer is not up to date (it can be up to date if ++ * PageUptodate), then we zero its content. 
++ */ ++static void ext3_clear_delayed_buffers(struct page *page, ++ unsigned from, unsigned to) ++{ ++ struct buffer_head *bh, *head, *next; ++ unsigned block_start, block_end; ++ unsigned blocksize; ++ void *kaddr; ++ ++ head = page_buffers(page); ++ blocksize = head->b_size; ++ for ( bh = head, block_start = 0; ++ bh != head || !block_start; ++ block_start = block_end, bh = next) ++ { ++ next = bh->b_this_page; ++ block_end = block_start + blocksize; ++ if (block_end <= from || block_start >= to) ++ continue; ++ if (!buffer_delay(bh)) ++ continue; ++ J_ASSERT_BH(bh, !buffer_mapped(bh)); ++ clear_buffer_new(bh); ++ clear_buffer_delay(bh); ++ if (!buffer_uptodate(bh)) { ++ kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr + block_start, 0, bh->b_size); ++ kunmap_atomic(kaddr, KM_USER0); ++ set_buffer_uptodate(bh); ++ mark_buffer_dirty(bh); ++ } ++ } ++} ++ ++static int ext3_map_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) + { + struct inode *inode = page->mapping->host; + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); +@@ -1104,19 +1174,19 @@ retry: + ret = PTR_ERR(handle); + goto out; + } +- ret = block_prepare_write(page, from, to, ext3_get_block); +- if (ret) +- goto prepare_write_failed; + +- if (ext3_should_journal_data(inode)) { ++ ret = block_prepare_write(page, from, to, ext3_get_block); ++ if (!ret && ext3_should_journal_data(inode)) { + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, do_journal_get_write_access); + } +-prepare_write_failed: +- if (ret) +- ext3_journal_stop(handle); ++ if (!ret) ++ goto out; ++ ++ ext3_journal_stop(handle); + if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; ++ ext3_clear_delayed_buffers(page, from, to); + out: + return ret; + } +@@ -1151,10 +1221,15 @@ static int commit_write_fn(handle_t *han + static int ext3_ordered_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); ++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); ++ + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, ext3_journal_dirty_data); + +@@ -1180,11 +1255,15 @@ static int ext3_ordered_commit_write(str + static int ext3_writeback_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); ++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + loff_t new_i_size; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = new_i_size; +@@ -1198,12 +1277,17 @@ static int ext3_writeback_commit_write(s + static int ext3_journalled_commit_write(struct file *file, + struct page *page, unsigned from, unsigned to) + { +- handle_t *handle = ext3_journal_current_handle(); ++ handle_t *handle; + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + int partial = 0; + loff_t pos; + ++ ret = ext3_map_write(file, page, from, to); ++ if (ret) ++ return ret; ++ handle = ext3_journal_current_handle(); ++ + /* + * Here we duplicate the generic_commit_write() functionality + */ +@@ -1471,8 +1555,11 @@ static int 
ext3_journalled_writepage(str + ClearPageChecked(page); + ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, + ext3_get_block); +- if (ret != 0) +- goto out_unlock; ++ if (ret != 0) { ++ ext3_journal_stop(handle); ++ unlock_page(page); ++ return ret; ++ } + ret = walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); + +@@ -1498,7 +1585,6 @@ out: + + no_write: + redirty_page_for_writepage(wbc, page); +-out_unlock: + unlock_page(page); + goto out; + } +@@ -1577,6 +1663,12 @@ static ssize_t ext3_direct_IO(int rw, st + offset, nr_segs, + ext3_direct_io_get_blocks, NULL); + ++ /* ++ * Reacquire the handle: ext3_direct_io_get_block() can restart the ++ * transaction ++ */ ++ handle = journal_current_handle(); ++ + out_stop: + if (handle) { + int err; +@@ -1765,7 +1857,7 @@ unlock: + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +-static inline int all_zeroes(u32 *p, u32 *q) ++static inline int all_zeroes(__le32 *p, __le32 *q) + { + while (p < q) + if (*p++) +@@ -1812,7 +1904,7 @@ static Indirect *ext3_find_shared(struct + int depth, + int offsets[4], + Indirect chain[4], +- u32 *top) ++ __le32 *top) + { + Indirect *partial, *p; + int k, err; +@@ -1832,7 +1924,7 @@ static Indirect *ext3_find_shared(struct + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; +- for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--) ++ for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our +@@ -1871,9 +1963,9 @@ no_top: + static void + ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, + unsigned long block_to_free, unsigned long count, +- u32 *first, u32 *last) ++ __le32 *first, __le32 *last) + { +- u32 *p; ++ __le32 *p; + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); +@@ -1929,15 +2021,16 @@ ext3_clear_blocks(handle_t *handle, stru + * block pointers. + */ + static void ext3_free_data(handle_t *handle, struct inode *inode, +- struct buffer_head *this_bh, u32 *first, u32 *last) ++ struct buffer_head *this_bh, ++ __le32 *first, __le32 *last) + { + unsigned long block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ +- u32 *block_to_free_p = NULL; /* Pointer into inode/ind ++ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + unsigned long nr; /* Current block # */ +- u32 *p; /* Pointer into inode/ind ++ __le32 *p; /* Pointer into inode/ind + for current block */ + int err; + +@@ -1996,10 +2089,10 @@ static void ext3_free_data(handle_t *han + */ + static void ext3_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, +- u32 *first, u32 *last, int depth) ++ __le32 *first, __le32 *last, int depth) + { + unsigned long nr; +- u32 *p; ++ __le32 *p; + + if (is_handle_aborted(handle)) + return; +@@ -2029,8 +2122,9 @@ static void ext3_free_branches(handle_t + + /* This zaps the entire block. Bottom up. 
*/ + BUFFER_TRACE(bh, "free child branches"); +- ext3_free_branches(handle, inode, bh, (u32*)bh->b_data, +- (u32*)bh->b_data + addr_per_block, ++ ext3_free_branches(handle, inode, bh, ++ (__le32*)bh->b_data, ++ (__le32*)bh->b_data + addr_per_block, + depth); + + /* +@@ -2135,13 +2229,13 @@ void ext3_truncate(struct inode * inode) + { + handle_t *handle; + struct ext3_inode_info *ei = EXT3_I(inode); +- u32 *i_data = ei->i_data; ++ __le32 *i_data = ei->i_data; + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; +- int nr = 0; ++ __le32 nr = 0; + int n; + long last_block; + unsigned blocksize = inode->i_sb->s_blocksize; +@@ -2248,7 +2342,7 @@ void ext3_truncate(struct inode * inode) + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext3_free_branches(handle, inode, partial->bh, partial->p + 1, +- (u32*)partial->bh->b_data + addr_per_block, ++ (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse (partial->bh); +@@ -2282,7 +2376,7 @@ do_indirects: + ; + } + up(&ei->truncate_sem); +- inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + + /* In a multi-transaction truncate, we only make the final +@@ -2311,8 +2405,10 @@ static unsigned long ext3_get_inode_bloc + struct buffer_head *bh; + struct ext3_group_desc * gdp; + ++ + if ((ino != EXT3_ROOT_INO && + ino != EXT3_JOURNAL_INO && ++ ino != EXT3_RESIZE_INO && + ino < EXT3_FIRST_INO(sb)) || + ino > le32_to_cpu( + EXT3_SB(sb)->s_es->s_inodes_count)) { +@@ -2326,6 +2422,7 @@ static unsigned long ext3_get_inode_bloc + "group >= groups count"); + return 0; + } ++ smp_rmb(); + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + bh = EXT3_SB(sb)->s_group_desc[group_desc]; +@@ -2743,21 +2840,21 @@ out_brelse: + * `stuff()' is running, and the new i_size will be lost. Plus the inode + * will no longer be on the superblock's dirty inode list. + */ +-void ext3_write_inode(struct inode *inode, int wait) ++int ext3_write_inode(struct inode *inode, int wait) + { +- if (current->flags & PF_MEMALLOC) +- return; ++ if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) ++ return 0; + + if (ext3_journal_current_handle()) { + jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + dump_stack(); +- return; ++ return -EIO; + } + + if (!wait) +- return; ++ return 0; + +- ext3_force_commit(inode->i_sb); ++ return ext3_force_commit(inode->i_sb); + } + + /* +@@ -2966,6 +3063,7 @@ int ext3_mark_inode_dirty(handle_t *hand + struct ext3_iloc iloc; + int err; + ++ might_sleep(); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) + err = ext3_mark_iloc_dirty(handle, inode, &iloc); +diff -uprN linux-2.6.8.1.orig/fs/ext3/ioctl.c linux-2.6.8.1-ve022stab078/fs/ext3/ioctl.c +--- linux-2.6.8.1.orig/fs/ext3/ioctl.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/ioctl.c 2006-05-11 13:05:37.000000000 +0400 +@@ -67,7 +67,7 @@ int ext3_ioctl (struct inode * inode, st + * the relevant capability. 
+ */ + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { +- if (!capable(CAP_SYS_RESOURCE)) ++ if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + +@@ -86,7 +86,7 @@ int ext3_ioctl (struct inode * inode, st + ei->i_flags = flags; + + ext3_set_inode_flags(inode); +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: +@@ -120,7 +120,7 @@ flags_err: + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err == 0) { +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + } +@@ -151,6 +151,51 @@ flags_err: + return ret; + } + #endif ++ case EXT3_IOC_GROUP_EXTEND: { ++ unsigned long n_blocks_count; ++ struct super_block *sb = inode->i_sb; ++ int err; ++ ++ if (!capable(CAP_SYS_RESOURCE)) ++ return -EPERM; ++ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ ++ if (get_user(n_blocks_count, (__u32 *)arg)) ++ return -EFAULT; ++ ++ err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); ++ journal_lock_updates(EXT3_SB(sb)->s_journal); ++ journal_flush(EXT3_SB(sb)->s_journal); ++ journal_unlock_updates(EXT3_SB(sb)->s_journal); ++ ++ return err; ++ } ++ case EXT3_IOC_GROUP_ADD: { ++ struct ext3_new_group_data input; ++ struct super_block *sb = inode->i_sb; ++ int err; ++ ++ if (!capable(CAP_SYS_RESOURCE)) ++ return -EPERM; ++ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ ++ if (copy_from_user(&input, (struct ext3_new_group_input *)arg, ++ sizeof(input))) ++ return -EFAULT; ++ ++ err = ext3_group_add(sb, &input); ++ journal_lock_updates(EXT3_SB(sb)->s_journal); ++ journal_flush(EXT3_SB(sb)->s_journal); ++ journal_unlock_updates(EXT3_SB(sb)->s_journal); ++ ++ return err; ++ } ++ ++ + default: + return -ENOTTY; + } +diff -uprN linux-2.6.8.1.orig/fs/ext3/namei.c linux-2.6.8.1-ve022stab078/fs/ext3/namei.c +--- linux-2.6.8.1.orig/fs/ext3/namei.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/namei.c 2006-05-11 13:05:32.000000000 +0400 +@@ -71,9 +71,6 @@ static struct buffer_head *ext3_append(h + #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) + #endif + +-typedef struct { u32 v; } le_u32; +-typedef struct { u16 v; } le_u16; +- + #ifdef DX_DEBUG + #define dxtrace(command) command + #else +@@ -82,22 +79,22 @@ typedef struct { u16 v; } le_u16; + + struct fake_dirent + { +- /*le*/u32 inode; +- /*le*/u16 rec_len; ++ __le32 inode; ++ __le16 rec_len; + u8 name_len; + u8 file_type; + }; + + struct dx_countlimit + { +- le_u16 limit; +- le_u16 count; ++ __le16 limit; ++ __le16 count; + }; + + struct dx_entry + { +- le_u32 hash; +- le_u32 block; ++ __le32 hash; ++ __le32 block; + }; + + /* +@@ -114,7 +111,7 @@ struct dx_root + char dotdot_name[4]; + struct dx_root_info + { +- le_u32 reserved_zero; ++ __le32 reserved_zero; + u8 hash_version; + u8 info_length; /* 8 */ + u8 indirect_levels; +@@ -184,42 +181,42 @@ static int ext3_dx_add_entry(handle_t *h + + static inline unsigned dx_get_block (struct dx_entry *entry) + { +- return le32_to_cpu(entry->block.v) & 0x00ffffff; ++ return le32_to_cpu(entry->block) & 0x00ffffff; + } + + static inline void dx_set_block (struct dx_entry *entry, unsigned value) + { +- entry->block.v = cpu_to_le32(value); ++ entry->block = cpu_to_le32(value); + } + + static inline unsigned dx_get_hash (struct dx_entry *entry) + { +- return le32_to_cpu(entry->hash.v); ++ return le32_to_cpu(entry->hash); + } + + static inline 
void dx_set_hash (struct dx_entry *entry, unsigned value) + { +- entry->hash.v = cpu_to_le32(value); ++ entry->hash = cpu_to_le32(value); + } + + static inline unsigned dx_get_count (struct dx_entry *entries) + { +- return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); ++ return le16_to_cpu(((struct dx_countlimit *) entries)->count); + } + + static inline unsigned dx_get_limit (struct dx_entry *entries) + { +- return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); ++ return le16_to_cpu(((struct dx_countlimit *) entries)->limit); + } + + static inline void dx_set_count (struct dx_entry *entries, unsigned value) + { +- ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); ++ ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); + } + + static inline void dx_set_limit (struct dx_entry *entries, unsigned value) + { +- ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); ++ ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); + } + + static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) +@@ -1254,7 +1251,7 @@ static int add_dirent_to_buf(handle_t *h + * happen is that the times are slightly out of date + * and/or different from the directory change time. + */ +- dir->i_mtime = dir->i_ctime = CURRENT_TIME; ++ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + ext3_update_dx_flag(dir); + dir->i_version++; + ext3_mark_inode_dirty(handle, dir); +@@ -2032,7 +2029,7 @@ static int ext3_rmdir (struct inode * di + * recovery. */ + inode->i_size = 0; + ext3_orphan_add(handle, inode); +- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; ++ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + dir->i_nlink--; + ext3_update_dx_flag(dir); +@@ -2082,7 +2079,7 @@ static int ext3_unlink(struct inode * di + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_unlink; +- dir->i_ctime = dir->i_mtime = CURRENT_TIME; ++ dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + inode->i_nlink--; +@@ -2132,7 +2129,7 @@ retry: + * We have a transaction open. All is sweetness. It also sets + * i_size in generic_commit_write(). + */ +- err = page_symlink(inode, symname, l); ++ err = page_symlink(inode, symname, l, GFP_NOFS); + if (err) { + ext3_dec_count(handle, inode); + ext3_mark_inode_dirty(handle, inode); +@@ -2172,7 +2169,7 @@ retry: + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + +@@ -2258,7 +2255,7 @@ static int ext3_rename (struct inode * o + } else { + BUFFER_TRACE(new_bh, "get write access"); + ext3_journal_get_write_access(handle, new_bh); +- new_de->inode = le32_to_cpu(old_inode->i_ino); ++ new_de->inode = cpu_to_le32(old_inode->i_ino); + if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, + EXT3_FEATURE_INCOMPAT_FILETYPE)) + new_de->file_type = old_de->file_type; +@@ -2273,7 +2270,7 @@ static int ext3_rename (struct inode * o + * Like most other Unix systems, set the ctime for inodes on a + * rename. 
+ */ +- old_inode->i_ctime = CURRENT_TIME; ++ old_inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, old_inode); + + /* +@@ -2306,14 +2303,14 @@ static int ext3_rename (struct inode * o + + if (new_inode) { + new_inode->i_nlink--; +- new_inode->i_ctime = CURRENT_TIME; ++ new_inode->i_ctime = CURRENT_TIME_SEC; + } +- old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; ++ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + ext3_update_dx_flag(old_dir); + if (dir_bh) { + BUFFER_TRACE(dir_bh, "get_write_access"); + ext3_journal_get_write_access(handle, dir_bh); +- PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); ++ PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); + old_dir->i_nlink--; +diff -uprN linux-2.6.8.1.orig/fs/ext3/resize.c linux-2.6.8.1-ve022stab078/fs/ext3/resize.c +--- linux-2.6.8.1.orig/fs/ext3/resize.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/resize.c 2006-05-11 13:05:31.000000000 +0400 +@@ -0,0 +1,996 @@ ++/* ++ * linux/fs/ext3/resize.c ++ * ++ * Support for resizing an ext3 filesystem while it is mounted. ++ * ++ * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> ++ * ++ * This could probably be made into a module, because it is not often in use. ++ */ ++ ++#include <linux/config.h> ++ ++#define EXT3FS_DEBUG ++ ++#include <linux/sched.h> ++#include <linux/smp_lock.h> ++#include <linux/ext3_jbd.h> ++ ++#include <linux/errno.h> ++#include <linux/slab.h> ++ ++ ++#define outside(b, first, last) ((b) < (first) || (b) >= (last)) ++#define inside(b, first, last) ((b) >= (first) && (b) < (last)) ++ ++static int verify_group_input(struct super_block *sb, ++ struct ext3_new_group_data *input) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_super_block *es = sbi->s_es; ++ unsigned start = le32_to_cpu(es->s_blocks_count); ++ unsigned end = start + input->blocks_count; ++ unsigned group = input->group; ++ unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group; ++ unsigned overhead = ext3_bg_has_super(sb, group) ? ++ (1 + ext3_bg_num_gdb(sb, group) + ++ le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; ++ unsigned metaend = start + overhead; ++ struct buffer_head *bh = NULL; ++ int free_blocks_count; ++ int err = -EINVAL; ++ ++ input->free_blocks_count = free_blocks_count = ++ input->blocks_count - 2 - overhead - sbi->s_itb_per_group; ++ ++ if (test_opt(sb, DEBUG)) ++ printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " ++ "(%d free, %u reserved)\n", ++ ext3_bg_has_super(sb, input->group) ? 
"normal" : ++ "no-super", input->group, input->blocks_count, ++ free_blocks_count, input->reserved_blocks); ++ ++ if (group != sbi->s_groups_count) ++ ext3_warning(sb, __FUNCTION__, ++ "Cannot add at group %u (only %lu groups)", ++ input->group, sbi->s_groups_count); ++ else if ((start - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb)) ++ ext3_warning(sb, __FUNCTION__, "Last group not full"); ++ else if (input->reserved_blocks > input->blocks_count / 5) ++ ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", ++ input->reserved_blocks); ++ else if (free_blocks_count < 0) ++ ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", ++ input->blocks_count); ++ else if (!(bh = sb_bread(sb, end - 1))) ++ ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)", ++ end - 1); ++ else if (outside(input->block_bitmap, start, end)) ++ ext3_warning(sb, __FUNCTION__, ++ "Block bitmap not in group (block %u)", ++ input->block_bitmap); ++ else if (outside(input->inode_bitmap, start, end)) ++ ext3_warning(sb, __FUNCTION__, ++ "Inode bitmap not in group (block %u)", ++ input->inode_bitmap); ++ else if (outside(input->inode_table, start, end) || ++ outside(itend - 1, start, end)) ++ ext3_warning(sb, __FUNCTION__, ++ "Inode table not in group (blocks %u-%u)", ++ input->inode_table, itend - 1); ++ else if (input->inode_bitmap == input->block_bitmap) ++ ext3_warning(sb, __FUNCTION__, ++ "Block bitmap same as inode bitmap (%u)", ++ input->block_bitmap); ++ else if (inside(input->block_bitmap, input->inode_table, itend)) ++ ext3_warning(sb, __FUNCTION__, ++ "Block bitmap (%u) in inode table (%u-%u)", ++ input->block_bitmap, input->inode_table, itend-1); ++ else if (inside(input->inode_bitmap, input->inode_table, itend)) ++ ext3_warning(sb, __FUNCTION__, ++ "Inode bitmap (%u) in inode table (%u-%u)", ++ input->inode_bitmap, input->inode_table, itend-1); ++ else if (inside(input->block_bitmap, start, metaend)) ++ ext3_warning(sb, __FUNCTION__, ++ "Block bitmap (%u) in GDT table (%u-%u)", ++ input->block_bitmap, start, metaend - 1); ++ else if (inside(input->inode_bitmap, start, metaend)) ++ ext3_warning(sb, __FUNCTION__, ++ "Inode bitmap (%u) in GDT table (%u-%u)", ++ input->inode_bitmap, start, metaend - 1); ++ else if (inside(input->inode_table, start, metaend) || ++ inside(itend - 1, start, metaend)) ++ ext3_warning(sb, __FUNCTION__, ++ "Inode table (%u-%u) overlaps GDT table (%u-%u)", ++ input->inode_table, itend - 1, start, metaend - 1); ++ else ++ err = 0; ++ brelse(bh); ++ ++ return err; ++} ++ ++static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, ++ unsigned long blk) ++{ ++ struct buffer_head *bh; ++ int err; ++ ++ bh = sb_getblk(sb, blk); ++ if ((err = ext3_journal_get_write_access(handle, bh))) { ++ brelse(bh); ++ bh = ERR_PTR(err); ++ } else { ++ lock_buffer(bh); ++ memset(bh->b_data, 0, sb->s_blocksize); ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ } ++ ++ return bh; ++} ++ ++/* ++ * To avoid calling the atomic setbit hundreds or thousands of times, we only ++ * need to use it within a single byte (to ensure we get endianness right). ++ * We can use memset for the rest of the bitmap as there are no other users. 
++ */ ++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) ++{ ++ int i; ++ ++ if (start_bit >= end_bit) ++ return; ++ ++ ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); ++ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) ++ ext3_set_bit(i, bitmap); ++ if (i < end_bit) ++ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); ++} ++ ++/* ++ * Set up the block and inode bitmaps, and the inode table for the new group. ++ * This doesn't need to be part of the main transaction, since we are only ++ * changing blocks outside the actual filesystem. We still do journaling to ++ * ensure the recovery is correct in case of a failure just after resize. ++ * If any part of this fails, we simply abort the resize. ++ */ ++static int setup_new_group_blocks(struct super_block *sb, ++ struct ext3_new_group_data *input) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ unsigned long start = input->group * sbi->s_blocks_per_group + ++ le32_to_cpu(sbi->s_es->s_first_data_block); ++ int reserved_gdb = ext3_bg_has_super(sb, input->group) ? ++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; ++ unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); ++ struct buffer_head *bh; ++ handle_t *handle; ++ unsigned long block; ++ int bit; ++ int i; ++ int err = 0, err2; ++ ++ handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks + ++ 2 + sbi->s_itb_per_group); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ lock_super(sb); ++ if (input->group != sbi->s_groups_count) { ++ err = -EBUSY; ++ goto exit_journal; ++ } ++ ++ if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { ++ err = PTR_ERR(bh); ++ goto exit_journal; ++ } ++ ++ if (ext3_bg_has_super(sb, input->group)) { ++ ext3_debug("mark backup superblock %#04lx (+0)\n", start); ++ ext3_set_bit(0, bh->b_data); ++ } ++ ++ /* Copy all of the GDT blocks into the backup in this group */ ++ for (i = 0, bit = 1, block = start + 1; ++ i < gdblocks; i++, block++, bit++) { ++ struct buffer_head *gdb; ++ ++ ext3_debug("update backup group %#04lx (+%d)\n", block, bit); ++ ++ gdb = sb_getblk(sb, block); ++ if ((err = ext3_journal_get_write_access(handle, gdb))) { ++ brelse(gdb); ++ goto exit_bh; ++ } ++ lock_buffer(bh); ++ memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); ++ set_buffer_uptodate(gdb); ++ unlock_buffer(bh); ++ ext3_journal_dirty_metadata(handle, gdb); ++ ext3_set_bit(bit, bh->b_data); ++ brelse(gdb); ++ } ++ ++ /* Zero out all of the reserved backup group descriptor table blocks */ ++ for (i = 0, bit = gdblocks + 1, block = start + bit; ++ i < reserved_gdb; i++, block++, bit++) { ++ struct buffer_head *gdb; ++ ++ ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); ++ ++ if (IS_ERR(gdb = bclean(handle, sb, block))) { ++ err = PTR_ERR(bh); ++ goto exit_bh; ++ } ++ ext3_journal_dirty_metadata(handle, gdb); ++ ext3_set_bit(bit, bh->b_data); ++ brelse(gdb); ++ } ++ ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, ++ input->block_bitmap - start); ++ ext3_set_bit(input->block_bitmap - start, bh->b_data); ++ ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, ++ input->inode_bitmap - start); ++ ext3_set_bit(input->inode_bitmap - start, bh->b_data); ++ ++ /* Zero out all of the inode table blocks */ ++ for (i = 0, block = input->inode_table, bit = block - start; ++ i < sbi->s_itb_per_group; i++, bit++, block++) { ++ struct buffer_head *it; ++ ++ ext3_debug("clear inode block %#04x (+%ld)\n", block, bit); ++ if (IS_ERR(it = bclean(handle, sb, block))) { ++ 
err = PTR_ERR(it); ++ goto exit_bh; ++ } ++ ext3_journal_dirty_metadata(handle, it); ++ brelse(it); ++ ext3_set_bit(bit, bh->b_data); ++ } ++ mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), ++ bh->b_data); ++ ext3_journal_dirty_metadata(handle, bh); ++ brelse(bh); ++ ++ /* Mark unused entries in inode bitmap used */ ++ ext3_debug("clear inode bitmap %#04x (+%ld)\n", ++ input->inode_bitmap, input->inode_bitmap - start); ++ if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { ++ err = PTR_ERR(bh); ++ goto exit_journal; ++ } ++ ++ mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), ++ bh->b_data); ++ ext3_journal_dirty_metadata(handle, bh); ++exit_bh: ++ brelse(bh); ++ ++exit_journal: ++ unlock_super(sb); ++ if ((err2 = ext3_journal_stop(handle)) && !err) ++ err = err2; ++ ++ return err; ++} ++ ++/* ++ * Iterate through the groups which hold BACKUP superblock/GDT copies in an ++ * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before ++ * calling this for the first time. In a sparse filesystem it will be the ++ * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... ++ * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... ++ */ ++unsigned ext3_list_backups(struct super_block *sb, unsigned *three, ++ unsigned *five, unsigned *seven) ++{ ++ unsigned *min = three; ++ int mult = 3; ++ unsigned ret; ++ ++ if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, ++ EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { ++ ret = *min; ++ *min += 1; ++ return ret; ++ } ++ ++ if (*five < *min) { ++ min = five; ++ mult = 5; ++ } ++ if (*seven < *min) { ++ min = seven; ++ mult = 7; ++ } ++ ++ ret = *min; ++ *min *= mult; ++ ++ return ret; ++} ++ ++/* ++ * Check that all of the backup GDT blocks are held in the primary GDT block. ++ * It is assumed that they are stored in group order. Returns the number of ++ * groups in current filesystem that have BACKUPS, or -ve error code. ++ */ ++static int verify_reserved_gdb(struct super_block *sb, ++ struct buffer_head *primary) ++{ ++ const unsigned long blk = primary->b_blocknr; ++ const unsigned long end = EXT3_SB(sb)->s_groups_count; ++ unsigned three = 1; ++ unsigned five = 5; ++ unsigned seven = 7; ++ unsigned grp; ++ __u32 *p = (__u32 *)primary->b_data; ++ int gdbackups = 0; ++ ++ while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { ++ if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ ++ ext3_warning(sb, __FUNCTION__, ++ "reserved GDT %ld missing grp %d (%ld)\n", ++ blk, grp, ++ grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); ++ return -EINVAL; ++ } ++ if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) ++ return -EFBIG; ++ } ++ ++ return gdbackups; ++} ++ ++/* ++ * Called when we need to bring a reserved group descriptor table block into ++ * use from the resize inode. The primary copy of the new GDT block currently ++ * is an indirect block (under the double indirect block in the resize inode). ++ * The new backup GDT blocks will be stored as leaf blocks in this indirect ++ * block, in group order. Even though we know all the block numbers we need, ++ * we check to ensure that the resize inode has actually reserved these blocks. ++ * ++ * Don't need to update the block bitmaps because the blocks are still in use. ++ * ++ * We get all of the error cases out of the way, so that we are sure to not ++ * fail once we start modifying the data on disk, because JBD has no rollback. 
++ */ ++static int add_new_gdb(handle_t *handle, struct inode *inode, ++ struct ext3_new_group_data *input, ++ struct buffer_head **primary) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct ext3_super_block *es = EXT3_SB(sb)->s_es; ++ unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); ++ unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; ++ struct buffer_head **o_group_desc, **n_group_desc; ++ struct buffer_head *dind; ++ int gdbackups; ++ struct ext3_iloc iloc; ++ __u32 *data; ++ int err; ++ ++ if (test_opt(sb, DEBUG)) ++ printk(KERN_DEBUG ++ "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", ++ gdb_num); ++ ++ /* ++ * If we are not using the primary superblock/GDT copy don't resize, ++ * because the user tools have no way of handling this. Probably a ++ * bad time to do it anyways. ++ */ ++ if (EXT3_SB(sb)->s_sbh->b_blocknr != ++ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { ++ ext3_warning(sb, __FUNCTION__, ++ "won't resize using backup superblock at %llu\n", ++ (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); ++ return -EPERM; ++ } ++ ++ *primary = sb_bread(sb, gdblock); ++ if (!*primary) ++ return -EIO; ++ ++ if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { ++ err = gdbackups; ++ goto exit_bh; ++ } ++ ++ data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; ++ dind = sb_bread(sb, le32_to_cpu(*data)); ++ if (!dind) { ++ err = -EIO; ++ goto exit_bh; ++ } ++ ++ data = (__u32 *)dind->b_data; ++ if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { ++ ext3_warning(sb, __FUNCTION__, ++ "new group %u GDT block %lu not reserved\n", ++ input->group, gdblock); ++ err = -EINVAL; ++ goto exit_dind; ++ } ++ ++ if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) ++ goto exit_dind; ++ ++ if ((err = ext3_journal_get_write_access(handle, *primary))) ++ goto exit_sbh; ++ ++ if ((err = ext3_journal_get_write_access(handle, dind))) ++ goto exit_primary; ++ ++ /* ext3_reserve_inode_write() gets a reference on the iloc */ ++ if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) ++ goto exit_dindj; ++ ++ n_group_desc = (struct buffer_head **)kmalloc((gdb_num + 1) * ++ sizeof(struct buffer_head *), GFP_KERNEL); ++ if (!n_group_desc) { ++ err = -ENOMEM; ++ ext3_warning (sb, __FUNCTION__, ++ "not enough memory for %lu groups", gdb_num + 1); ++ goto exit_inode; ++ } ++ ++ /* ++ * Finally, we have all of the possible failures behind us... ++ * ++ * Remove new GDT block from inode double-indirect block and clear out ++ * the new GDT block for use (which also "frees" the backup GDT blocks ++ * from the reserved inode). We don't need to change the bitmaps for ++ * these blocks, because they are marked as in-use from being in the ++ * reserved inode, and will become GDT blocks (primary and backup). 
++ */ ++ data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; ++ ext3_journal_dirty_metadata(handle, dind); ++ brelse(dind); ++ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ++ ext3_mark_iloc_dirty(handle, inode, &iloc); ++ memset((*primary)->b_data, 0, sb->s_blocksize); ++ ext3_journal_dirty_metadata(handle, *primary); ++ ++ o_group_desc = EXT3_SB(sb)->s_group_desc; ++ memcpy(n_group_desc, o_group_desc, ++ EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); ++ n_group_desc[gdb_num] = *primary; ++ EXT3_SB(sb)->s_group_desc = n_group_desc; ++ EXT3_SB(sb)->s_gdb_count++; ++ kfree(o_group_desc); ++ ++ es->s_reserved_gdt_blocks = ++ cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); ++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ ++ return 0; ++ ++exit_inode: ++ //ext3_journal_release_buffer(handle, iloc.bh); ++ brelse(iloc.bh); ++exit_dindj: ++ //ext3_journal_release_buffer(handle, dind); ++exit_primary: ++ //ext3_journal_release_buffer(handle, *primary); ++exit_sbh: ++ //ext3_journal_release_buffer(handle, *primary); ++exit_dind: ++ brelse(dind); ++exit_bh: ++ brelse(*primary); ++ ++ ext3_debug("leaving with error %d\n", err); ++ return err; ++} ++ ++/* ++ * Called when we are adding a new group which has a backup copy of each of ++ * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. ++ * We need to add these reserved backup GDT blocks to the resize inode, so ++ * that they are kept for future resizing and not allocated to files. ++ * ++ * Each reserved backup GDT block will go into a different indirect block. ++ * The indirect blocks are actually the primary reserved GDT blocks, ++ * so we know in advance what their block numbers are. We only get the ++ * double-indirect block to verify it is pointing to the primary reserved ++ * GDT blocks so we don't overwrite a data block by accident. The reserved ++ * backup GDT blocks are stored in their reserved primary GDT block. 
++ */ ++static int reserve_backup_gdb(handle_t *handle, struct inode *inode, ++ struct ext3_new_group_data *input) ++{ ++ struct super_block *sb = inode->i_sb; ++ int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); ++ struct buffer_head **primary; ++ struct buffer_head *dind; ++ struct ext3_iloc iloc; ++ unsigned long blk; ++ __u32 *data, *end; ++ int gdbackups = 0; ++ int res, i; ++ int err; ++ ++ primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); ++ if (!primary) ++ return -ENOMEM; ++ ++ data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; ++ dind = sb_bread(sb, le32_to_cpu(*data)); ++ if (!dind) { ++ err = -EIO; ++ goto exit_free; ++ } ++ ++ blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; ++ data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; ++ end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); ++ ++ /* Get each reserved primary GDT block and verify it holds backups */ ++ for (res = 0; res < reserved_gdb; res++, blk++) { ++ if (le32_to_cpu(*data) != blk) { ++ ext3_warning(sb, __FUNCTION__, ++ "reserved block %lu not at offset %ld\n", ++ blk, (long)(data - (__u32 *)dind->b_data)); ++ err = -EINVAL; ++ goto exit_bh; ++ } ++ primary[res] = sb_bread(sb, blk); ++ if (!primary[res]) { ++ err = -EIO; ++ goto exit_bh; ++ } ++ if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { ++ brelse(primary[res]); ++ err = gdbackups; ++ goto exit_bh; ++ } ++ if (++data >= end) ++ data = (__u32 *)dind->b_data; ++ } ++ ++ for (i = 0; i < reserved_gdb; i++) { ++ if ((err = ext3_journal_get_write_access(handle, primary[i]))) { ++ /* ++ int j; ++ for (j = 0; j < i; j++) ++ ext3_journal_release_buffer(handle, primary[j]); ++ */ ++ goto exit_bh; ++ } ++ } ++ ++ if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) ++ goto exit_bh; ++ ++ /* ++ * Finally we can add each of the reserved backup GDT blocks from ++ * the new group to its reserved primary GDT block. ++ */ ++ blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); ++ for (i = 0; i < reserved_gdb; i++) { ++ int err2; ++ data = (__u32 *)primary[i]->b_data; ++ /* printk("reserving backup %lu[%u] = %lu\n", ++ primary[i]->b_blocknr, gdbackups, ++ blk + primary[i]->b_blocknr); */ ++ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); ++ err2 = ext3_journal_dirty_metadata(handle, primary[i]); ++ if (!err) ++ err = err2; ++ } ++ inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; ++ ext3_mark_iloc_dirty(handle, inode, &iloc); ++ ++exit_bh: ++ while (--res >= 0) ++ brelse(primary[res]); ++ brelse(dind); ++ ++exit_free: ++ kfree(primary); ++ ++ return err; ++} ++ ++/* ++ * Update the backup copies of the ext3 metadata. These don't need to be part ++ * of the main resize transaction, because e2fsck will re-write them if there ++ * is a problem (basically only OOM will cause a problem). However, we ++ * _should_ update the backups if possible, in case the primary gets trashed ++ * for some reason and we need to run e2fsck from a backup superblock. The ++ * important part is that the new block and inode counts are in the backup ++ * superblocks, and the location of the new group metadata in the GDT backups. ++ * ++ * We do not need lock_super() for this, because these blocks are not ++ * otherwise touched by the filesystem code when it is mounted. We don't ++ * need to worry about last changing from sbi->s_groups_count, because the ++ * worst that can happen is that we do not copy the full number of backups ++ * at this time. The resize which changed s_groups_count will backup again. 
++ */ ++static void update_backups(struct super_block *sb, ++ int blk_off, char *data, int size) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ const unsigned long last = sbi->s_groups_count; ++ const int bpg = EXT3_BLOCKS_PER_GROUP(sb); ++ unsigned three = 1; ++ unsigned five = 5; ++ unsigned seven = 7; ++ unsigned group; ++ int rest = sb->s_blocksize - size; ++ handle_t *handle; ++ int err = 0, err2; ++ ++ handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); ++ if (IS_ERR(handle)) { ++ group = 1; ++ err = PTR_ERR(handle); ++ goto exit_err; ++ } ++ ++ while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { ++ struct buffer_head *bh; ++ ++ /* Out of journal space, and can't get more - abort - so sad */ ++ if (handle->h_buffer_credits == 0 && ++ ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && ++ (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) ++ break; ++ ++ bh = sb_getblk(sb, group * bpg + blk_off); ++ ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n", ++ bh->b_blocknr); ++ if ((err = ext3_journal_get_write_access(handle, bh))) ++ break; ++ lock_buffer(bh); ++ memcpy(bh->b_data, data, size); ++ if (rest) ++ memset(bh->b_data + size, 0, rest); ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ext3_journal_dirty_metadata(handle, bh); ++ brelse(bh); ++ } ++ if ((err2 = ext3_journal_stop(handle)) && !err) ++ err = err2; ++ ++ /* ++ * Ugh! Need to have e2fsck write the backup copies. It is too ++ * late to revert the resize, we shouldn't fail just because of ++ * the backup copies (they are only needed in case of corruption). ++ * ++ * However, if we got here we have a journal problem too, so we ++ * can't really start a transaction to mark the superblock. ++ * Chicken out and just set the flag on the hope it will be written ++ * to disk, and if not - we will simply wait until next fsck. ++ */ ++exit_err: ++ if (err) { ++ ext3_warning(sb, __FUNCTION__, ++ "can't update backup for group %d (err %d), " ++ "forcing fsck on next reboot\n", group, err); ++ sbi->s_mount_state &= ~EXT3_VALID_FS; ++ sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS); ++ mark_buffer_dirty(sbi->s_sbh); ++ } ++} ++ ++/* Add group descriptor data to an existing or new group descriptor block. ++ * Ensure we handle all possible error conditions _before_ we start modifying ++ * the filesystem, because we cannot abort the transaction and not have it ++ * write the data to disk. ++ * ++ * If we are on a GDT block boundary, we need to get the reserved GDT block. ++ * Otherwise, we may need to add backup GDT blocks for a sparse group. ++ * ++ * We only need to hold the superblock lock while we are actually adding ++ * in the new group's counts to the superblock. Prior to that we have ++ * not really "added" the group at all. We re-check that we are still ++ * adding in the last group in case things have changed since verifying. ++ */ ++int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_super_block *es = sbi->s_es; ++ int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
++ le16_to_cpu(es->s_reserved_gdt_blocks) : 0; ++ struct buffer_head *primary = NULL; ++ struct ext3_group_desc *gdp; ++ struct inode *inode = NULL; ++ handle_t *handle; ++ int gdb_off, gdb_num; ++ int err, err2; ++ ++ gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); ++ gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); ++ ++ if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, ++ EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { ++ ext3_warning(sb, __FUNCTION__, ++ "Can't resize non-sparse filesystem further\n"); ++ return -EPERM; ++ } ++ ++ if (reserved_gdb || gdb_off == 0) { ++ if (!EXT3_HAS_COMPAT_FEATURE(sb, ++ EXT3_FEATURE_COMPAT_RESIZE_INODE)){ ++ ext3_warning(sb, __FUNCTION__, ++ "No reserved GDT blocks, can't resize\n"); ++ return -EPERM; ++ } ++ inode = iget(sb, EXT3_RESIZE_INO); ++ if (!inode || is_bad_inode(inode)) { ++ ext3_warning(sb, __FUNCTION__, ++ "Error opening resize inode\n"); ++ iput(inode); ++ return -ENOENT; ++ } ++ } ++ ++ if ((err = verify_group_input(sb, input))) ++ goto exit_put; ++ ++ if ((err = setup_new_group_blocks(sb, input))) ++ goto exit_put; ++ ++ /* ++ * We will always be modifying at least the superblock and a GDT ++ * block. If we are adding a group past the last current GDT block, ++ * we will also modify the inode and the dindirect block. If we ++ * are adding a group with superblock/GDT backups we will also ++ * modify each of the reserved GDT dindirect blocks. ++ */ ++ handle = ext3_journal_start_sb(sb, ++ ext3_bg_has_super(sb, input->group) ? ++ 3 + reserved_gdb : 4); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto exit_put; ++ } ++ ++ lock_super(sb); ++ if (input->group != EXT3_SB(sb)->s_groups_count) { ++ ext3_warning(sb, __FUNCTION__, ++ "multiple resizers run on filesystem!\n"); ++ goto exit_journal; ++ } ++ ++ if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) ++ goto exit_journal; ++ ++ /* ++ * We will only either add reserved group blocks to a backup group ++ * or remove reserved blocks for the first group in a new group block. ++ * Doing both would be mean more complex code, and sane people don't ++ * use non-sparse filesystems anymore. This is already checked above. ++ */ ++ if (gdb_off) { ++ primary = sbi->s_group_desc[gdb_num]; ++ if ((err = ext3_journal_get_write_access(handle, primary))) ++ goto exit_journal; ++ ++ if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && ++ (err = reserve_backup_gdb(handle, inode, input))) ++ goto exit_journal; ++ } else if ((err = add_new_gdb(handle, inode, input, &primary))) ++ goto exit_journal; ++ ++ /* ++ * OK, now we've set up the new group. Time to make it active. ++ * ++ * Current kernels don't lock all allocations via lock_super(), ++ * so we have to be safe wrt. concurrent accesses the group ++ * data. So we need to be careful to set all of the relevant ++ * group descriptor data etc. *before* we enable the group. ++ * ++ * The key field here is EXT3_SB(sb)->s_groups_count: as long as ++ * that retains its old value, nobody is going to access the new ++ * group. ++ * ++ * So first we update all the descriptor metadata for the new ++ * group; then we update the total disk blocks count; then we ++ * update the groups count to enable the group; then finally we ++ * update the free space counts so that the system can start ++ * using the new disk blocks. 
++ */ ++ ++ /* Update group descriptor block for new group */ ++ gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; ++ ++ gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); ++ gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); ++ gdp->bg_inode_table = cpu_to_le32(input->inode_table); ++ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); ++ gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); ++ ++ /* ++ * Make the new blocks and inodes valid next. We do this before ++ * increasing the group count so that once the group is enabled, ++ * all of its blocks and inodes are already valid. ++ * ++ * We always allocate group-by-group, then block-by-block or ++ * inode-by-inode within a group, so enabling these ++ * blocks/inodes before the group is live won't actually let us ++ * allocate the new space yet. ++ */ ++ es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + ++ input->blocks_count); ++ es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + ++ EXT3_INODES_PER_GROUP(sb)); ++ ++ /* ++ * We need to protect s_groups_count against other CPUs seeing ++ * inconsistent state in the superblock. ++ * ++ * The precise rules we use are: ++ * ++ * * Writers of s_groups_count *must* hold lock_super ++ * AND ++ * * Writers must perform a smp_wmb() after updating all dependent ++ * data and before modifying the groups count ++ * ++ * * Readers must hold lock_super() over the access ++ * OR ++ * * Readers must perform an smp_rmb() after reading the groups count ++ * and before reading any dependent data. ++ * ++ * NB. These rules can be relaxed when checking the group count ++ * while freeing data, as we can only allocate from a block ++ * group after serialising against the group count, and we can ++ * only then free after serialising in turn against that ++ * allocation. ++ */ ++ smp_wmb(); ++ ++ /* Update the global fs size fields */ ++ EXT3_SB(sb)->s_groups_count++; ++ ++ ext3_journal_dirty_metadata(handle, primary); ++ ++ /* Update the reserved block counts only once the new group is ++ * active. */ ++ es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + ++ input->reserved_blocks); ++ ++ /* Update the free space counts */ ++ percpu_counter_mod(&sbi->s_freeblocks_counter, ++ input->free_blocks_count); ++ percpu_counter_mod(&sbi->s_freeinodes_counter, ++ EXT3_INODES_PER_GROUP(sb)); ++ ++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ sb->s_dirt = 1; ++ ++exit_journal: ++ unlock_super(sb); ++ if ((err2 = ext3_journal_stop(handle)) && !err) ++ err = err2; ++ if (!err) { ++ update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, ++ sizeof(struct ext3_super_block)); ++ update_backups(sb, primary->b_blocknr, primary->b_data, ++ primary->b_size); ++ } ++exit_put: ++ iput(inode); ++ return err; ++} /* ext3_group_add */ ++ ++/* Extend the filesystem to the new number of blocks specified. This entry ++ * point is only used to extend the current filesystem to the end of the last ++ * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" ++ * for emergencies (because it has no dependencies on reserved blocks). ++ * ++ * If we _really_ wanted, we could use default values to call ext3_group_add() ++ * allow the "remount" trick to work for arbitrary resizing, assuming enough ++ * GDT blocks are reserved to grow to the desired size. 
++ */ ++int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, ++ unsigned long n_blocks_count) ++{ ++ unsigned long o_blocks_count; ++ unsigned long o_groups_count; ++ unsigned long last; ++ int add; ++ struct buffer_head * bh; ++ handle_t *handle; ++ int err, freed_blocks; ++ ++ /* We don't need to worry about locking wrt other resizers just ++ * yet: we're going to revalidate es->s_blocks_count after ++ * taking lock_super() below. */ ++ o_blocks_count = le32_to_cpu(es->s_blocks_count); ++ o_groups_count = EXT3_SB(sb)->s_groups_count; ++ ++ if (test_opt(sb, DEBUG)) ++ printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n", ++ o_blocks_count, n_blocks_count); ++ ++ if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) ++ return 0; ++ ++ if (n_blocks_count < o_blocks_count) { ++ ext3_warning(sb, __FUNCTION__, ++ "can't shrink FS - resize aborted"); ++ return -EBUSY; ++ } ++ ++ /* Handle the remaining blocks in the last group only. */ ++ last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb); ++ ++ if (last == 0) { ++ ext3_warning(sb, __FUNCTION__, ++ "need to use ext2online to resize further\n"); ++ return -EPERM; ++ } ++ ++ add = EXT3_BLOCKS_PER_GROUP(sb) - last; ++ ++ if (o_blocks_count + add > n_blocks_count) ++ add = n_blocks_count - o_blocks_count; ++ ++ if (o_blocks_count + add < n_blocks_count) ++ ext3_warning(sb, __FUNCTION__, ++ "will only finish group (%lu blocks, %u new)", ++ o_blocks_count + add, add); ++ ++ /* See if the device is actually as big as what was requested */ ++ bh = sb_bread(sb, o_blocks_count + add -1); ++ if (!bh) { ++ ext3_warning(sb, __FUNCTION__, ++ "can't read last block, resize aborted"); ++ return -ENOSPC; ++ } ++ brelse(bh); ++ ++ /* We will update the superblock, one block bitmap, and ++ * one group descriptor via ext3_free_blocks(). 
++ */ ++ handle = ext3_journal_start_sb(sb, 3); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ ext3_warning(sb, __FUNCTION__, "error %d on journal start",err); ++ goto exit_put; ++ } ++ ++ lock_super(sb); ++ if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { ++ ext3_warning(sb, __FUNCTION__, ++ "multiple resizers run on filesystem!\n"); ++ err = -EBUSY; ++ goto exit_put; ++ } ++ ++ if ((err = ext3_journal_get_write_access(handle, ++ EXT3_SB(sb)->s_sbh))) { ++ ext3_warning(sb, __FUNCTION__, ++ "error %d on journal write access", err); ++ unlock_super(sb); ++ ext3_journal_stop(handle); ++ goto exit_put; ++ } ++ es->s_blocks_count = cpu_to_le32(o_blocks_count + add); ++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ sb->s_dirt = 1; ++ unlock_super(sb); ++ ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count, ++ o_blocks_count + add); ++ ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); ++ ext3_debug("freed blocks %ld through %ld\n", o_blocks_count, ++ o_blocks_count + add); ++ if ((err = ext3_journal_stop(handle))) ++ goto exit_put; ++ if (test_opt(sb, DEBUG)) ++ printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", ++ le32_to_cpu(es->s_blocks_count)); ++ update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, ++ sizeof(struct ext3_super_block)); ++exit_put: ++ return err; ++} /* ext3_group_extend */ +diff -uprN linux-2.6.8.1.orig/fs/ext3/super.c linux-2.6.8.1-ve022stab078/fs/ext3/super.c +--- linux-2.6.8.1.orig/fs/ext3/super.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/super.c 2006-05-11 13:05:40.000000000 +0400 +@@ -59,19 +59,19 @@ static int ext3_sync_fs(struct super_blo + * that sync() will call the filesystem's write_super callback if + * appropriate. + */ +-handle_t *ext3_journal_start(struct inode *inode, int nblocks) ++handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) + { + journal_t *journal; + +- if (inode->i_sb->s_flags & MS_RDONLY) ++ if (sb->s_flags & MS_RDONLY) + return ERR_PTR(-EROFS); + + /* Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. */ +- journal = EXT3_JOURNAL(inode); ++ journal = EXT3_SB(sb)->s_journal; + if (is_journal_aborted(journal)) { +- ext3_abort(inode->i_sb, __FUNCTION__, ++ ext3_abort(sb, __FUNCTION__, + "Detected aborted journal"); + return ERR_PTR(-EROFS); + } +@@ -108,17 +108,20 @@ void ext3_journal_abort_handle(const cha + char nbuf[16]; + const char *errstr = ext3_decode_error(NULL, err, nbuf); + +- printk(KERN_ERR "%s: aborting transaction: %s in %s", +- caller, errstr, err_fn); +- + if (bh) + BUFFER_TRACE(bh, "abort"); +- journal_abort_handle(handle); ++ + if (!handle->h_err) + handle->h_err = err; +-} + +-static char error_buf[1024]; ++ if (is_handle_aborted(handle)) ++ return; ++ ++ printk(KERN_ERR "%s: aborting transaction: %s in %s\n", ++ caller, errstr, err_fn); ++ ++ journal_abort_handle(handle); ++} + + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. 
+@@ -140,7 +143,7 @@ static void ext3_handle_error(struct sup + struct ext3_super_block *es = EXT3_SB(sb)->s_es; + + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; +- es->s_state |= cpu_to_le32(EXT3_ERROR_FS); ++ es->s_state |= cpu_to_le16(EXT3_ERROR_FS); + + if (sb->s_flags & MS_RDONLY) + return; +@@ -166,12 +169,11 @@ void ext3_error (struct super_block * sb + { + va_list args; + +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- +- printk (KERN_CRIT "EXT3-fs error (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ va_start(args, fmt); ++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); + + ext3_handle_error(sb); + } +@@ -240,21 +242,19 @@ void ext3_abort (struct super_block * sb + + printk (KERN_CRIT "ext3_abort called.\n"); + +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- +- if (test_opt (sb, ERRORS_PANIC)) +- panic ("EXT3-fs panic (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ va_start(args, fmt); ++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); + +- printk (KERN_CRIT "EXT3-fs abort (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ if (test_opt(sb, ERRORS_PANIC)) ++ panic("EXT3-fs panic from previous error\n"); + + if (sb->s_flags & MS_RDONLY) + return; + +- printk (KERN_CRIT "Remounting filesystem read-only\n"); ++ printk(KERN_CRIT "Remounting filesystem read-only\n"); + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; + sb->s_flags |= MS_RDONLY; + EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; +@@ -272,15 +272,16 @@ NORET_TYPE void ext3_panic (struct super + { + va_list args; + +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); ++ va_start(args, fmt); ++ printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); + + /* this is to prevent panic from syncing this filesystem */ + /* AKPM: is this sufficient? 
*/ + sb->s_flags |= MS_RDONLY; +- panic ("EXT3-fs panic (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ panic ("EXT3-fs panic forced\n"); + } + + void ext3_warning (struct super_block * sb, const char * function, +@@ -288,11 +289,12 @@ void ext3_warning (struct super_block * + { + va_list args; + +- va_start (args, fmt); +- vsprintf (error_buf, fmt, args); +- va_end (args); +- printk (KERN_WARNING "EXT3-fs warning (device %s): %s: %s\n", +- sb->s_id, function, error_buf); ++ va_start(args, fmt); ++ printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", ++ sb->s_id, function); ++ vprintk(fmt, args); ++ printk("\n"); ++ va_end(args); + } + + void ext3_update_dynamic_rev(struct super_block *sb) +@@ -380,7 +382,7 @@ static void dump_orphan_list(struct supe + "inode %s:%ld at %p: mode %o, nlink %d, next %d\n", + inode->i_sb->s_id, inode->i_ino, inode, + inode->i_mode, inode->i_nlink, +- le32_to_cpu(NEXT_ORPHAN(inode))); ++ NEXT_ORPHAN(inode)); + } + } + +@@ -394,7 +396,7 @@ void ext3_put_super (struct super_block + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); +- es->s_state = le16_to_cpu(sbi->s_mount_state); ++ es->s_state = cpu_to_le16(sbi->s_mount_state); + BUFFER_TRACE(sbi->s_sbh, "marking dirty"); + mark_buffer_dirty(sbi->s_sbh); + ext3_commit_super(sb, es, 1); +@@ -403,7 +405,9 @@ void ext3_put_super (struct super_block + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); +- kfree(sbi->s_debts); ++ percpu_counter_destroy(&sbi->s_freeblocks_counter); ++ percpu_counter_destroy(&sbi->s_freeinodes_counter); ++ percpu_counter_destroy(&sbi->s_dirs_counter); + brelse(sbi->s_sbh); + #ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) { +@@ -480,7 +484,7 @@ static int init_inodecache(void) + { + ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", + sizeof(struct ext3_inode_info), +- 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, ++ 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (ext3_inode_cachep == NULL) + return -ENOMEM; +@@ -587,7 +591,7 @@ enum { + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, +- Opt_ignore, Opt_err, ++ Opt_ignore, Opt_err, Opt_resize, + }; + + static match_table_t tokens = { +@@ -632,7 +636,8 @@ static match_table_t tokens = { + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_ignore, "usrquota"}, +- {Opt_err, NULL} ++ {Opt_err, NULL}, ++ {Opt_resize, "resize"}, + }; + + static unsigned long get_sb_block(void **data) +@@ -656,7 +661,7 @@ static unsigned long get_sb_block(void * + } + + static int parse_options (char * options, struct super_block *sb, +- unsigned long * inum, int is_remount) ++ unsigned long * inum, unsigned long *n_blocks_count, int is_remount) + { + struct ext3_sb_info *sbi = EXT3_SB(sb); + char * p; +@@ -899,6 +904,15 @@ clear_qf_name: + break; + case Opt_ignore: + break; ++ case Opt_resize: ++ if (!n_blocks_count) { ++ printk("EXT3-fs: resize option only available " ++ "for remount\n"); ++ return 0; ++ } ++ match_int(&args[0], &option); ++ *n_blocks_count = option; ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -958,8 +972,7 @@ static int ext3_setup_super(struct super + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); + #endif + if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) +- es->s_max_mnt_count = +- 
(__s16) cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); ++ es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + es->s_mtime = cpu_to_le32(get_seconds()); + ext3_update_dynamic_rev(sb); +@@ -993,6 +1006,7 @@ static int ext3_setup_super(struct super + return res; + } + ++/* Called at mount-time, super-block is locked */ + static int ext3_check_descriptors (struct super_block * sb) + { + struct ext3_sb_info *sbi = EXT3_SB(sb); +@@ -1168,12 +1182,18 @@ static void ext3_orphan_cleanup (struct + static loff_t ext3_max_size(int bits) + { + loff_t res = EXT3_NDIR_BLOCKS; ++ /* This constant is calculated to be the largest file size for a ++ * dense, 4k-blocksize file such that the total number of ++ * sectors in the file, including data and all indirect blocks, ++ * does not exceed 2^32. */ ++ const loff_t upper_limit = 0x1ff7fffd000LL; ++ + res += 1LL << (bits-2); + res += 1LL << (2*(bits-2)); + res += 1LL << (3*(bits-2)); + res <<= bits; +- if (res > (512LL << 32) - (1 << bits)) +- res = (512LL << 32) - (1 << bits); ++ if (res > upper_limit) ++ res = upper_limit; + return res; + } + +@@ -1215,6 +1235,7 @@ static int ext3_fill_super (struct super + int db_count; + int i; + int needs_recovery; ++ __le32 features; + + sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) +@@ -1288,10 +1309,10 @@ static int ext3_fill_super (struct super + sbi->s_resuid = le16_to_cpu(es->s_def_resuid); + sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + +- if (!parse_options ((char *) data, sb, &journal_inum, 0)) ++ if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0)) + goto failed_mount; + +- sb->s_flags |= MS_ONE_SECOND; ++ set_sb_time_gran(sb, 1000000000U); + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + +@@ -1307,17 +1328,18 @@ static int ext3_fill_super (struct super + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. 
+ */ +- if ((i = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))) { ++ features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); ++ if (features) { + printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " + "unsupported optional features (%x).\n", +- sb->s_id, i); ++ sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } +- if (!(sb->s_flags & MS_RDONLY) && +- (i = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))){ ++ features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); ++ if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " + "unsupported optional features (%x).\n", +- sb->s_id, i); ++ sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); +@@ -1354,7 +1376,7 @@ static int ext3_fill_super (struct super + } + es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); + sbi->s_es = es; +- if (es->s_magic != le16_to_cpu(EXT3_SUPER_MAGIC)) { ++ if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { + printk (KERN_ERR + "EXT3-fs: Magic mismatch, very weird !\n"); + goto failed_mount; +@@ -1432,13 +1454,6 @@ static int ext3_fill_super (struct super + printk (KERN_ERR "EXT3-fs: not enough memory\n"); + goto failed_mount; + } +- sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8), +- GFP_KERNEL); +- if (!sbi->s_debts) { +- printk("EXT3-fs: not enough memory to allocate s_bgi\n"); +- goto failed_mount2; +- } +- memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8)); + + percpu_counter_init(&sbi->s_freeblocks_counter); + percpu_counter_init(&sbi->s_freeinodes_counter); +@@ -1575,7 +1590,6 @@ static int ext3_fill_super (struct super + failed_mount3: + journal_destroy(sbi->s_journal); + failed_mount2: +- kfree(sbi->s_debts); + for (i = 0; i < db_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); +@@ -1724,10 +1738,10 @@ static journal_t *ext3_get_dev_journal(s + printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); + goto out_journal; + } +- if (ntohl(journal->j_superblock->s_nr_users) != 1) { ++ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { + printk(KERN_ERR "EXT3-fs: External journal has more than one " + "user (unsupported) - %d\n", +- ntohl(journal->j_superblock->s_nr_users)); ++ be32_to_cpu(journal->j_superblock->s_nr_users)); + goto out_journal; + } + EXT3_SB(sb)->journal_bdev = bdev; +@@ -2013,11 +2027,12 @@ int ext3_remount (struct super_block * s + struct ext3_super_block * es; + struct ext3_sb_info *sbi = EXT3_SB(sb); + unsigned long tmp; ++ unsigned long n_blocks_count = 0; + + /* + * Allow the "check" option to be passed as a remount option. 
+ */ +- if (!parse_options(data, sb, &tmp, 1)) ++ if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) + return -EINVAL; + + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) +@@ -2030,7 +2045,8 @@ int ext3_remount (struct super_block * s + + ext3_init_journal_params(sbi, sbi->s_journal); + +- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { ++ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || ++ n_blocks_count > le32_to_cpu(es->s_blocks_count)) { + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + return -EROFS; + +@@ -2052,13 +2068,13 @@ int ext3_remount (struct super_block * s + + ext3_mark_recovery_complete(sb, es); + } else { +- int ret; ++ __le32 ret; + if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, + ~EXT3_FEATURE_RO_COMPAT_SUPP))) { + printk(KERN_WARNING "EXT3-fs: %s: couldn't " + "remount RDWR because of unsupported " + "optional features (%x).\n", +- sb->s_id, ret); ++ sb->s_id, le32_to_cpu(ret)); + return -EROFS; + } + /* +@@ -2069,6 +2085,8 @@ int ext3_remount (struct super_block * s + */ + ext3_clear_journal_err(sb, es); + sbi->s_mount_state = le16_to_cpu(es->s_state); ++ if ((ret = ext3_group_extend(sb, es, n_blocks_count))) ++ return ret; + if (!ext3_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; + } +@@ -2085,6 +2103,10 @@ int ext3_statfs (struct super_block * sb + if (test_opt (sb, MINIX_DF)) + overhead = 0; + else { ++ unsigned long ngroups; ++ ngroups = EXT3_SB(sb)->s_groups_count; ++ smp_rmb(); ++ + /* + * Compute the overhead (FS structures) + */ +@@ -2100,7 +2122,7 @@ int ext3_statfs (struct super_block * sb + * block group descriptors. If the sparse superblocks + * feature is turned on, then not all groups have this. + */ +- for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) ++ for (i = 0; i < ngroups; i++) + overhead += ext3_bg_has_super(sb, i) + + ext3_bg_num_gdb(sb, i); + +@@ -2108,8 +2130,7 @@ int ext3_statfs (struct super_block * sb + * Every block group has an inode bitmap, a block + * bitmap, and an inode table. + */ +- overhead += (EXT3_SB(sb)->s_groups_count * +- (2 + EXT3_SB(sb)->s_itb_per_group)); ++ overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); + } + + buf->f_type = EXT3_SUPER_MAGIC; +@@ -2331,7 +2352,7 @@ static struct file_system_type ext3_fs_t + .name = "ext3", + .get_sb = ext3_get_sb, + .kill_sb = kill_block_super, +- .fs_flags = FS_REQUIRES_DEV, ++ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED, + }; + + static int __init init_ext3_fs(void) +diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr.c linux-2.6.8.1-ve022stab078/fs/ext3/xattr.c +--- linux-2.6.8.1.orig/fs/ext3/xattr.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/xattr.c 2006-05-11 13:05:32.000000000 +0400 +@@ -819,7 +819,7 @@ getblk_failed: + + /* Update the inode. */ + EXT3_I(inode)->i_file_acl = new_bh ? 
new_bh->b_blocknr : 0; +- inode->i_ctime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + if (IS_SYNC(inode)) + handle->h_sync = 1; +@@ -1130,7 +1130,7 @@ static inline void ext3_xattr_hash_entry + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { +- __u32 *value = (__u32 *)((char *)header + ++ __le32 *value = (__le32 *)((char *)header + + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + + EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { +diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr.h linux-2.6.8.1-ve022stab078/fs/ext3/xattr.h +--- linux-2.6.8.1.orig/fs/ext3/xattr.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/xattr.h 2006-05-11 13:05:31.000000000 +0400 +@@ -25,20 +25,20 @@ + #define EXT3_XATTR_INDEX_SECURITY 6 + + struct ext3_xattr_header { +- __u32 h_magic; /* magic number for identification */ +- __u32 h_refcount; /* reference count */ +- __u32 h_blocks; /* number of disk blocks used */ +- __u32 h_hash; /* hash value of all attributes */ ++ __le32 h_magic; /* magic number for identification */ ++ __le32 h_refcount; /* reference count */ ++ __le32 h_blocks; /* number of disk blocks used */ ++ __le32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ + }; + + struct ext3_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ +- __u16 e_value_offs; /* offset in disk block of value */ +- __u32 e_value_block; /* disk block attribute is stored on (n/i) */ +- __u32 e_value_size; /* size of attribute value */ +- __u32 e_hash; /* hash value of name and value */ ++ __le16 e_value_offs; /* offset in disk block of value */ ++ __le32 e_value_block; /* disk block attribute is stored on (n/i) */ ++ __le32 e_value_size; /* size of attribute value */ ++ __le32 e_hash; /* hash value of name and value */ + char e_name[0]; /* attribute name */ + }; + +diff -uprN linux-2.6.8.1.orig/fs/ext3/xattr_user.c linux-2.6.8.1-ve022stab078/fs/ext3/xattr_user.c +--- linux-2.6.8.1.orig/fs/ext3/xattr_user.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ext3/xattr_user.c 2006-05-11 13:05:35.000000000 +0400 +@@ -42,7 +42,7 @@ ext3_xattr_user_get(struct inode *inode, + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; +- error = permission(inode, MAY_READ, NULL); ++ error = permission(inode, MAY_READ, NULL, NULL); + if (error) + return error; + +@@ -62,7 +62,7 @@ ext3_xattr_user_set(struct inode *inode, + if ( !S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; +- error = permission(inode, MAY_WRITE, NULL); ++ error = permission(inode, MAY_WRITE, NULL, NULL); + if (error) + return error; + +diff -uprN linux-2.6.8.1.orig/fs/fat/inode.c linux-2.6.8.1-ve022stab078/fs/fat/inode.c +--- linux-2.6.8.1.orig/fs/fat/inode.c 2004-08-14 14:55:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/fat/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1227,7 +1227,7 @@ static int fat_fill_inode(struct inode * + return 0; + } + +-void fat_write_inode(struct inode *inode, int wait) ++int fat_write_inode(struct inode *inode, int wait) + { + struct super_block *sb = inode->i_sb; + struct buffer_head *bh; +@@ -1237,14 +1237,14 @@ void fat_write_inode(struct inode *inode + retry: + i_pos = MSDOS_I(inode)->i_pos; + if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) { +- return; ++ return 0; + } + lock_kernel(); + if (!(bh = sb_bread(sb, 
i_pos >> MSDOS_SB(sb)->dir_per_block_bits))) { + printk(KERN_ERR "FAT: unable to read inode block " + "for updating (i_pos %lld)\n", i_pos); + unlock_kernel(); +- return /* -EIO */; ++ return -EIO; + } + spin_lock(&fat_inode_lock); + if (i_pos != MSDOS_I(inode)->i_pos) { +@@ -1281,6 +1281,7 @@ retry: + mark_buffer_dirty(bh); + brelse(bh); + unlock_kernel(); ++ return 0; + } + + +diff -uprN linux-2.6.8.1.orig/fs/fcntl.c linux-2.6.8.1-ve022stab078/fs/fcntl.c +--- linux-2.6.8.1.orig/fs/fcntl.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/fcntl.c 2006-05-11 13:05:40.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/module.h> + #include <linux/security.h> + #include <linux/ptrace.h> ++#include <linux/ve_owner.h> + + #include <asm/poll.h> + #include <asm/siginfo.h> +@@ -219,6 +220,9 @@ static int setfl(int fd, struct file * f + struct inode * inode = filp->f_dentry->d_inode; + int error = 0; + ++ if (!capable(CAP_SYS_RAWIO)) ++ arg &= ~O_DIRECT; ++ + /* O_APPEND cannot be cleared if the file is marked as append-only */ + if (!(arg & O_APPEND) && IS_APPEND(inode)) + return -EPERM; +@@ -262,6 +266,7 @@ static int setfl(int fd, struct file * f + static void f_modown(struct file *filp, unsigned long pid, + uid_t uid, uid_t euid, int force) + { ++ pid = comb_vpid_to_pid(pid); + write_lock_irq(&filp->f_owner.lock); + if (force || !filp->f_owner.pid) { + filp->f_owner.pid = pid; +@@ -330,7 +335,7 @@ static long do_fcntl(int fd, unsigned in + * current syscall conventions, the only way + * to fix this will be in libc. + */ +- err = filp->f_owner.pid; ++ err = comb_pid_to_vpid(filp->f_owner.pid); + force_successful_syscall_return(); + break; + case F_SETOWN: +@@ -482,6 +487,8 @@ static void send_sigio_to_task(struct ta + + void send_sigio(struct fown_struct *fown, int fd, int band) + { ++ struct file *f; ++ struct ve_struct *env; + struct task_struct *p; + int pid; + +@@ -489,19 +496,21 @@ void send_sigio(struct fown_struct *fown + pid = fown->pid; + if (!pid) + goto out_unlock_fown; +- ++ ++ /* hack: fown's are always embedded in struct file */ ++ f = container_of(fown, struct file, f_owner); ++ env = VE_OWNER_FILP(f); ++ + read_lock(&tasklist_lock); + if (pid > 0) { +- p = find_task_by_pid(pid); +- if (p) { ++ p = find_task_by_pid_all(pid); ++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) { + send_sigio_to_task(p, fown, fd, band); + } + } else { +- struct list_head *l; +- struct pid *pidptr; +- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) { ++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) { + send_sigio_to_task(p, fown, fd, band); +- } ++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env); + } + read_unlock(&tasklist_lock); + out_unlock_fown: +@@ -517,6 +526,8 @@ static void send_sigurg_to_task(struct t + + int send_sigurg(struct fown_struct *fown) + { ++ struct file *f; ++ struct ve_struct *env; + struct task_struct *p; + int pid, ret = 0; + +@@ -527,18 +538,20 @@ int send_sigurg(struct fown_struct *fown + + ret = 1; + ++ /* hack: fown's are always embedded in struct file */ ++ f = container_of(fown, struct file, f_owner); ++ env = VE_OWNER_FILP(f); ++ + read_lock(&tasklist_lock); + if (pid > 0) { +- p = find_task_by_pid(pid); +- if (p) { ++ p = find_task_by_pid_all(pid); ++ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, env)) { + send_sigurg_to_task(p, fown); + } + } else { +- struct list_head *l; +- struct pid *pidptr; +- for_each_task_pid(-pid, PIDTYPE_PGID, p, l, pidptr) { ++ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env) { + 
send_sigurg_to_task(p, fown); +- } ++ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, env); + } + read_unlock(&tasklist_lock); + out_unlock_fown: +diff -uprN linux-2.6.8.1.orig/fs/file.c linux-2.6.8.1-ve022stab078/fs/file.c +--- linux-2.6.8.1.orig/fs/file.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/file.c 2006-05-11 13:05:39.000000000 +0400 +@@ -15,6 +15,7 @@ + + #include <asm/bitops.h> + ++#include <ub/ub_mem.h> + + /* + * Allocate an fd array, using kmalloc or vmalloc. +@@ -26,9 +27,9 @@ struct file ** alloc_fd_array(int num) + int size = num * sizeof(struct file *); + + if (size <= PAGE_SIZE) +- new_fds = (struct file **) kmalloc(size, GFP_KERNEL); ++ new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL); + else +- new_fds = (struct file **) vmalloc(size); ++ new_fds = (struct file **) ub_vmalloc(size); + return new_fds; + } + +@@ -135,9 +136,9 @@ fd_set * alloc_fdset(int num) + int size = num / 8; + + if (size <= PAGE_SIZE) +- new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); ++ new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL); + else +- new_fdset = (fd_set *) vmalloc(size); ++ new_fdset = (fd_set *) ub_vmalloc(size); + return new_fdset; + } + +diff -uprN linux-2.6.8.1.orig/fs/file_table.c linux-2.6.8.1-ve022stab078/fs/file_table.c +--- linux-2.6.8.1.orig/fs/file_table.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/file_table.c 2006-05-11 13:05:40.000000000 +0400 +@@ -8,6 +8,7 @@ + #include <linux/string.h> + #include <linux/slab.h> + #include <linux/file.h> ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/module.h> + #include <linux/smp_lock.h> +@@ -17,6 +18,8 @@ + #include <linux/mount.h> + #include <linux/cdev.h> + ++#include <ub/ub_misc.h> ++ + /* sysctl tunables... 
*/ + struct files_stat_struct files_stat = { + .max_files = NR_FILE +@@ -56,6 +59,8 @@ void filp_dtor(void * objp, struct kmem_ + + static inline void file_free(struct file *f) + { ++ ub_file_uncharge(f); ++ put_ve(VE_OWNER_FILP(f)); + kmem_cache_free(filp_cachep, f); + } + +@@ -65,40 +70,46 @@ static inline void file_free(struct file + */ + struct file *get_empty_filp(void) + { +-static int old_max; ++ static int old_max; + struct file * f; + + /* + * Privileged users can go above max_files + */ +- if (files_stat.nr_files < files_stat.max_files || +- capable(CAP_SYS_ADMIN)) { +- f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); +- if (f) { +- memset(f, 0, sizeof(*f)); +- if (security_file_alloc(f)) { +- file_free(f); +- goto fail; +- } +- eventpoll_init_file(f); +- atomic_set(&f->f_count, 1); +- f->f_uid = current->fsuid; +- f->f_gid = current->fsgid; +- f->f_owner.lock = RW_LOCK_UNLOCKED; +- /* f->f_version: 0 */ +- INIT_LIST_HEAD(&f->f_list); +- return f; +- } ++ if (files_stat.nr_files >= files_stat.max_files && ++ !capable(CAP_SYS_ADMIN)) ++ goto over; ++ ++ f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); ++ if (f == NULL) ++ goto fail; ++ ++ memset(f, 0, sizeof(*f)); ++ if (ub_file_charge(f)) { ++ kmem_cache_free(filp_cachep, f); ++ goto fail; + } + ++ SET_VE_OWNER_FILP(f, get_ve(get_exec_env())); ++ if (security_file_alloc(f)) { ++ file_free(f); ++ goto fail; ++ } ++ eventpoll_init_file(f); ++ atomic_set(&f->f_count, 1); ++ f->f_uid = current->fsuid; ++ f->f_gid = current->fsgid; ++ f->f_owner.lock = RW_LOCK_UNLOCKED; ++ /* f->f_version: 0 */ ++ INIT_LIST_HEAD(&f->f_list); ++ return f; ++ ++over: + /* Ran out of filps - report that */ +- if (files_stat.max_files >= old_max) { ++ if (files_stat.nr_files > old_max) { + printk(KERN_INFO "VFS: file-max limit %d reached\n", +- files_stat.max_files); +- old_max = files_stat.max_files; +- } else { +- /* Big problems... */ +- printk(KERN_WARNING "VFS: filp allocation failed\n"); ++ files_stat.max_files); ++ old_max = files_stat.nr_files; + } + fail: + return NULL; +diff -uprN linux-2.6.8.1.orig/fs/filesystems.c linux-2.6.8.1-ve022stab078/fs/filesystems.c +--- linux-2.6.8.1.orig/fs/filesystems.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/filesystems.c 2006-05-11 13:05:40.000000000 +0400 +@@ -11,6 +11,7 @@ + #include <linux/kmod.h> + #include <linux/init.h> + #include <linux/module.h> ++#include <linux/ve_owner.h> + #include <asm/uaccess.h> + + /* +@@ -20,8 +21,8 @@ + * During the unload module must call unregister_filesystem(). + * We can access the fields of list element if: + * 1) spinlock is held or +- * 2) we hold the reference to the module. +- * The latter can be guaranteed by call of try_module_get(); if it ++ * 2) we hold the reference to the element. ++ * The latter can be guaranteed by call of try_filesystem(); if it + * returned 0 we must skip the element, otherwise we got the reference. + * Once the reference is obtained we can drop the spinlock. 
+ */ +@@ -29,23 +30,51 @@ + static struct file_system_type *file_systems; + static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED; + ++int try_get_filesystem(struct file_system_type *fs) ++{ ++ if (try_module_get(fs->owner)) { ++#ifdef CONFIG_VE ++ get_ve(VE_OWNER_FSTYPE(fs)); ++#endif ++ return 1; ++ } ++ return 0; ++} ++ + /* WARNING: This can be used only if we _already_ own a reference */ + void get_filesystem(struct file_system_type *fs) + { ++#ifdef CONFIG_VE ++ get_ve(VE_OWNER_FSTYPE(fs)); ++#endif + __module_get(fs->owner); + } + + void put_filesystem(struct file_system_type *fs) + { + module_put(fs->owner); ++#ifdef CONFIG_VE ++ put_ve(VE_OWNER_FSTYPE(fs)); ++#endif ++} ++ ++static inline int check_ve_fstype(struct file_system_type *p, ++ struct ve_struct *env) ++{ ++ return ((p->fs_flags & FS_VIRTUALIZED) || ++ ve_accessible_strict(VE_OWNER_FSTYPE(p), env)); + } + +-static struct file_system_type **find_filesystem(const char *name) ++static struct file_system_type **find_filesystem(const char *name, ++ struct ve_struct *env) + { + struct file_system_type **p; +- for (p=&file_systems; *p; p=&(*p)->next) ++ for (p=&file_systems; *p; p=&(*p)->next) { ++ if (!check_ve_fstype(*p, env)) ++ continue; + if (strcmp((*p)->name,name) == 0) + break; ++ } + return p; + } + +@@ -72,8 +101,10 @@ int register_filesystem(struct file_syst + if (fs->next) + return -EBUSY; + INIT_LIST_HEAD(&fs->fs_supers); ++ if (VE_OWNER_FSTYPE(fs) == NULL) ++ SET_VE_OWNER_FSTYPE(fs, get_ve0()); + write_lock(&file_systems_lock); +- p = find_filesystem(fs->name); ++ p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs)); + if (*p) + res = -EBUSY; + else +@@ -130,11 +161,14 @@ static int fs_index(const char __user * + + err = -EINVAL; + read_lock(&file_systems_lock); +- for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { ++ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) { ++ if (!check_ve_fstype(tmp, get_exec_env())) ++ continue; + if (strcmp(tmp->name,name) == 0) { + err = index; + break; + } ++ index++; + } + read_unlock(&file_systems_lock); + putname(name); +@@ -147,9 +181,15 @@ static int fs_name(unsigned int index, c + int len, res; + + read_lock(&file_systems_lock); +- for (tmp = file_systems; tmp; tmp = tmp->next, index--) +- if (index <= 0 && try_module_get(tmp->owner)) +- break; ++ for (tmp = file_systems; tmp; tmp = tmp->next) { ++ if (!check_ve_fstype(tmp, get_exec_env())) ++ continue; ++ if (!index) { ++ if (try_get_filesystem(tmp)) ++ break; ++ } else ++ index--; ++ } + read_unlock(&file_systems_lock); + if (!tmp) + return -EINVAL; +@@ -167,8 +207,9 @@ static int fs_maxindex(void) + int index; + + read_lock(&file_systems_lock); +- for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++) +- ; ++ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next) ++ if (check_ve_fstype(tmp, get_exec_env())) ++ index++; + read_unlock(&file_systems_lock); + return index; + } +@@ -204,9 +245,10 @@ int get_filesystem_list(char * buf) + read_lock(&file_systems_lock); + tmp = file_systems; + while (tmp && len < PAGE_SIZE - 80) { +- len += sprintf(buf+len, "%s\t%s\n", +- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", +- tmp->name); ++ if (check_ve_fstype(tmp, get_exec_env())) ++ len += sprintf(buf+len, "%s\t%s\n", ++ (tmp->fs_flags & FS_REQUIRES_DEV) ? 
"" : "nodev", ++ tmp->name); + tmp = tmp->next; + } + read_unlock(&file_systems_lock); +@@ -218,14 +260,14 @@ struct file_system_type *get_fs_type(con + struct file_system_type *fs; + + read_lock(&file_systems_lock); +- fs = *(find_filesystem(name)); +- if (fs && !try_module_get(fs->owner)) ++ fs = *(find_filesystem(name, get_exec_env())); ++ if (fs && !try_get_filesystem(fs)) + fs = NULL; + read_unlock(&file_systems_lock); + if (!fs && (request_module("%s", name) == 0)) { + read_lock(&file_systems_lock); +- fs = *(find_filesystem(name)); +- if (fs && !try_module_get(fs->owner)) ++ fs = *(find_filesystem(name, get_exec_env())); ++ if (fs && !try_get_filesystem(fs)) + fs = NULL; + read_unlock(&file_systems_lock); + } +@@ -233,3 +275,5 @@ struct file_system_type *get_fs_type(con + } + + EXPORT_SYMBOL(get_fs_type); ++EXPORT_SYMBOL(get_filesystem); ++EXPORT_SYMBOL(put_filesystem); +diff -uprN linux-2.6.8.1.orig/fs/fs-writeback.c linux-2.6.8.1-ve022stab078/fs/fs-writeback.c +--- linux-2.6.8.1.orig/fs/fs-writeback.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/fs-writeback.c 2006-05-11 13:05:35.000000000 +0400 +@@ -133,10 +133,11 @@ out: + + EXPORT_SYMBOL(__mark_inode_dirty); + +-static void write_inode(struct inode *inode, int sync) ++static int write_inode(struct inode *inode, int sync) + { + if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) +- inode->i_sb->s_op->write_inode(inode, sync); ++ return inode->i_sb->s_op->write_inode(inode, sync); ++ return 0; + } + + /* +@@ -170,8 +171,11 @@ __sync_single_inode(struct inode *inode, + ret = do_writepages(mapping, wbc); + + /* Don't write the inode if only I_DIRTY_PAGES was set */ +- if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) +- write_inode(inode, wait); ++ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { ++ int err = write_inode(inode, wait); ++ if (ret == 0) ++ ret = err; ++ } + + if (wait) { + int err = filemap_fdatawait(mapping); +@@ -392,7 +396,6 @@ writeback_inodes(struct writeback_contro + { + struct super_block *sb; + +- spin_lock(&inode_lock); + spin_lock(&sb_lock); + restart: + sb = sb_entry(super_blocks.prev); +@@ -407,19 +410,21 @@ restart: + * be unmounted by the time it is released. + */ + if (down_read_trylock(&sb->s_umount)) { +- if (sb->s_root) ++ if (sb->s_root) { ++ spin_lock(&inode_lock); + sync_sb_inodes(sb, wbc); ++ spin_unlock(&inode_lock); ++ } + up_read(&sb->s_umount); + } + spin_lock(&sb_lock); +- if (__put_super(sb)) ++ if (__put_super_and_need_restart(sb)) + goto restart; + } + if (wbc->nr_to_write <= 0) + break; + } + spin_unlock(&sb_lock); +- spin_unlock(&inode_lock); + } + + /* +@@ -464,32 +469,6 @@ static void set_sb_syncing(int val) + spin_unlock(&sb_lock); + } + +-/* +- * Find a superblock with inodes that need to be synced +- */ +-static struct super_block *get_super_to_sync(void) +-{ +- struct super_block *sb; +-restart: +- spin_lock(&sb_lock); +- sb = sb_entry(super_blocks.prev); +- for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { +- if (sb->s_syncing) +- continue; +- sb->s_syncing = 1; +- sb->s_count++; +- spin_unlock(&sb_lock); +- down_read(&sb->s_umount); +- if (!sb->s_root) { +- drop_super(sb); +- goto restart; +- } +- return sb; +- } +- spin_unlock(&sb_lock); +- return NULL; +-} +- + /** + * sync_inodes + * +@@ -508,23 +487,39 @@ restart: + * outstanding dirty inodes, the writeback goes block-at-a-time within the + * filesystem's write_inode(). This is extremely slow. 
+ */ +-void sync_inodes(int wait) ++static void __sync_inodes(int wait) + { + struct super_block *sb; + +- set_sb_syncing(0); +- while ((sb = get_super_to_sync()) != NULL) { +- sync_inodes_sb(sb, 0); +- sync_blockdev(sb->s_bdev); +- drop_super(sb); ++ spin_lock(&sb_lock); ++restart: ++ list_for_each_entry(sb, &super_blocks, s_list) { ++ if (sb->s_syncing) ++ continue; ++ sb->s_syncing = 1; ++ sb->s_count++; ++ spin_unlock(&sb_lock); ++ down_read(&sb->s_umount); ++ if (sb->s_root) { ++ sync_inodes_sb(sb, wait); ++ sync_blockdev(sb->s_bdev); ++ } ++ up_read(&sb->s_umount); ++ spin_lock(&sb_lock); ++ if (__put_super_and_need_restart(sb)) ++ goto restart; + } ++ spin_unlock(&sb_lock); ++} ++ ++void sync_inodes(int wait) ++{ ++ set_sb_syncing(0); ++ __sync_inodes(0); ++ + if (wait) { + set_sb_syncing(0); +- while ((sb = get_super_to_sync()) != NULL) { +- sync_inodes_sb(sb, 1); +- sync_blockdev(sb->s_bdev); +- drop_super(sb); +- } ++ __sync_inodes(1); + } + } + +diff -uprN linux-2.6.8.1.orig/fs/hfs/hfs_fs.h linux-2.6.8.1-ve022stab078/fs/hfs/hfs_fs.h +--- linux-2.6.8.1.orig/fs/hfs/hfs_fs.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfs/hfs_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -198,7 +198,7 @@ extern struct address_space_operations h + + extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); + extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, u32 *, u32 *); +-extern void hfs_write_inode(struct inode *, int); ++extern int hfs_write_inode(struct inode *, int); + extern int hfs_inode_setattr(struct dentry *, struct iattr *); + extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, + u32 log_size, u32 phys_size, u32 clump_size); +diff -uprN linux-2.6.8.1.orig/fs/hfs/inode.c linux-2.6.8.1-ve022stab078/fs/hfs/inode.c +--- linux-2.6.8.1.orig/fs/hfs/inode.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode * + HFS_SB(inode->i_sb)->alloc_blksz); + } + +-void hfs_write_inode(struct inode *inode, int unused) ++int hfs_write_inode(struct inode *inode, int unused) + { + struct hfs_find_data fd; + hfs_cat_rec rec; +@@ -395,27 +395,27 @@ void hfs_write_inode(struct inode *inode + break; + case HFS_EXT_CNID: + hfs_btree_write(HFS_SB(inode->i_sb)->ext_tree); +- return; ++ return 0; + case HFS_CAT_CNID: + hfs_btree_write(HFS_SB(inode->i_sb)->cat_tree); +- return; ++ return 0; + default: + BUG(); +- return; ++ return -EIO; + } + } + + if (HFS_IS_RSRC(inode)) { + mark_inode_dirty(HFS_I(inode)->rsrc_inode); +- return; ++ return 0; + } + + if (!inode->i_nlink) +- return; ++ return 0; + + if (hfs_find_init(HFS_SB(inode->i_sb)->cat_tree, &fd)) + /* panic? 
*/ +- return; ++ return -EIO; + + fd.search_key->cat = HFS_I(inode)->cat_key; + if (hfs_brec_find(&fd)) +@@ -460,6 +460,7 @@ void hfs_write_inode(struct inode *inode + } + out: + hfs_find_exit(&fd); ++ return 0; + } + + static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, +@@ -512,11 +513,11 @@ void hfs_clear_inode(struct inode *inode + } + + static int hfs_permission(struct inode *inode, int mask, +- struct nameidata *nd) ++ struct nameidata *nd, struct exec_perm *exec_perm) + { + if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) + return 0; +- return vfs_permission(inode, mask); ++ return vfs_permission(inode, mask, NULL); + } + + static int hfs_file_open(struct inode *inode, struct file *file) +diff -uprN linux-2.6.8.1.orig/fs/hfsplus/dir.c linux-2.6.8.1-ve022stab078/fs/hfsplus/dir.c +--- linux-2.6.8.1.orig/fs/hfsplus/dir.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfsplus/dir.c 2006-05-11 13:05:32.000000000 +0400 +@@ -396,7 +396,7 @@ int hfsplus_symlink(struct inode *dir, s + if (!inode) + return -ENOSPC; + +- res = page_symlink(inode, symname, strlen(symname) + 1); ++ res = page_symlink(inode, symname, strlen(symname) + 1, GFP_KERNEL); + if (res) { + inode->i_nlink = 0; + hfsplus_delete_inode(inode); +diff -uprN linux-2.6.8.1.orig/fs/hfsplus/hfsplus_fs.h linux-2.6.8.1-ve022stab078/fs/hfsplus/hfsplus_fs.h +--- linux-2.6.8.1.orig/fs/hfsplus/hfsplus_fs.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfsplus/hfsplus_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -333,7 +333,7 @@ extern struct address_space_operations h + void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); + void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); + int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); +-void hfsplus_cat_write_inode(struct inode *); ++int hfsplus_cat_write_inode(struct inode *); + struct inode *hfsplus_new_inode(struct super_block *, int); + void hfsplus_delete_inode(struct inode *); + +diff -uprN linux-2.6.8.1.orig/fs/hfsplus/inode.c linux-2.6.8.1-ve022stab078/fs/hfsplus/inode.c +--- linux-2.6.8.1.orig/fs/hfsplus/inode.c 2004-08-14 14:54:52.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfsplus/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -252,15 +252,19 @@ static void hfsplus_set_perms(struct ino + perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); + } + +-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int hfsplus_permission(struct inode *inode, int mask, ++ struct nameidata *nd, struct exec_perm *exec_perm) + { + /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, + * open_exec has the same test, so it's still not executable, if a x bit + * is set fall back to standard permission check. ++ * ++ * The comment above and the check below don't make much sense ++ * with S_ISREG condition... 
--SAW + */ + if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) + return 0; +- return vfs_permission(inode, mask); ++ return vfs_permission(inode, mask, exec_perm); + } + + +@@ -483,22 +487,22 @@ int hfsplus_cat_read_inode(struct inode + return res; + } + +-void hfsplus_cat_write_inode(struct inode *inode) ++int hfsplus_cat_write_inode(struct inode *inode) + { + struct hfs_find_data fd; + hfsplus_cat_entry entry; + + if (HFSPLUS_IS_RSRC(inode)) { + mark_inode_dirty(HFSPLUS_I(inode).rsrc_inode); +- return; ++ return 0; + } + + if (!inode->i_nlink) +- return; ++ return 0; + + if (hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd)) + /* panic? */ +- return; ++ return -EIO; + + if (hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd)) + /* panic? */ +@@ -546,4 +550,5 @@ void hfsplus_cat_write_inode(struct inod + } + out: + hfs_find_exit(&fd); ++ return 0; + } +diff -uprN linux-2.6.8.1.orig/fs/hfsplus/super.c linux-2.6.8.1-ve022stab078/fs/hfsplus/super.c +--- linux-2.6.8.1.orig/fs/hfsplus/super.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hfsplus/super.c 2006-05-11 13:05:35.000000000 +0400 +@@ -94,20 +94,20 @@ static void hfsplus_read_inode(struct in + make_bad_inode(inode); + } + +-void hfsplus_write_inode(struct inode *inode, int unused) ++int hfsplus_write_inode(struct inode *inode, int unused) + { + struct hfsplus_vh *vhdr; ++ int ret = 0; + + dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); + hfsplus_ext_write_extent(inode); + if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { +- hfsplus_cat_write_inode(inode); +- return; ++ return hfsplus_cat_write_inode(inode); + } + vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; + switch (inode->i_ino) { + case HFSPLUS_ROOT_CNID: +- hfsplus_cat_write_inode(inode); ++ ret = hfsplus_cat_write_inode(inode); + break; + case HFSPLUS_EXT_CNID: + if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { +@@ -148,6 +148,7 @@ void hfsplus_write_inode(struct inode *i + hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); + break; + } ++ return ret; + } + + static void hfsplus_clear_inode(struct inode *inode) +diff -uprN linux-2.6.8.1.orig/fs/hpfs/namei.c linux-2.6.8.1-ve022stab078/fs/hpfs/namei.c +--- linux-2.6.8.1.orig/fs/hpfs/namei.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hpfs/namei.c 2006-05-11 13:05:35.000000000 +0400 +@@ -415,7 +415,7 @@ again: + d_drop(dentry); + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count) > 1 || +- permission(inode, MAY_WRITE, NULL) || ++ permission(inode, MAY_WRITE, NULL, NULL) || + !S_ISREG(inode->i_mode) || + get_write_access(inode)) { + spin_unlock(&dentry->d_lock); +diff -uprN linux-2.6.8.1.orig/fs/hugetlbfs/inode.c linux-2.6.8.1-ve022stab078/fs/hugetlbfs/inode.c +--- linux-2.6.8.1.orig/fs/hugetlbfs/inode.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/hugetlbfs/inode.c 2006-05-11 13:05:40.000000000 +0400 +@@ -198,6 +198,7 @@ static void hugetlbfs_delete_inode(struc + struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb); + + hlist_del_init(&inode->i_hash); ++ list_del(&inode->i_sb_list); + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; +@@ -240,6 +241,7 @@ static void hugetlbfs_forget_inode(struc + inodes_stat.nr_unused--; + hlist_del_init(&inode->i_hash); + out_truncate: ++ list_del(&inode->i_sb_list); + list_del_init(&inode->i_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; +@@ -453,7 +455,7 @@ static int hugetlbfs_symlink(struct inod + 
gid, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; +- error = page_symlink(inode, symname, l); ++ error = page_symlink(inode, symname, l, GFP_KERNEL); + if (!error) { + d_instantiate(dentry, inode); + dget(dentry); +@@ -731,7 +733,7 @@ struct file *hugetlb_zero_setup(size_t s + struct inode *inode; + struct dentry *dentry, *root; + struct qstr quick_string; +- char buf[16]; ++ char buf[64]; + + if (!can_do_hugetlb_shm()) + return ERR_PTR(-EPERM); +@@ -740,7 +742,8 @@ struct file *hugetlb_zero_setup(size_t s + return ERR_PTR(-ENOMEM); + + root = hugetlbfs_vfsmount->mnt_root; +- snprintf(buf, 16, "%lu", hugetlbfs_counter()); ++ snprintf(buf, sizeof(buf), "VE%d-%d", ++ get_exec_env()->veid, hugetlbfs_counter()); + quick_string.name = buf; + quick_string.len = strlen(quick_string.name); + quick_string.hash = 0; +diff -uprN linux-2.6.8.1.orig/fs/inode.c linux-2.6.8.1-ve022stab078/fs/inode.c +--- linux-2.6.8.1.orig/fs/inode.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/inode.c 2006-05-11 13:05:43.000000000 +0400 +@@ -9,8 +9,10 @@ + #include <linux/mm.h> + #include <linux/dcache.h> + #include <linux/init.h> ++#include <linux/kernel_stat.h> + #include <linux/quotaops.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/writeback.h> + #include <linux/module.h> + #include <linux/backing-dev.h> +@@ -99,11 +101,18 @@ struct inodes_stat_t inodes_stat; + + static kmem_cache_t * inode_cachep; + ++unsigned int inode_memusage(void) ++{ ++ return kmem_cache_memusage(inode_cachep); ++} ++ ++static struct address_space_operations vfs_empty_aops; ++struct inode_operations vfs_empty_iops; ++static struct file_operations vfs_empty_fops; ++EXPORT_SYMBOL(vfs_empty_iops); ++ + static struct inode *alloc_inode(struct super_block *sb) + { +- static struct address_space_operations empty_aops; +- static struct inode_operations empty_iops; +- static struct file_operations empty_fops; + struct inode *inode; + + if (sb->s_op->alloc_inode) +@@ -119,8 +128,8 @@ static struct inode *alloc_inode(struct + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); + inode->i_sock = 0; +- inode->i_op = &empty_iops; +- inode->i_fop = &empty_fops; ++ inode->i_op = &vfs_empty_iops; ++ inode->i_fop = &vfs_empty_fops; + inode->i_nlink = 1; + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; +@@ -144,7 +153,7 @@ static struct inode *alloc_inode(struct + return NULL; + } + +- mapping->a_ops = &empty_aops; ++ mapping->a_ops = &vfs_empty_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER); +@@ -295,10 +304,11 @@ static void dispose_list(struct list_hea + /* + * Invalidate all inodes for a device. 
+ */ +-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) ++static int invalidate_list(struct list_head *head, struct list_head * dispose, ++ int verify) + { + struct list_head *next; +- int busy = 0, count = 0; ++ int busy = 0, count = 0, print_once = 1; + + next = head->next; + for (;;) { +@@ -308,18 +318,63 @@ static int invalidate_list(struct list_h + next = next->next; + if (tmp == head) + break; +- inode = list_entry(tmp, struct inode, i_list); +- if (inode->i_sb != sb) +- continue; ++ inode = list_entry(tmp, struct inode, i_sb_list); + invalidate_inode_buffers(inode); + if (!atomic_read(&inode->i_count)) { + hlist_del_init(&inode->i_hash); ++ list_del(&inode->i_sb_list); + list_move(&inode->i_list, dispose); + inode->i_state |= I_FREEING; + count++; + continue; + } + busy = 1; ++ ++ if (!verify) ++ continue; ++ ++ if (print_once) { ++ struct super_block *sb = inode->i_sb; ++ printk("VFS: Busy inodes after unmount. " ++ "sb = %p, fs type = %s, sb count = %d, " ++ "sb->s_root = %s\n", sb, ++ (sb->s_type != NULL) ? sb->s_type->name : "", ++ sb->s_count, ++ (sb->s_root != NULL) ? ++ (char *)sb->s_root->d_name.name : ""); ++ print_once = 0; ++ } ++ ++ { ++ struct dentry *d; ++ int i; ++ ++ printk("inode = %p, inode->i_count = %d, " ++ "inode->i_nlink = %d, " ++ "inode->i_mode = %d, " ++ "inode->i_state = %ld, " ++ "inode->i_flags = %d, " ++ "inode->i_devices.next = %p, " ++ "inode->i_devices.prev = %p, " ++ "inode->i_ino = %ld\n", ++ tmp, ++ atomic_read(&inode->i_count), ++ inode->i_nlink, ++ inode->i_mode, ++ inode->i_state, ++ inode->i_flags, ++ inode->i_devices.next, ++ inode->i_devices.prev, ++ inode->i_ino); ++ printk("inode dump: "); ++ for (i = 0; i < sizeof(*tmp); i++) ++ printk("%2.2x ", *((u_char *)tmp + i)); ++ printk("\n"); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) ++ printk(" d_alias %s\n", ++ d->d_name.name); ++ ++ } + } + /* only unused inodes may be cached with i_count zero */ + inodes_stat.nr_unused -= count; +@@ -342,17 +397,14 @@ static int invalidate_list(struct list_h + * fails because there are busy inodes then a non zero value is returned. + * If the discard is successful all the inodes have been discarded. + */ +-int invalidate_inodes(struct super_block * sb) ++int invalidate_inodes(struct super_block * sb, int verify) + { + int busy; + LIST_HEAD(throw_away); + + down(&iprune_sem); + spin_lock(&inode_lock); +- busy = invalidate_list(&inode_in_use, sb, &throw_away); +- busy |= invalidate_list(&inode_unused, sb, &throw_away); +- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); +- busy |= invalidate_list(&sb->s_io, sb, &throw_away); ++ busy = invalidate_list(&sb->s_inodes, &throw_away, verify); + spin_unlock(&inode_lock); + + dispose_list(&throw_away); +@@ -381,7 +433,7 @@ int __invalidate_device(struct block_dev + * hold). + */ + shrink_dcache_sb(sb); +- res = invalidate_inodes(sb); ++ res = invalidate_inodes(sb, 0); + drop_super(sb); + } + invalidate_bdev(bdev, 0); +@@ -452,6 +504,7 @@ static void prune_icache(int nr_to_scan) + continue; + } + hlist_del_init(&inode->i_hash); ++ list_del(&inode->i_sb_list); + list_move(&inode->i_list, &freeable); + inode->i_state |= I_FREEING; + nr_pruned++; +@@ -479,6 +532,7 @@ static void prune_icache(int nr_to_scan) + */ + static int shrink_icache_memory(int nr, unsigned int gfp_mask) + { ++ KSTAT_PERF_ENTER(shrink_icache) + if (nr) { + /* + * Nasty deadlock avoidance. 
We may hold various FS locks, +@@ -488,6 +542,7 @@ static int shrink_icache_memory(int nr, + if (gfp_mask & __GFP_FS) + prune_icache(nr); + } ++ KSTAT_PERF_LEAVE(shrink_icache) + return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + } + +@@ -510,7 +565,7 @@ repeat: + continue; + if (!test(inode, data)) + continue; +- if (inode->i_state & (I_FREEING|I_CLEAR)) { ++ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + __wait_on_freeing_inode(inode); + goto repeat; + } +@@ -535,7 +590,7 @@ repeat: + continue; + if (inode->i_sb != sb) + continue; +- if (inode->i_state & (I_FREEING|I_CLEAR)) { ++ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + __wait_on_freeing_inode(inode); + goto repeat; + } +@@ -561,6 +616,7 @@ struct inode *new_inode(struct super_blo + if (inode) { + spin_lock(&inode_lock); + inodes_stat.nr_inodes++; ++ list_add(&inode->i_sb_list, &sb->s_inodes); + list_add(&inode->i_list, &inode_in_use); + inode->i_ino = ++last_ino; + inode->i_state = 0; +@@ -609,6 +665,7 @@ static struct inode * get_new_inode(stru + goto set_failed; + + inodes_stat.nr_inodes++; ++ list_add(&inode->i_sb_list, &sb->s_inodes); + list_add(&inode->i_list, &inode_in_use); + hlist_add_head(&inode->i_hash, head); + inode->i_state = I_LOCK|I_NEW; +@@ -657,6 +714,7 @@ static struct inode * get_new_inode_fast + if (!old) { + inode->i_ino = ino; + inodes_stat.nr_inodes++; ++ list_add(&inode->i_sb_list, &sb->s_inodes); + list_add(&inode->i_list, &inode_in_use); + hlist_add_head(&inode->i_hash, head); + inode->i_state = I_LOCK|I_NEW; +@@ -734,7 +792,7 @@ EXPORT_SYMBOL(iunique); + struct inode *igrab(struct inode *inode) + { + spin_lock(&inode_lock); +- if (!(inode->i_state & I_FREEING)) ++ if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) + __iget(inode); + else + /* +@@ -993,6 +1051,7 @@ void generic_delete_inode(struct inode * + { + struct super_operations *op = inode->i_sb->s_op; + ++ list_del(&inode->i_sb_list); + list_del_init(&inode->i_list); + inode->i_state|=I_FREEING; + inodes_stat.nr_inodes--; +@@ -1030,14 +1089,20 @@ static void generic_forget_inode(struct + if (!(inode->i_state & (I_DIRTY|I_LOCK))) + list_move(&inode->i_list, &inode_unused); + inodes_stat.nr_unused++; +- spin_unlock(&inode_lock); +- if (!sb || (sb->s_flags & MS_ACTIVE)) ++ if (!sb || (sb->s_flags & MS_ACTIVE)) { ++ spin_unlock(&inode_lock); + return; ++ } ++ inode->i_state |= I_WILL_FREE; ++ BUG_ON(inode->i_state & I_LOCK); ++ spin_unlock(&inode_lock); + write_inode_now(inode, 1); + spin_lock(&inode_lock); ++ inode->i_state &= ~I_WILL_FREE; + inodes_stat.nr_unused--; + hlist_del_init(&inode->i_hash); + } ++ list_del(&inode->i_sb_list); + list_del_init(&inode->i_list); + inode->i_state|=I_FREEING; + inodes_stat.nr_inodes--; +@@ -1128,19 +1193,6 @@ sector_t bmap(struct inode * inode, sect + + EXPORT_SYMBOL(bmap); + +-/* +- * Return true if the filesystem which backs this inode considers the two +- * passed timespecs to be sufficiently different to warrant flushing the +- * altered time out to disk. 
+- */ +-static int inode_times_differ(struct inode *inode, +- struct timespec *old, struct timespec *new) +-{ +- if (IS_ONE_SECOND(inode)) +- return old->tv_sec != new->tv_sec; +- return !timespec_equal(old, new); +-} +- + /** + * update_atime - update the access time + * @inode: inode accessed +@@ -1160,8 +1212,8 @@ void update_atime(struct inode *inode) + if (IS_RDONLY(inode)) + return; + +- now = current_kernel_time(); +- if (inode_times_differ(inode, &inode->i_atime, &now)) { ++ now = current_fs_time(inode->i_sb); ++ if (!timespec_equal(&inode->i_atime, &now)) { + inode->i_atime = now; + mark_inode_dirty_sync(inode); + } else { +@@ -1191,14 +1243,13 @@ void inode_update_time(struct inode *ino + if (IS_RDONLY(inode)) + return; + +- now = current_kernel_time(); +- +- if (inode_times_differ(inode, &inode->i_mtime, &now)) ++ now = current_fs_time(inode->i_sb); ++ if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = 1; + inode->i_mtime = now; + + if (ctime_too) { +- if (inode_times_differ(inode, &inode->i_ctime, &now)) ++ if (!timespec_equal(&inode->i_ctime, &now)) + sync_it = 1; + inode->i_ctime = now; + } +@@ -1230,33 +1281,15 @@ int remove_inode_dquot_ref(struct inode + void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) + { + struct inode *inode; +- struct list_head *act_head; + + if (!sb->dq_op) + return; /* nothing to do */ +- spin_lock(&inode_lock); /* This lock is for inodes code */ + ++ spin_lock(&inode_lock); /* This lock is for inodes code */ + /* We hold dqptr_sem so we are safe against the quota code */ +- list_for_each(act_head, &inode_in_use) { +- inode = list_entry(act_head, struct inode, i_list); +- if (inode->i_sb == sb && !IS_NOQUOTA(inode)) +- remove_inode_dquot_ref(inode, type, tofree_head); +- } +- list_for_each(act_head, &inode_unused) { +- inode = list_entry(act_head, struct inode, i_list); +- if (inode->i_sb == sb && !IS_NOQUOTA(inode)) +- remove_inode_dquot_ref(inode, type, tofree_head); +- } +- list_for_each(act_head, &sb->s_dirty) { +- inode = list_entry(act_head, struct inode, i_list); ++ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) + if (!IS_NOQUOTA(inode)) + remove_inode_dquot_ref(inode, type, tofree_head); +- } +- list_for_each(act_head, &sb->s_io) { +- inode = list_entry(act_head, struct inode, i_list); +- if (!IS_NOQUOTA(inode)) +- remove_inode_dquot_ref(inode, type, tofree_head); +- } + spin_unlock(&inode_lock); + } + +@@ -1372,7 +1405,7 @@ void __init inode_init(unsigned long mem + + /* inode slab cache */ + inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once, ++ 0, SLAB_RECLAIM_ACCOUNT|SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once, + NULL); + set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); + } +diff -uprN linux-2.6.8.1.orig/fs/isofs/compress.c linux-2.6.8.1-ve022stab078/fs/isofs/compress.c +--- linux-2.6.8.1.orig/fs/isofs/compress.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/isofs/compress.c 2006-05-11 13:05:34.000000000 +0400 +@@ -147,8 +147,14 @@ static int zisofs_readpage(struct file * + cend = le32_to_cpu(*(u32 *)(bh->b_data + (blockendptr & bufmask))); + brelse(bh); + ++ if (cstart > cend) ++ goto eio; ++ + csize = cend-cstart; + ++ if (csize > deflateBound(1UL << zisofs_block_shift)) ++ goto eio; ++ + /* Now page[] contains an array of pages, any of which can be NULL, + and the locks on which we hold. We should now read the data and + release the pages. 
If the pages are NULL the decompressed data +diff -uprN linux-2.6.8.1.orig/fs/isofs/inode.c linux-2.6.8.1-ve022stab078/fs/isofs/inode.c +--- linux-2.6.8.1.orig/fs/isofs/inode.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/isofs/inode.c 2006-05-11 13:05:34.000000000 +0400 +@@ -685,6 +685,8 @@ root_found: + sbi->s_log_zone_size = isonum_723 (h_pri->logical_block_size); + sbi->s_max_size = isonum_733(h_pri->volume_space_size); + } else { ++ if (!pri) ++ goto out_freebh; + rootp = (struct iso_directory_record *) pri->root_directory_record; + sbi->s_nzones = isonum_733 (pri->volume_space_size); + sbi->s_log_zone_size = isonum_723 (pri->logical_block_size); +@@ -1394,6 +1396,9 @@ struct inode *isofs_iget(struct super_bl + struct inode *inode; + struct isofs_iget5_callback_data data; + ++ if (offset >= 1ul << sb->s_blocksize_bits) ++ return NULL; ++ + data.block = block; + data.offset = offset; + +diff -uprN linux-2.6.8.1.orig/fs/isofs/rock.c linux-2.6.8.1-ve022stab078/fs/isofs/rock.c +--- linux-2.6.8.1.orig/fs/isofs/rock.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/isofs/rock.c 2006-05-11 13:05:34.000000000 +0400 +@@ -53,6 +53,7 @@ + if(LEN & 1) LEN++; \ + CHR = ((unsigned char *) DE) + LEN; \ + LEN = *((unsigned char *) DE) - LEN; \ ++ if (LEN<0) LEN=0; \ + if (ISOFS_SB(inode->i_sb)->s_rock_offset!=-1) \ + { \ + LEN-=ISOFS_SB(inode->i_sb)->s_rock_offset; \ +@@ -73,6 +74,10 @@ + offset1 = 0; \ + pbh = sb_bread(DEV->i_sb, block); \ + if(pbh){ \ ++ if (offset > pbh->b_size || offset + cont_size > pbh->b_size){ \ ++ brelse(pbh); \ ++ goto out; \ ++ } \ + memcpy(buffer + offset1, pbh->b_data + offset, cont_size - offset1); \ + brelse(pbh); \ + chr = (unsigned char *) buffer; \ +@@ -103,12 +108,13 @@ int get_rock_ridge_filename(struct iso_d + struct rock_ridge * rr; + int sig; + +- while (len > 1){ /* There may be one byte for padding somewhere */ ++ while (len > 2){ /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; +- if (rr->len == 0) goto out; /* Something got screwed up here */ ++ if (rr->len < 3) goto out; /* Something got screwed up here */ + sig = isonum_721(chr); + chr += rr->len; + len -= rr->len; ++ if (len < 0) goto out; /* corrupted isofs */ + + switch(sig){ + case SIG('R','R'): +@@ -122,6 +128,7 @@ int get_rock_ridge_filename(struct iso_d + break; + case SIG('N','M'): + if (truncate) break; ++ if (rr->len < 5) break; + /* + * If the flags are 2 or 4, this indicates '.' or '..'. 
+ * We don't want to do anything with this, because it +@@ -183,12 +190,13 @@ int parse_rock_ridge_inode_internal(stru + struct rock_ridge * rr; + int rootflag; + +- while (len > 1){ /* There may be one byte for padding somewhere */ ++ while (len > 2){ /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; +- if (rr->len == 0) goto out; /* Something got screwed up here */ ++ if (rr->len < 3) goto out; /* Something got screwed up here */ + sig = isonum_721(chr); + chr += rr->len; + len -= rr->len; ++ if (len < 0) goto out; /* corrupted isofs */ + + switch(sig){ + #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ +@@ -460,7 +468,7 @@ static int rock_ridge_symlink_readpage(s + struct rock_ridge *rr; + + if (!ISOFS_SB(inode->i_sb)->s_rock) +- panic ("Cannot have symlink with high sierra variant of iso filesystem\n"); ++ goto error; + + block = ei->i_iget5_block; + lock_kernel(); +@@ -485,13 +493,15 @@ static int rock_ridge_symlink_readpage(s + SETUP_ROCK_RIDGE(raw_inode, chr, len); + + repeat: +- while (len > 1) { /* There may be one byte for padding somewhere */ ++ while (len > 2) { /* There may be one byte for padding somewhere */ + rr = (struct rock_ridge *) chr; +- if (rr->len == 0) ++ if (rr->len < 3) + goto out; /* Something got screwed up here */ + sig = isonum_721(chr); + chr += rr->len; + len -= rr->len; ++ if (len < 0) ++ goto out; /* corrupted isofs */ + + switch (sig) { + case SIG('R', 'R'): +@@ -539,6 +549,7 @@ static int rock_ridge_symlink_readpage(s + fail: + brelse(bh); + unlock_kernel(); ++ error: + SetPageError(page); + kunmap(page); + unlock_page(page); +diff -uprN linux-2.6.8.1.orig/fs/jbd/checkpoint.c linux-2.6.8.1-ve022stab078/fs/jbd/checkpoint.c +--- linux-2.6.8.1.orig/fs/jbd/checkpoint.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/checkpoint.c 2006-05-11 13:05:32.000000000 +0400 +@@ -335,8 +335,10 @@ int log_do_checkpoint(journal_t *journal + retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); + } while (jh != last_jh && !retry); + +- if (batch_count) ++ if (batch_count) { + __flush_batch(journal, bhs, &batch_count); ++ retry = 1; ++ } + + /* + * If someone cleaned up this transaction while we slept, we're +diff -uprN linux-2.6.8.1.orig/fs/jbd/commit.c linux-2.6.8.1-ve022stab078/fs/jbd/commit.c +--- linux-2.6.8.1.orig/fs/jbd/commit.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/commit.c 2006-05-11 13:05:35.000000000 +0400 +@@ -103,10 +103,10 @@ void journal_commit_transaction(journal_ + { + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; +- struct buffer_head *wbuf[64]; ++ struct buffer_head **wbuf = journal->j_wbuf; + int bufs; + int flags; +- int err; ++ int err, data_err; + unsigned long blocknr; + char *tagp = NULL; + journal_header_t *header; +@@ -234,6 +234,7 @@ void journal_commit_transaction(journal_ + */ + + err = 0; ++ data_err = 0; + /* + * Whenever we unlock the journal and sleep, things can get added + * onto ->t_sync_datalist, so we have to keep looping back to +@@ -258,7 +259,7 @@ write_out_data: + BUFFER_TRACE(bh, "locked"); + if (!inverted_lock(journal, bh)) + goto write_out_data; +- __journal_unfile_buffer(jh); ++ __journal_temp_unlink_buffer(jh); + __journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); +@@ -271,7 +272,7 @@ write_out_data: + BUFFER_TRACE(bh, "start journal writeout"); + get_bh(bh); + wbuf[bufs++] = bh; +- if (bufs == ARRAY_SIZE(wbuf)) { ++ if (bufs == 
journal->j_wbufsize) { + jbd_debug(2, "submit %d writes\n", + bufs); + spin_unlock(&journal->j_list_lock); +@@ -284,6 +285,8 @@ write_out_data: + BUFFER_TRACE(bh, "writeout complete: unfile"); + if (!inverted_lock(journal, bh)) + goto write_out_data; ++ if (unlikely(!buffer_uptodate(bh))) ++ data_err = -EIO; + __journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); +@@ -315,8 +318,6 @@ write_out_data: + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + wait_on_buffer(bh); +- if (unlikely(!buffer_uptodate(bh))) +- err = -EIO; + spin_lock(&journal->j_list_lock); + } + if (!inverted_lock(journal, bh)) { +@@ -324,6 +325,8 @@ write_out_data: + spin_lock(&journal->j_list_lock); + continue; + } ++ if (unlikely(!buffer_uptodate(bh))) ++ data_err = -EIO; + if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { + __journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); +@@ -341,6 +344,12 @@ write_out_data: + } + spin_unlock(&journal->j_list_lock); + ++ /* ++ * XXX: what to do if (data_err)? ++ * Print message? ++ * Abort journal? ++ */ ++ + journal_write_revoke_records(journal, commit_transaction); + + jbd_debug(3, "JBD: commit phase 2\n"); +@@ -365,6 +374,7 @@ write_out_data: + descriptor = NULL; + bufs = 0; + while (commit_transaction->t_buffers) { ++ int error; + + /* Find the next buffer to be journaled... */ + +@@ -405,9 +415,9 @@ write_out_data: + jbd_debug(4, "JBD: got buffer %llu (%p)\n", + (unsigned long long)bh->b_blocknr, bh->b_data); + header = (journal_header_t *)&bh->b_data[0]; +- header->h_magic = htonl(JFS_MAGIC_NUMBER); +- header->h_blocktype = htonl(JFS_DESCRIPTOR_BLOCK); +- header->h_sequence = htonl(commit_transaction->t_tid); ++ header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); ++ header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK); ++ header->h_sequence = cpu_to_be32(commit_transaction->t_tid); + + tagp = &bh->b_data[sizeof(journal_header_t)]; + space_left = bh->b_size - sizeof(journal_header_t); +@@ -425,11 +435,12 @@ write_out_data: + + /* Where is the buffer to be written? */ + +- err = journal_next_log_block(journal, &blocknr); ++ error = journal_next_log_block(journal, &blocknr); + /* If the block mapping failed, just abandon the buffer + and repeat this loop: we'll fall into the + refile-on-abort condition above. */ +- if (err) { ++ if (error) { ++ err = error; + __journal_abort_hard(journal); + continue; + } +@@ -473,8 +484,8 @@ write_out_data: + tag_flag |= JFS_FLAG_SAME_UUID; + + tag = (journal_block_tag_t *) tagp; +- tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr); +- tag->t_flags = htonl(tag_flag); ++ tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); ++ tag->t_flags = cpu_to_be32(tag_flag); + tagp += sizeof(journal_block_tag_t); + space_left -= sizeof(journal_block_tag_t); + +@@ -488,7 +499,7 @@ write_out_data: + /* If there's no more to do, or if the descriptor is full, + let the IO rip! */ + +- if (bufs == ARRAY_SIZE(wbuf) || ++ if (bufs == journal->j_wbufsize || + commit_transaction->t_buffers == NULL || + space_left < sizeof(journal_block_tag_t) + 16) { + +@@ -498,7 +509,7 @@ write_out_data: + submitting the IOs. "tag" still points to + the last tag we set up. 
*/ + +- tag->t_flags |= htonl(JFS_FLAG_LAST_TAG); ++ tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG); + + start_journal_io: + for (i = 0; i < bufs; i++) { +@@ -613,6 +624,8 @@ wait_for_iobuf: + + jbd_debug(3, "JBD: commit phase 6\n"); + ++ if (err) ++ goto skip_commit; + if (is_journal_aborted(journal)) + goto skip_commit; + +@@ -631,9 +644,9 @@ wait_for_iobuf: + for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) { + journal_header_t *tmp = + (journal_header_t*)jh2bh(descriptor)->b_data; +- tmp->h_magic = htonl(JFS_MAGIC_NUMBER); +- tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK); +- tmp->h_sequence = htonl(commit_transaction->t_tid); ++ tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); ++ tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); ++ tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); + } + + JBUFFER_TRACE(descriptor, "write commit block"); +@@ -655,8 +668,13 @@ wait_for_iobuf: + + skip_commit: /* The journal should be unlocked by now. */ + +- if (err) ++ if (err) { ++ char b[BDEVNAME_SIZE]; ++ ++ printk(KERN_ERR "Error %d writing journal on %s\n", ++ err, bdevname(journal->j_dev, b)); + __journal_abort_hard(journal); ++ } + + /* + * Call any callbacks that had been registered for handles in this +diff -uprN linux-2.6.8.1.orig/fs/jbd/journal.c linux-2.6.8.1-ve022stab078/fs/jbd/journal.c +--- linux-2.6.8.1.orig/fs/jbd/journal.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/journal.c 2006-05-11 13:05:37.000000000 +0400 +@@ -34,6 +34,7 @@ + #include <linux/suspend.h> + #include <linux/pagemap.h> + #include <asm/uaccess.h> ++#include <asm/page.h> + #include <linux/proc_fs.h> + + EXPORT_SYMBOL(journal_start); +@@ -152,6 +153,9 @@ int kjournald(void *arg) + spin_lock(&journal->j_state_lock); + + loop: ++ if (journal->j_flags & JFS_UNMOUNT) ++ goto end_loop; ++ + jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + journal->j_commit_sequence, journal->j_commit_request); + +@@ -161,11 +165,11 @@ loop: + del_timer_sync(journal->j_commit_timer); + journal_commit_transaction(journal); + spin_lock(&journal->j_state_lock); +- goto end_loop; ++ goto loop; + } + + wake_up(&journal->j_wait_done_commit); +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + /* + * The simpler the better. 
Flushing journal isn't a + * good idea, because that depends on threads that may +@@ -173,7 +177,7 @@ loop: + */ + jbd_debug(1, "Now suspending kjournald\n"); + spin_unlock(&journal->j_state_lock); +- refrigerator(PF_FREEZE); ++ refrigerator(); + spin_lock(&journal->j_state_lock); + } else { + /* +@@ -191,6 +195,8 @@ loop: + if (transaction && time_after_eq(jiffies, + transaction->t_expires)) + should_sleep = 0; ++ if (journal->j_flags & JFS_UNMOUNT) ++ should_sleep = 0; + if (should_sleep) { + spin_unlock(&journal->j_state_lock); + schedule(); +@@ -209,10 +215,9 @@ loop: + journal->j_commit_request = transaction->t_tid; + jbd_debug(1, "woke because of timeout\n"); + } +-end_loop: +- if (!(journal->j_flags & JFS_UNMOUNT)) +- goto loop; ++ goto loop; + ++end_loop: + spin_unlock(&journal->j_state_lock); + del_timer_sync(journal->j_commit_timer); + journal->j_task = NULL; +@@ -221,10 +226,16 @@ end_loop: + return 0; + } + +-static void journal_start_thread(journal_t *journal) ++static int journal_start_thread(journal_t *journal) + { +- kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); ++ int err; ++ ++ err = kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); ++ if (err < 0) ++ return err; ++ + wait_event(journal->j_wait_done_commit, journal->j_task != 0); ++ return 0; + } + + static void journal_kill_thread(journal_t *journal) +@@ -325,8 +336,8 @@ repeat: + /* + * Check for escaping + */ +- if (*((unsigned int *)(mapped_data + new_offset)) == +- htonl(JFS_MAGIC_NUMBER)) { ++ if (*((__be32 *)(mapped_data + new_offset)) == ++ cpu_to_be32(JFS_MAGIC_NUMBER)) { + need_copy_out = 1; + do_escape = 1; + } +@@ -720,6 +731,7 @@ journal_t * journal_init_dev(struct bloc + { + journal_t *journal = journal_init_common(); + struct buffer_head *bh; ++ int n; + + if (!journal) + return NULL; +@@ -735,6 +747,17 @@ journal_t * journal_init_dev(struct bloc + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; + ++ /* journal descriptor can store up to n blocks -bzzz */ ++ n = journal->j_blocksize / sizeof(journal_block_tag_t); ++ journal->j_wbufsize = n; ++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); ++ if (!journal->j_wbuf) { ++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", ++ __FUNCTION__); ++ kfree(journal); ++ journal = NULL; ++ } ++ + return journal; + } + +@@ -751,6 +774,7 @@ journal_t * journal_init_inode (struct i + struct buffer_head *bh; + journal_t *journal = journal_init_common(); + int err; ++ int n; + unsigned long blocknr; + + if (!journal) +@@ -767,6 +791,17 @@ journal_t * journal_init_inode (struct i + journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; + journal->j_blocksize = inode->i_sb->s_blocksize; + ++ /* journal descriptor can store up to n blocks -bzzz */ ++ n = journal->j_blocksize / sizeof(journal_block_tag_t); ++ journal->j_wbufsize = n; ++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); ++ if (!journal->j_wbuf) { ++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", ++ __FUNCTION__); ++ kfree(journal); ++ return NULL; ++ } ++ + err = journal_bmap(journal, 0, &blocknr); + /* If that failed, give up */ + if (err) { +@@ -808,8 +843,8 @@ static int journal_reset(journal_t *jour + journal_superblock_t *sb = journal->j_superblock; + unsigned int first, last; + +- first = ntohl(sb->s_first); +- last = ntohl(sb->s_maxlen); ++ first = be32_to_cpu(sb->s_first); ++ last = be32_to_cpu(sb->s_maxlen); + + journal->j_first = first; + 
journal->j_last = last; +@@ -826,8 +861,7 @@ static int journal_reset(journal_t *jour + + /* Add the dynamic fields and write it to disk. */ + journal_update_superblock(journal, 1); +- journal_start_thread(journal); +- return 0; ++ return journal_start_thread(journal); + } + + /** +@@ -886,12 +920,12 @@ int journal_create(journal_t *journal) + /* OK, fill in the initial static fields in the new superblock */ + sb = journal->j_superblock; + +- sb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER); +- sb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2); ++ sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); ++ sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); + +- sb->s_blocksize = htonl(journal->j_blocksize); +- sb->s_maxlen = htonl(journal->j_maxlen); +- sb->s_first = htonl(1); ++ sb->s_blocksize = cpu_to_be32(journal->j_blocksize); ++ sb->s_maxlen = cpu_to_be32(journal->j_maxlen); ++ sb->s_first = cpu_to_be32(1); + + journal->j_transaction_sequence = 1; + +@@ -934,9 +968,9 @@ void journal_update_superblock(journal_t + jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", + journal->j_tail, journal->j_tail_sequence, journal->j_errno); + +- sb->s_sequence = htonl(journal->j_tail_sequence); +- sb->s_start = htonl(journal->j_tail); +- sb->s_errno = htonl(journal->j_errno); ++ sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); ++ sb->s_start = cpu_to_be32(journal->j_tail); ++ sb->s_errno = cpu_to_be32(journal->j_errno); + spin_unlock(&journal->j_state_lock); + + BUFFER_TRACE(bh, "marking dirty"); +@@ -987,13 +1021,13 @@ static int journal_get_superblock(journa + + err = -EINVAL; + +- if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) || +- sb->s_blocksize != htonl(journal->j_blocksize)) { ++ if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) || ++ sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { + printk(KERN_WARNING "JBD: no valid journal superblock found\n"); + goto out; + } + +- switch(ntohl(sb->s_header.h_blocktype)) { ++ switch(be32_to_cpu(sb->s_header.h_blocktype)) { + case JFS_SUPERBLOCK_V1: + journal->j_format_version = 1; + break; +@@ -1005,9 +1039,9 @@ static int journal_get_superblock(journa + goto out; + } + +- if (ntohl(sb->s_maxlen) < journal->j_maxlen) +- journal->j_maxlen = ntohl(sb->s_maxlen); +- else if (ntohl(sb->s_maxlen) > journal->j_maxlen) { ++ if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) ++ journal->j_maxlen = be32_to_cpu(sb->s_maxlen); ++ else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + printk (KERN_WARNING "JBD: journal file too short\n"); + goto out; + } +@@ -1035,11 +1069,11 @@ static int load_superblock(journal_t *jo + + sb = journal->j_superblock; + +- journal->j_tail_sequence = ntohl(sb->s_sequence); +- journal->j_tail = ntohl(sb->s_start); +- journal->j_first = ntohl(sb->s_first); +- journal->j_last = ntohl(sb->s_maxlen); +- journal->j_errno = ntohl(sb->s_errno); ++ journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); ++ journal->j_tail = be32_to_cpu(sb->s_start); ++ journal->j_first = be32_to_cpu(sb->s_first); ++ journal->j_last = be32_to_cpu(sb->s_maxlen); ++ journal->j_errno = be32_to_cpu(sb->s_errno); + + return 0; + } +@@ -1140,6 +1174,7 @@ void journal_destroy(journal_t *journal) + iput(journal->j_inode); + if (journal->j_revoke) + journal_destroy_revoke(journal); ++ kfree(journal->j_wbuf); + kfree(journal); + } + +@@ -1252,7 +1287,7 @@ int journal_update_format (journal_t *jo + + sb = journal->j_superblock; + +- switch (ntohl(sb->s_header.h_blocktype)) { ++ switch 
(be32_to_cpu(sb->s_header.h_blocktype)) { + case JFS_SUPERBLOCK_V2: + return 0; + case JFS_SUPERBLOCK_V1: +@@ -1274,7 +1309,7 @@ static int journal_convert_superblock_v1 + + /* Pre-initialise new fields to zero */ + offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); +- blocksize = ntohl(sb->s_blocksize); ++ blocksize = be32_to_cpu(sb->s_blocksize); + memset(&sb->s_feature_compat, 0, blocksize-offset); + + sb->s_nr_users = cpu_to_be32(1); +@@ -1490,7 +1525,7 @@ void __journal_abort_soft (journal_t *jo + * entered abort state during the update. + * + * Recursive transactions are not disturbed by journal abort until the +- * final journal_stop, which will receive the -EIO error. ++ * final journal_stop. + * + * Finally, the journal_abort call allows the caller to supply an errno + * which will be recorded (if possible) in the journal superblock. This +@@ -1766,6 +1801,7 @@ static void __journal_remove_journal_hea + if (jh->b_transaction == NULL && + jh->b_next_transaction == NULL && + jh->b_cp_transaction == NULL) { ++ J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); +diff -uprN linux-2.6.8.1.orig/fs/jbd/recovery.c linux-2.6.8.1-ve022stab078/fs/jbd/recovery.c +--- linux-2.6.8.1.orig/fs/jbd/recovery.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/recovery.c 2006-05-11 13:05:31.000000000 +0400 +@@ -191,10 +191,10 @@ static int count_tags(struct buffer_head + + nr++; + tagp += sizeof(journal_block_tag_t); +- if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID))) ++ if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) + tagp += 16; + +- if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG)) ++ if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG)) + break; + } + +@@ -239,8 +239,8 @@ int journal_recover(journal_t *journal) + + if (!sb->s_start) { + jbd_debug(1, "No recovery required, last transaction %d\n", +- ntohl(sb->s_sequence)); +- journal->j_transaction_sequence = ntohl(sb->s_sequence) + 1; ++ be32_to_cpu(sb->s_sequence)); ++ journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; + return 0; + } + +@@ -295,7 +295,7 @@ int journal_skip_recovery(journal_t *jou + ++journal->j_transaction_sequence; + } else { + #ifdef CONFIG_JBD_DEBUG +- int dropped = info.end_transaction - ntohl(sb->s_sequence); ++ int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); + #endif + jbd_debug(0, + "JBD: ignoring %d transaction%s from the journal.\n", +@@ -331,8 +331,8 @@ static int do_one_pass(journal_t *journa + */ + + sb = journal->j_superblock; +- next_commit_ID = ntohl(sb->s_sequence); +- next_log_block = ntohl(sb->s_start); ++ next_commit_ID = be32_to_cpu(sb->s_sequence); ++ next_log_block = be32_to_cpu(sb->s_start); + + first_commit_ID = next_commit_ID; + if (pass == PASS_SCAN) +@@ -385,13 +385,13 @@ static int do_one_pass(journal_t *journa + + tmp = (journal_header_t *)bh->b_data; + +- if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) { ++ if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { + brelse(bh); + break; + } + +- blocktype = ntohl(tmp->h_blocktype); +- sequence = ntohl(tmp->h_sequence); ++ blocktype = be32_to_cpu(tmp->h_blocktype); ++ sequence = be32_to_cpu(tmp->h_sequence); + jbd_debug(3, "Found magic %d, sequence %d\n", + blocktype, sequence); + +@@ -427,7 +427,7 @@ static int do_one_pass(journal_t *journa + unsigned long io_block; + + tag = (journal_block_tag_t *) tagp; +- flags = ntohl(tag->t_flags); ++ flags = be32_to_cpu(tag->t_flags); + + io_block 
= next_log_block++; + wrap(journal, next_log_block); +@@ -444,7 +444,7 @@ static int do_one_pass(journal_t *journa + unsigned long blocknr; + + J_ASSERT(obh != NULL); +- blocknr = ntohl(tag->t_blocknr); ++ blocknr = be32_to_cpu(tag->t_blocknr); + + /* If the block has been + * revoked, then we're all done +@@ -476,8 +476,8 @@ static int do_one_pass(journal_t *journa + memcpy(nbh->b_data, obh->b_data, + journal->j_blocksize); + if (flags & JFS_FLAG_ESCAPE) { +- *((unsigned int *)bh->b_data) = +- htonl(JFS_MAGIC_NUMBER); ++ *((__be32 *)bh->b_data) = ++ cpu_to_be32(JFS_MAGIC_NUMBER); + } + + BUFFER_TRACE(nbh, "marking dirty"); +@@ -572,13 +572,13 @@ static int scan_revoke_records(journal_t + + header = (journal_revoke_header_t *) bh->b_data; + offset = sizeof(journal_revoke_header_t); +- max = ntohl(header->r_count); ++ max = be32_to_cpu(header->r_count); + + while (offset < max) { + unsigned long blocknr; + int err; + +- blocknr = ntohl(* ((unsigned int *) (bh->b_data+offset))); ++ blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); + offset += 4; + err = journal_set_revoke(journal, blocknr, sequence); + if (err) +diff -uprN linux-2.6.8.1.orig/fs/jbd/revoke.c linux-2.6.8.1-ve022stab078/fs/jbd/revoke.c +--- linux-2.6.8.1.orig/fs/jbd/revoke.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/revoke.c 2006-05-11 13:05:31.000000000 +0400 +@@ -332,6 +332,7 @@ int journal_revoke(handle_t *handle, uns + struct block_device *bdev; + int err; + ++ might_sleep(); + if (bh_in) + BUFFER_TRACE(bh_in, "enter"); + +@@ -375,7 +376,12 @@ int journal_revoke(handle_t *handle, uns + first having the revoke cancelled: it's illegal to free a + block twice without allocating it in between! */ + if (bh) { +- J_ASSERT_BH(bh, !buffer_revoked(bh)); ++ if (!J_EXPECT_BH(bh, !buffer_revoked(bh), ++ "inconsistent data on disk")) { ++ if (!bh_in) ++ brelse(bh); ++ return -EIO; ++ } + set_buffer_revoked(bh); + set_buffer_revokevalid(bh); + if (bh_in) { +@@ -565,9 +571,9 @@ static void write_one_revoke_record(jour + if (!descriptor) + return; + header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; +- header->h_magic = htonl(JFS_MAGIC_NUMBER); +- header->h_blocktype = htonl(JFS_REVOKE_BLOCK); +- header->h_sequence = htonl(transaction->t_tid); ++ header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); ++ header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK); ++ header->h_sequence = cpu_to_be32(transaction->t_tid); + + /* Record it so that we can wait for IO completion later */ + JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); +@@ -577,8 +583,8 @@ static void write_one_revoke_record(jour + *descriptorp = descriptor; + } + +- * ((unsigned int *)(&jh2bh(descriptor)->b_data[offset])) = +- htonl(record->blocknr); ++ * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = ++ cpu_to_be32(record->blocknr); + offset += 4; + *offsetp = offset; + } +@@ -603,7 +609,7 @@ static void flush_descriptor(journal_t * + } + + header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data; +- header->r_count = htonl(offset); ++ header->r_count = cpu_to_be32(offset); + set_buffer_jwrite(bh); + BUFFER_TRACE(bh, "write"); + set_buffer_dirty(bh); +diff -uprN linux-2.6.8.1.orig/fs/jbd/transaction.c linux-2.6.8.1-ve022stab078/fs/jbd/transaction.c +--- linux-2.6.8.1.orig/fs/jbd/transaction.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jbd/transaction.c 2006-05-11 13:05:39.000000000 +0400 +@@ -1046,7 +1046,12 @@ int journal_dirty_data(handle_t *handle, + /* journal_clean_data_list() may have got there 
first */ + if (jh->b_transaction != NULL) { + JBUFFER_TRACE(jh, "unfile from commit"); +- __journal_unfile_buffer(jh); ++ __journal_temp_unlink_buffer(jh); ++ /* It still points to the committing ++ * transaction; move it to this one so ++ * that the refile assert checks are ++ * happy. */ ++ jh->b_transaction = handle->h_transaction; + } + /* The buffer will be refiled below */ + +@@ -1060,7 +1065,8 @@ int journal_dirty_data(handle_t *handle, + if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { + JBUFFER_TRACE(jh, "not on correct data list: unfile"); + J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); +- __journal_unfile_buffer(jh); ++ __journal_temp_unlink_buffer(jh); ++ jh->b_transaction = handle->h_transaction; + JBUFFER_TRACE(jh, "file as data"); + __journal_file_buffer(jh, handle->h_transaction, + BJ_SyncData); +@@ -1200,11 +1206,12 @@ journal_release_buffer(handle_t *handle, + * Allow this call even if the handle has aborted --- it may be part of + * the caller's cleanup after an abort. + */ +-void journal_forget(handle_t *handle, struct buffer_head *bh) ++int journal_forget (handle_t *handle, struct buffer_head *bh) + { + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh; ++ int err = 0; + + BUFFER_TRACE(bh, "entry"); + +@@ -1215,6 +1222,14 @@ void journal_forget(handle_t *handle, st + goto not_jbd; + jh = bh2jh(bh); + ++ /* Critical error: attempting to delete a bitmap buffer, maybe? ++ * Don't do any jbd operations, and return an error. */ ++ if (!J_EXPECT_JH(jh, !jh->b_committed_data, ++ "inconsistent data on disk")) { ++ err = -EIO; ++ goto not_jbd; ++ } ++ + if (jh->b_transaction == handle->h_transaction) { + J_ASSERT_JH(jh, !jh->b_frozen_data); + +@@ -1225,9 +1240,6 @@ void journal_forget(handle_t *handle, st + clear_buffer_jbddirty(bh); + + JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); +- J_ASSERT_JH(jh, !jh->b_committed_data); +- +- __journal_unfile_buffer(jh); + + /* + * We are no longer going to journal this buffer. +@@ -1242,15 +1254,17 @@ void journal_forget(handle_t *handle, st + */ + + if (jh->b_cp_transaction) { ++ __journal_temp_unlink_buffer(jh); + __journal_file_buffer(jh, transaction, BJ_Forget); + } else { ++ __journal_unfile_buffer(jh); + journal_remove_journal_head(bh); + __brelse(bh); + if (!buffer_jbd(bh)) { + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __bforget(bh); +- return; ++ return 0; + } + } + } else if (jh->b_transaction) { +@@ -1272,7 +1286,7 @@ not_jbd: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __brelse(bh); +- return; ++ return err; + } + + /** +@@ -1402,7 +1416,8 @@ int journal_stop(handle_t *handle) + * Special case: JFS_SYNC synchronous updates require us + * to wait for the commit to complete. + */ +- if (handle->h_sync && !(current->flags & PF_MEMALLOC)) ++ if (handle->h_sync && !(current->flags & ++ (PF_MEMALLOC | PF_MEMDIE))) + err = log_wait_commit(journal, tid); + } else { + spin_unlock(&transaction->t_handle_lock); +@@ -1498,7 +1513,7 @@ __blist_del_buffer(struct journal_head * + * + * Called under j_list_lock. The journal may not be locked. 
+ */ +-void __journal_unfile_buffer(struct journal_head *jh) ++void __journal_temp_unlink_buffer(struct journal_head *jh) + { + struct journal_head **list = NULL; + transaction_t *transaction; +@@ -1515,7 +1530,7 @@ void __journal_unfile_buffer(struct jour + + switch (jh->b_jlist) { + case BJ_None: +- goto out; ++ return; + case BJ_SyncData: + list = &transaction->t_sync_datalist; + break; +@@ -1548,7 +1563,11 @@ void __journal_unfile_buffer(struct jour + jh->b_jlist = BJ_None; + if (test_clear_buffer_jbddirty(bh)) + mark_buffer_dirty(bh); /* Expose it to the VM */ +-out: ++} ++ ++void __journal_unfile_buffer(struct journal_head *jh) ++{ ++ __journal_temp_unlink_buffer(jh); + jh->b_transaction = NULL; + } + +@@ -1804,10 +1823,10 @@ static int journal_unmap_buffer(journal_ + JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); + ret = __dispose_buffer(jh, + journal->j_running_transaction); ++ journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); +- journal_put_journal_head(jh); + return ret; + } else { + /* There is no currently-running transaction. So the +@@ -1818,10 +1837,10 @@ static int journal_unmap_buffer(journal_ + JBUFFER_TRACE(jh, "give to committing trans"); + ret = __dispose_buffer(jh, + journal->j_committing_transaction); ++ journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); +- journal_put_journal_head(jh); + return ret; + } else { + /* The orphan record's transaction has +@@ -1831,7 +1850,17 @@ static int journal_unmap_buffer(journal_ + } + } + } else if (transaction == journal->j_committing_transaction) { +- /* If it is committing, we simply cannot touch it. We ++ if (jh->b_jlist == BJ_Locked) { ++ /* ++ * The buffer is on the committing transaction's locked ++ * list. We have the buffer locked, so I/O has ++ * completed. So we can nail the buffer now. ++ */ ++ may_free = __dispose_buffer(jh, transaction); ++ goto zap_buffer; ++ } ++ /* ++ * If it is committing, we simply cannot touch it. We + * can remove it's next_transaction pointer from the + * running transaction if that is set, but nothing + * else. */ +@@ -1842,10 +1871,10 @@ static int journal_unmap_buffer(journal_ + journal->j_running_transaction); + jh->b_next_transaction = NULL; + } ++ journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); +- journal_put_journal_head(jh); + return 0; + } else { + /* Good, the buffer belongs to the running transaction. 
+@@ -1870,6 +1899,7 @@ zap_buffer_unlocked: + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); ++ clear_buffer_delay(bh); + bh->b_bdev = NULL; + return may_free; + } +@@ -1906,7 +1936,6 @@ int journal_invalidatepage(journal_t *jo + unsigned int next_off = curr_off + bh->b_size; + next = bh->b_this_page; + +- /* AKPM: doing lock_buffer here may be overly paranoid */ + if (offset <= curr_off) { + /* This block is wholly outside the truncation point */ + lock_buffer(bh); +@@ -1958,7 +1987,7 @@ void __journal_file_buffer(struct journa + } + + if (jh->b_transaction) +- __journal_unfile_buffer(jh); ++ __journal_temp_unlink_buffer(jh); + jh->b_transaction = transaction; + + switch (jlist) { +@@ -2041,7 +2070,7 @@ void __journal_refile_buffer(struct jour + */ + + was_dirty = test_clear_buffer_jbddirty(bh); +- __journal_unfile_buffer(jh); ++ __journal_temp_unlink_buffer(jh); + jh->b_transaction = jh->b_next_transaction; + jh->b_next_transaction = NULL; + __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata); +diff -uprN linux-2.6.8.1.orig/fs/jffs2/background.c linux-2.6.8.1-ve022stab078/fs/jffs2/background.c +--- linux-2.6.8.1.orig/fs/jffs2/background.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jffs2/background.c 2006-05-11 13:05:25.000000000 +0400 +@@ -93,8 +93,8 @@ static int jffs2_garbage_collect_thread( + schedule(); + } + +- if (current->flags & PF_FREEZE) { +- refrigerator(0); ++ if (test_thread_flag(TIF_FREEZE)) { ++ refrigerator(); + /* refrigerator() should recalc sigpending for us + but doesn't. No matter - allow_signal() will. */ + continue; +diff -uprN linux-2.6.8.1.orig/fs/jfs/acl.c linux-2.6.8.1-ve022stab078/fs/jfs/acl.c +--- linux-2.6.8.1.orig/fs/jfs/acl.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/acl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -127,7 +127,7 @@ out: + * + * modified vfs_permission to check posix acl + */ +-int jfs_permission(struct inode * inode, int mask, struct nameidata *nd) ++int __jfs_permission(struct inode * inode, int mask) + { + umode_t mode = inode->i_mode; + struct jfs_inode_info *ji = JFS_IP(inode); +@@ -206,6 +206,28 @@ check_capabilities: + return -EACCES; + } + ++int jfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) ++{ ++ int ret; ++ ++ if (exec_perm != NULL) ++ down(&inode->i_sem); ++ ++ ret = __jfs_permission(inode, mask); ++ ++ if (exec_perm != NULL) { ++ if (!ret) { ++ exec_perm->set = 1; ++ exec_perm->mode = inode->i_mode; ++ exec_perm->uid = inode->i_uid; ++ exec_perm->gid = inode->i_gid; ++ } ++ up(&inode->i_sem); ++ } ++ return ret; ++} ++ + int jfs_init_acl(struct inode *inode, struct inode *dir) + { + struct posix_acl *acl = NULL; +diff -uprN linux-2.6.8.1.orig/fs/jfs/inode.c linux-2.6.8.1-ve022stab078/fs/jfs/inode.c +--- linux-2.6.8.1.orig/fs/jfs/inode.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -105,10 +105,10 @@ int jfs_commit_inode(struct inode *inode + return rc; + } + +-void jfs_write_inode(struct inode *inode, int wait) ++int jfs_write_inode(struct inode *inode, int wait) + { + if (test_cflag(COMMIT_Nolink, inode)) +- return; ++ return 0; + /* + * If COMMIT_DIRTY is not set, the inode isn't really dirty. 
+ * It has been committed since the last change, but was still +@@ -117,12 +117,14 @@ void jfs_write_inode(struct inode *inode + if (!test_cflag(COMMIT_Dirty, inode)) { + /* Make sure committed changes hit the disk */ + jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); +- return; ++ return 0; + } + + if (jfs_commit_inode(inode, wait)) { + jfs_err("jfs_write_inode: jfs_commit_inode failed!"); +- } ++ return -EIO; ++ } else ++ return 0; + } + + void jfs_delete_inode(struct inode *inode) +diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_acl.h linux-2.6.8.1-ve022stab078/fs/jfs/jfs_acl.h +--- linux-2.6.8.1.orig/fs/jfs/jfs_acl.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/jfs_acl.h 2006-05-11 13:05:35.000000000 +0400 +@@ -22,7 +22,7 @@ + + #include <linux/xattr_acl.h> + +-int jfs_permission(struct inode *, int, struct nameidata *); ++int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *); + int jfs_init_acl(struct inode *, struct inode *); + int jfs_setattr(struct dentry *, struct iattr *); + +diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_logmgr.c linux-2.6.8.1-ve022stab078/fs/jfs/jfs_logmgr.c +--- linux-2.6.8.1.orig/fs/jfs/jfs_logmgr.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/jfs_logmgr.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2328,9 +2328,9 @@ int jfsIOWait(void *arg) + lbmStartIO(bp); + spin_lock_irq(&log_redrive_lock); + } +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + spin_unlock_irq(&log_redrive_lock); +- refrigerator(PF_FREEZE); ++ refrigerator(); + } else { + add_wait_queue(&jfs_IO_thread_wait, &wq); + set_current_state(TASK_INTERRUPTIBLE); +diff -uprN linux-2.6.8.1.orig/fs/jfs/jfs_txnmgr.c linux-2.6.8.1-ve022stab078/fs/jfs/jfs_txnmgr.c +--- linux-2.6.8.1.orig/fs/jfs/jfs_txnmgr.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/jfs_txnmgr.c 2006-05-11 13:05:25.000000000 +0400 +@@ -2776,9 +2776,9 @@ int jfs_lazycommit(void *arg) + break; + } + +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + LAZY_UNLOCK(flags); +- refrigerator(PF_FREEZE); ++ refrigerator(); + } else { + DECLARE_WAITQUEUE(wq, current); + +@@ -2987,9 +2987,9 @@ int jfs_sync(void *arg) + /* Add anon_list2 back to anon_list */ + list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); + +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + TXN_UNLOCK(); +- refrigerator(PF_FREEZE); ++ refrigerator(); + } else { + DECLARE_WAITQUEUE(wq, current); + +diff -uprN linux-2.6.8.1.orig/fs/jfs/super.c linux-2.6.8.1-ve022stab078/fs/jfs/super.c +--- linux-2.6.8.1.orig/fs/jfs/super.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/super.c 2006-05-11 13:05:35.000000000 +0400 +@@ -77,7 +77,7 @@ extern int jfs_sync(void *); + extern void jfs_read_inode(struct inode *inode); + extern void jfs_dirty_inode(struct inode *inode); + extern void jfs_delete_inode(struct inode *inode); +-extern void jfs_write_inode(struct inode *inode, int wait); ++extern int jfs_write_inode(struct inode *inode, int wait); + + extern struct dentry *jfs_get_parent(struct dentry *dentry); + extern int jfs_extendfs(struct super_block *, s64, int); +diff -uprN linux-2.6.8.1.orig/fs/jfs/xattr.c linux-2.6.8.1-ve022stab078/fs/jfs/xattr.c +--- linux-2.6.8.1.orig/fs/jfs/xattr.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/jfs/xattr.c 2006-05-11 13:05:35.000000000 +0400 +@@ -745,7 +745,7 @@ static int can_set_xattr(struct inode *i + 
(!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) + return -EPERM; + +- return permission(inode, MAY_WRITE, NULL); ++ return permission(inode, MAY_WRITE, NULL, NULL); + } + + int __jfs_setxattr(struct inode *inode, const char *name, const void *value, +@@ -906,7 +906,7 @@ static int can_get_xattr(struct inode *i + { + if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) + return 0; +- return permission(inode, MAY_READ, NULL); ++ return permission(inode, MAY_READ, NULL, NULL); + } + + ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, +diff -uprN linux-2.6.8.1.orig/fs/libfs.c linux-2.6.8.1-ve022stab078/fs/libfs.c +--- linux-2.6.8.1.orig/fs/libfs.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/libfs.c 2006-05-11 13:05:40.000000000 +0400 +@@ -412,10 +412,13 @@ static spinlock_t pin_fs_lock = SPIN_LOC + int simple_pin_fs(char *name, struct vfsmount **mount, int *count) + { + struct vfsmount *mnt = NULL; ++ struct file_system_type *fstype; + spin_lock(&pin_fs_lock); + if (unlikely(!*mount)) { + spin_unlock(&pin_fs_lock); +- mnt = do_kern_mount(name, 0, name, NULL); ++ fstype = get_fs_type(name); ++ mnt = do_kern_mount(fstype, 0, name, NULL); ++ put_filesystem(fstype); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + spin_lock(&pin_fs_lock); +diff -uprN linux-2.6.8.1.orig/fs/lockd/clntproc.c linux-2.6.8.1-ve022stab078/fs/lockd/clntproc.c +--- linux-2.6.8.1.orig/fs/lockd/clntproc.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/lockd/clntproc.c 2006-05-11 13:05:40.000000000 +0400 +@@ -53,10 +53,10 @@ nlmclnt_setlockargs(struct nlm_rqst *req + nlmclnt_next_cookie(&argp->cookie); + argp->state = nsm_local_state; + memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); +- lock->caller = system_utsname.nodename; ++ lock->caller = ve_utsname.nodename; + lock->oh.data = req->a_owner; + lock->oh.len = sprintf(req->a_owner, "%d@%s", +- current->pid, system_utsname.nodename); ++ current->pid, ve_utsname.nodename); + locks_copy_lock(&lock->fl, fl); + } + +@@ -69,7 +69,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca + { + locks_copy_lock(&call->a_args.lock.fl, &lock->fl); + memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh)); +- call->a_args.lock.caller = system_utsname.nodename; ++ call->a_args.lock.caller = ve_utsname.nodename; + call->a_args.lock.oh.len = lock->oh.len; + + /* set default data area */ +diff -uprN linux-2.6.8.1.orig/fs/lockd/mon.c linux-2.6.8.1-ve022stab078/fs/lockd/mon.c +--- linux-2.6.8.1.orig/fs/lockd/mon.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/lockd/mon.c 2006-05-11 13:05:40.000000000 +0400 +@@ -151,7 +151,7 @@ xdr_encode_common(struct rpc_rqst *rqstp + sprintf(buffer, "%d.%d.%d.%d", (addr>>24) & 0xff, (addr>>16) & 0xff, + (addr>>8) & 0xff, (addr) & 0xff); + if (!(p = xdr_encode_string(p, buffer)) +- || !(p = xdr_encode_string(p, system_utsname.nodename))) ++ || !(p = xdr_encode_string(p, ve_utsname.nodename))) + return ERR_PTR(-EIO); + *p++ = htonl(argp->prog); + *p++ = htonl(argp->vers); +diff -uprN linux-2.6.8.1.orig/fs/locks.c linux-2.6.8.1-ve022stab078/fs/locks.c +--- linux-2.6.8.1.orig/fs/locks.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/locks.c 2006-05-11 13:05:40.000000000 +0400 +@@ -127,6 +127,8 @@ + #include <asm/semaphore.h> + #include <asm/uaccess.h> + ++#include <ub/ub_misc.h> ++ + #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) + #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) + 
#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) +@@ -146,9 +148,23 @@ static LIST_HEAD(blocked_list); + static kmem_cache_t *filelock_cache; + + /* Allocate an empty lock structure. */ +-static struct file_lock *locks_alloc_lock(void) ++static struct file_lock *locks_alloc_lock(int charge) + { +- return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); ++ struct file_lock *flock; ++ ++ flock = kmem_cache_alloc(filelock_cache, SLAB_KERNEL); ++ if (flock == NULL) ++ goto out; ++ flock->fl_charged = 0; ++ if (!charge) ++ goto out; ++ if (!ub_flock_charge(flock, 1)) ++ goto out; ++ ++ kmem_cache_free(filelock_cache, flock); ++ flock = NULL; ++out: ++ return flock; + } + + /* Free a lock which is not in use. */ +@@ -167,6 +183,7 @@ static inline void locks_free_lock(struc + if (!list_empty(&fl->fl_link)) + panic("Attempting to free lock on active lock list"); + ++ ub_flock_uncharge(fl); + kmem_cache_free(filelock_cache, fl); + } + +@@ -247,8 +264,8 @@ static int flock_make_lock(struct file * + int type = flock_translate_cmd(cmd); + if (type < 0) + return type; +- +- fl = locks_alloc_lock(); ++ ++ fl = locks_alloc_lock(type != F_UNLCK); + if (fl == NULL) + return -ENOMEM; + +@@ -382,7 +399,7 @@ static int flock64_to_posix_lock(struct + /* Allocate a file_lock initialised to this type of lease */ + static int lease_alloc(struct file *filp, int type, struct file_lock **flp) + { +- struct file_lock *fl = locks_alloc_lock(); ++ struct file_lock *fl = locks_alloc_lock(1); + if (fl == NULL) + return -ENOMEM; + +@@ -733,8 +750,11 @@ static int __posix_lock_file(struct inod + * We may need two file_lock structures for this operation, + * so we get them in advance to avoid races. + */ +- new_fl = locks_alloc_lock(); +- new_fl2 = locks_alloc_lock(); ++ if (request->fl_type != F_UNLCK) ++ new_fl = locks_alloc_lock(1); ++ else ++ new_fl = NULL; ++ new_fl2 = locks_alloc_lock(0); + + lock_kernel(); + if (request->fl_type != F_UNLCK) { +@@ -762,7 +782,7 @@ static int __posix_lock_file(struct inod + goto out; + + error = -ENOLCK; /* "no luck" */ +- if (!(new_fl && new_fl2)) ++ if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2)) + goto out; + + /* +@@ -864,19 +884,29 @@ static int __posix_lock_file(struct inod + if (!added) { + if (request->fl_type == F_UNLCK) + goto out; ++ error = -ENOLCK; ++ if (right && (left == right) && ub_flock_charge(new_fl, 1)) ++ goto out; + locks_copy_lock(new_fl, request); + locks_insert_lock(before, new_fl); + new_fl = NULL; ++ error = 0; + } + if (right) { + if (left == right) { + /* The new lock breaks the old one in two pieces, + * so we have to use the second new lock. 
+ */ ++ error = -ENOLCK; ++ if (added && ub_flock_charge(new_fl2, ++ request->fl_type != F_UNLCK)) ++ goto out; ++ new_fl2->fl_charged = 1; + left = new_fl2; + new_fl2 = NULL; + locks_copy_lock(left, right); + locks_insert_lock(before, left); ++ error = 0; + } + right->fl_start = request->fl_end + 1; + locks_wake_up_blocks(right); +@@ -1024,7 +1054,6 @@ static void time_out_leases(struct inode + before = &fl->fl_next; + continue; + } +- printk(KERN_INFO "lease broken - owner pid = %d\n", fl->fl_pid); + lease_modify(before, fl->fl_type & ~F_INPROGRESS); + if (fl == *before) /* lease_modify may have freed fl */ + before = &fl->fl_next; +@@ -1146,7 +1175,7 @@ void lease_get_mtime(struct inode *inode + { + struct file_lock *flock = inode->i_flock; + if (flock && IS_LEASE(flock) && (flock->fl_type & F_WRLCK)) +- *time = CURRENT_TIME; ++ *time = current_fs_time(inode->i_sb); + else + *time = inode->i_mtime; + } +@@ -1400,7 +1429,7 @@ int fcntl_getlk(struct file *filp, struc + + flock.l_type = F_UNLCK; + if (fl != NULL) { +- flock.l_pid = fl->fl_pid; ++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + #if BITS_PER_LONG == 32 + /* + * Make sure we can represent the posix lock via +@@ -1432,7 +1461,7 @@ out: + */ + int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l) + { +- struct file_lock *file_lock = locks_alloc_lock(); ++ struct file_lock *file_lock = locks_alloc_lock(0); + struct flock flock; + struct inode *inode; + int error; +@@ -1547,7 +1576,7 @@ int fcntl_getlk64(struct file *filp, str + + flock.l_type = F_UNLCK; + if (fl != NULL) { +- flock.l_pid = fl->fl_pid; ++ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; +@@ -1567,7 +1596,7 @@ out: + */ + int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) + { +- struct file_lock *file_lock = locks_alloc_lock(); ++ struct file_lock *file_lock = locks_alloc_lock(1); + struct flock64 flock; + struct inode *inode; + int error; +@@ -1712,7 +1741,12 @@ void locks_remove_flock(struct file *fil + + while ((fl = *before) != NULL) { + if (fl->fl_file == filp) { +- if (IS_FLOCK(fl)) { ++ /* ++ * We might have a POSIX lock that was created at the same time ++ * the filp was closed for the last time. Just remove that too, ++ * regardless of ownership, since nobody can own it. ++ */ ++ if (IS_FLOCK(fl) || IS_POSIX(fl)) { + locks_delete_lock(before); + continue; + } +@@ -1720,9 +1754,7 @@ void locks_remove_flock(struct file *fil + lease_modify(before, F_UNLCK); + continue; + } +- /* FL_POSIX locks of this process have already been +- * removed in filp_close->locks_remove_posix. +- */ ++ /* What? 
*/ + BUG(); + } + before = &fl->fl_next; +@@ -1775,7 +1807,9 @@ EXPORT_SYMBOL(posix_unblock_lock); + static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) + { + struct inode *inode = NULL; ++ unsigned int fl_pid; + ++ fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid); + if (fl->fl_file != NULL) + inode = fl->fl_file->f_dentry->d_inode; + +@@ -1817,16 +1851,16 @@ static void lock_get_status(char* out, s + } + if (inode) { + #ifdef WE_CAN_BREAK_LSLK_NOW +- out += sprintf(out, "%d %s:%ld ", fl->fl_pid, ++ out += sprintf(out, "%d %s:%ld ", fl_pid, + inode->i_sb->s_id, inode->i_ino); + #else + /* userspace relies on this representation of dev_t ;-( */ +- out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid, ++ out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid, + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), inode->i_ino); + #endif + } else { +- out += sprintf(out, "%d <none>:0 ", fl->fl_pid); ++ out += sprintf(out, "%d <none>:0 ", fl_pid); + } + if (IS_POSIX(fl)) { + if (fl->fl_end == OFFSET_MAX) +@@ -1875,11 +1909,17 @@ int get_locks_status(char *buffer, char + char *q = buffer; + off_t pos = 0; + int i = 0; ++ struct ve_struct *env; + + lock_kernel(); ++ env = get_exec_env(); + list_for_each(tmp, &file_lock_list) { + struct list_head *btmp; + struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); ++ ++ if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env)) ++ continue; ++ + lock_get_status(q, fl, ++i, ""); + move_lock_status(&q, &pos, offset); + +@@ -2033,9 +2073,9 @@ EXPORT_SYMBOL(steal_locks); + static int __init filelock_init(void) + { + filelock_cache = kmem_cache_create("file_lock_cache", +- sizeof(struct file_lock), 0, SLAB_PANIC, ++ sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC, + init_once, NULL); + return 0; + } + +-module_init(filelock_init) ++core_initcall(filelock_init); +diff -uprN linux-2.6.8.1.orig/fs/minix/inode.c linux-2.6.8.1-ve022stab078/fs/minix/inode.c +--- linux-2.6.8.1.orig/fs/minix/inode.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/minix/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -18,7 +18,7 @@ + #include <linux/vfs.h> + + static void minix_read_inode(struct inode * inode); +-static void minix_write_inode(struct inode * inode, int wait); ++static int minix_write_inode(struct inode * inode, int wait); + static int minix_statfs(struct super_block *sb, struct kstatfs *buf); + static int minix_remount (struct super_block * sb, int * flags, char * data); + +@@ -505,9 +505,10 @@ static struct buffer_head *minix_update_ + return V2_minix_update_inode(inode); + } + +-static void minix_write_inode(struct inode * inode, int wait) ++static int minix_write_inode(struct inode * inode, int wait) + { + brelse(minix_update_inode(inode)); ++ return 0; + } + + int minix_sync_inode(struct inode * inode) +diff -uprN linux-2.6.8.1.orig/fs/minix/namei.c linux-2.6.8.1-ve022stab078/fs/minix/namei.c +--- linux-2.6.8.1.orig/fs/minix/namei.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/minix/namei.c 2006-05-11 13:05:32.000000000 +0400 +@@ -116,7 +116,7 @@ static int minix_symlink(struct inode * + + inode->i_mode = S_IFLNK | 0777; + minix_set_inode(inode, 0); +- err = page_symlink(inode, symname, i); ++ err = page_symlink(inode, symname, i, GFP_KERNEL); + if (err) + goto out_fail; + +diff -uprN linux-2.6.8.1.orig/fs/mpage.c linux-2.6.8.1-ve022stab078/fs/mpage.c +--- linux-2.6.8.1.orig/fs/mpage.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/mpage.c 2006-05-11 
13:05:25.000000000 +0400 +@@ -687,6 +687,8 @@ retry: + bio = mpage_writepage(bio, page, get_block, + &last_block_in_bio, &ret, wbc); + } ++ if (unlikely(ret == WRITEPAGE_ACTIVATE)) ++ unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { +diff -uprN linux-2.6.8.1.orig/fs/namei.c linux-2.6.8.1-ve022stab078/fs/namei.c +--- linux-2.6.8.1.orig/fs/namei.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/namei.c 2006-05-11 13:05:43.000000000 +0400 +@@ -115,11 +115,12 @@ static inline int do_getname(const char + int retval; + unsigned long len = PATH_MAX; + +- if ((unsigned long) filename >= TASK_SIZE) { +- if (!segment_eq(get_fs(), KERNEL_DS)) ++ if (!segment_eq(get_fs(), KERNEL_DS)) { ++ if ((unsigned long) filename >= TASK_SIZE) + return -EFAULT; +- } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX) +- len = TASK_SIZE - (unsigned long) filename; ++ if (TASK_SIZE - (unsigned long) filename < PATH_MAX) ++ len = TASK_SIZE - (unsigned long) filename; ++ } + + retval = strncpy_from_user((char *)page, filename, len); + if (retval > 0) { +@@ -159,7 +160,7 @@ char * getname(const char __user * filen + * for filesystem access without changing the "normal" uids which + * are used for other things.. + */ +-int vfs_permission(struct inode * inode, int mask) ++int __vfs_permission(struct inode * inode, int mask) + { + umode_t mode = inode->i_mode; + +@@ -208,7 +209,29 @@ int vfs_permission(struct inode * inode, + return -EACCES; + } + +-int permission(struct inode * inode,int mask, struct nameidata *nd) ++int vfs_permission(struct inode * inode, int mask, struct exec_perm * exec_perm) ++{ ++ int ret; ++ ++ if (exec_perm != NULL) ++ down(&inode->i_sem); ++ ++ ret = __vfs_permission(inode, mask); ++ ++ if (exec_perm != NULL) { ++ if (!ret) { ++ exec_perm->set = 1; ++ exec_perm->mode = inode->i_mode; ++ exec_perm->uid = inode->i_uid; ++ exec_perm->gid = inode->i_gid; ++ } ++ up(&inode->i_sem); ++ } ++ return ret; ++} ++ ++int permission(struct inode * inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) + { + int retval; + int submask; +@@ -217,9 +240,9 @@ int permission(struct inode * inode,int + submask = mask & ~MAY_APPEND; + + if (inode->i_op && inode->i_op->permission) +- retval = inode->i_op->permission(inode, submask, nd); ++ retval = inode->i_op->permission(inode, submask, nd, exec_perm); + else +- retval = vfs_permission(inode, submask); ++ retval = vfs_permission(inode, submask, exec_perm); + if (retval) + return retval; + +@@ -302,6 +325,21 @@ static struct dentry * cached_lookup(str + if (!dentry) + dentry = d_lookup(parent, name); + ++ /* ++ * The revalidation rules are simple: ++ * d_revalidate operation is called when we're about to use a cached ++ * dentry rather than call d_lookup. ++ * d_revalidate method may unhash the dentry itself or return FALSE, in ++ * which case if the dentry can be released d_lookup will be called. ++ * ++ * Additionally, by request of NFS people ++ * (http://linux.bkbits.net:8080/linux-2.4/cset@1.181?nav=index.html|src/|src/fs|related/fs/namei.c) ++ * d_revalidate is called when `/', `.' or `..' are looked up. ++ * Since re-lookup is impossible on them, we introduce a hack and ++ * return an error in this case. 
++ * ++ * 2003/02/19 SAW ++ */ + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { + if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { + dput(dentry); +@@ -364,6 +402,7 @@ static struct dentry * real_lookup(struc + struct dentry * result; + struct inode *dir = parent->d_inode; + ++repeat: + down(&dir->i_sem); + /* + * First re-do the cached lookup just in case it was created +@@ -402,7 +441,7 @@ static struct dentry * real_lookup(struc + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { + dput(result); +- result = ERR_PTR(-ENOENT); ++ goto repeat; + } + } + return result; +@@ -578,7 +617,14 @@ static inline void follow_dotdot(struct + read_unlock(¤t->fs->lock); + break; + } +- read_unlock(¤t->fs->lock); ++#ifdef CONFIG_VE ++ if (*dentry == get_exec_env()->fs_root && ++ *mnt == get_exec_env()->fs_rootmnt) { ++ read_unlock(¤t->fs->lock); ++ break; ++ } ++#endif ++ read_unlock(¤t->fs->lock); + spin_lock(&dcache_lock); + if (*dentry != (*mnt)->mnt_root) { + *dentry = dget((*dentry)->d_parent); +@@ -658,6 +704,7 @@ int fastcall link_path_walk(const char * + { + struct path next; + struct inode *inode; ++ int real_components = 0; + int err; + unsigned int lookup_flags = nd->flags; + +@@ -678,7 +725,7 @@ int fastcall link_path_walk(const char * + + err = exec_permission_lite(inode, nd); + if (err == -EAGAIN) { +- err = permission(inode, MAY_EXEC, nd); ++ err = permission(inode, MAY_EXEC, nd, NULL); + } + if (err) + break; +@@ -730,10 +777,14 @@ int fastcall link_path_walk(const char * + } + nd->flags |= LOOKUP_CONTINUE; + /* This does the actual lookups.. */ ++ real_components++; + err = do_lookup(nd, &this, &next); + if (err) + break; + /* Check mountpoints.. */ ++ err = -ENOENT; ++ if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry)) ++ goto out_dput; + follow_mount(&next.mnt, &next.dentry); + + err = -ENOENT; +@@ -745,6 +796,10 @@ int fastcall link_path_walk(const char * + goto out_dput; + + if (inode->i_op->follow_link) { ++ err = -ENOENT; ++ if (lookup_flags & LOOKUP_STRICT) ++ goto out_dput; ++ + mntget(next.mnt); + err = do_follow_link(next.dentry, nd); + dput(next.dentry); +@@ -795,9 +850,13 @@ last_component: + err = do_lookup(nd, &this, &next); + if (err) + break; ++ err = -ENOENT; ++ if ((lookup_flags & LOOKUP_STRICT) && d_mountpoint(nd->dentry)) ++ goto out_dput; + follow_mount(&next.mnt, &next.dentry); + inode = next.dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) ++ && !(lookup_flags & LOOKUP_STRICT) + && inode && inode->i_op && inode->i_op->follow_link) { + mntget(next.mnt); + err = do_follow_link(next.dentry, nd); +@@ -825,26 +884,40 @@ lookup_parent: + nd->last_type = LAST_NORM; + if (this.name[0] != '.') + goto return_base; +- if (this.len == 1) ++ if (this.len == 1) { + nd->last_type = LAST_DOT; +- else if (this.len == 2 && this.name[1] == '.') ++ goto return_reval; ++ } else if (this.len == 2 && this.name[1] == '.') { + nd->last_type = LAST_DOTDOT; +- else +- goto return_base; ++ goto return_reval; ++ } ++return_base: ++ if (!(nd->flags & LOOKUP_NOAREACHECK)) { ++ err = check_area_access_ve(nd->dentry, nd->mnt); ++ if (err) ++ break; ++ } ++ return 0; + return_reval: + /* + * We bypassed the ordinary revalidation routines. + * We may need to check the cached dentry for staleness. 
+ */ +- if (nd->dentry && nd->dentry->d_sb && ++ if (!real_components && nd->dentry && nd->dentry->d_sb && + (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { + err = -ESTALE; + /* Note: we do not d_invalidate() */ + if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) ++ /* ++ * This lookup is for `/' or `.' or `..'. ++ * The filesystem unhashed the dentry itself ++ * inside d_revalidate (otherwise, d_invalidate ++ * wouldn't succeed). As a special courtesy to ++ * NFS we return an error. 2003/02/19 SAW ++ */ + break; + } +-return_base: +- return 0; ++ goto return_base; + out_dput: + dput(next.dentry); + break; +@@ -971,7 +1044,7 @@ static struct dentry * __lookup_hash(str + int err; + + inode = base->d_inode; +- err = permission(inode, MAY_EXEC, nd); ++ err = permission(inode, MAY_EXEC, nd, NULL); + dentry = ERR_PTR(err); + if (err) + goto out; +@@ -1096,7 +1169,7 @@ static inline int may_delete(struct inod + int error; + if (!victim->d_inode || victim->d_parent->d_inode != dir) + return -ENOENT; +- error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); ++ error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL); + if (error) + return error; + if (IS_APPEND(dir)) +@@ -1133,7 +1206,7 @@ static inline int may_create(struct inod + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; +- return permission(dir,MAY_WRITE | MAY_EXEC, nd); ++ return permission(dir, MAY_WRITE | MAY_EXEC, nd, NULL); + } + + /* +@@ -1241,7 +1314,7 @@ int may_open(struct nameidata *nd, int a + if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) + return -EISDIR; + +- error = permission(inode, acc_mode, nd); ++ error = permission(inode, acc_mode, nd, NULL); + if (error) + return error; + +@@ -1662,17 +1735,13 @@ out: + static void d_unhash(struct dentry *dentry) + { + dget(dentry); +- spin_lock(&dcache_lock); +- switch (atomic_read(&dentry->d_count)) { +- default: +- spin_unlock(&dcache_lock); ++ if (atomic_read(&dentry->d_count)) + shrink_dcache_parent(dentry); +- spin_lock(&dcache_lock); +- if (atomic_read(&dentry->d_count) != 2) +- break; +- case 2: ++ spin_lock(&dcache_lock); ++ spin_lock(&dentry->d_lock); ++ if (atomic_read(&dentry->d_count) == 2) + __d_drop(dentry); +- } ++ spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + } + +@@ -2020,7 +2089,7 @@ int vfs_rename_dir(struct inode *old_dir + * we'll need to flip '..'. 
+ */ + if (new_dir != old_dir) { +- error = permission(old_dentry->d_inode, MAY_WRITE, NULL); ++ error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL); + if (error) + return error; + } +@@ -2090,6 +2159,9 @@ int vfs_rename(struct inode *old_dir, st + int error; + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); + ++ if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir)) ++ return -EXDEV; ++ + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + +@@ -2332,13 +2404,16 @@ int page_follow_link(struct dentry *dent + return res; + } + +-int page_symlink(struct inode *inode, const char *symname, int len) ++int page_symlink(struct inode *inode, const char *symname, int len, ++ int gfp_mask) + { + struct address_space *mapping = inode->i_mapping; +- struct page *page = grab_cache_page(mapping, 0); ++ struct page *page; + int err = -ENOMEM; + char *kaddr; + ++ page = find_or_create_page(mapping, 0, ++ mapping_gfp_mask(mapping) | gfp_mask); + if (!page) + goto fail; + err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); +diff -uprN linux-2.6.8.1.orig/fs/namespace.c linux-2.6.8.1-ve022stab078/fs/namespace.c +--- linux-2.6.8.1.orig/fs/namespace.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/namespace.c 2006-05-11 13:05:40.000000000 +0400 +@@ -37,6 +37,7 @@ static inline int sysfs_init(void) + + /* spinlock for vfsmount related operations, inplace of dcache_lock */ + spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; ++EXPORT_SYMBOL(vfsmount_lock); + + static struct list_head *mount_hashtable; + static int hash_mask, hash_bits; +@@ -238,10 +239,32 @@ static int show_vfsmnt(struct seq_file * + { 0, NULL } + }; + struct proc_fs_info *fs_infop; ++ char *path_buf, *path; + +- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); ++ /* skip FS_NOMOUNT mounts (rootfs) */ ++ if (mnt->mnt_sb->s_flags & MS_NOUSER) ++ return 0; ++ ++ path_buf = (char *) __get_free_page(GFP_KERNEL); ++ if (!path_buf) ++ return -ENOMEM; ++ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ free_page((unsigned long) path_buf); ++ /* ++ * This means that the file position will be incremented, i.e. ++ * the total number of "invisible" vfsmnt will leak. ++ */ ++ return 0; ++ } ++ ++ if (ve_is_super(get_exec_env())) ++ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); ++ else ++ mangle(m, mnt->mnt_sb->s_type->name); + seq_putc(m, ' '); +- seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); ++ mangle(m, path); ++ free_page((unsigned long) path_buf); + seq_putc(m, ' '); + mangle(m, mnt->mnt_sb->s_type->name); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? 
" ro" : " rw"); +@@ -364,6 +387,7 @@ void umount_tree(struct vfsmount *mnt) + spin_lock(&vfsmount_lock); + } + } ++EXPORT_SYMBOL(umount_tree); + + static int do_umount(struct vfsmount *mnt, int flags) + { +@@ -480,7 +504,7 @@ asmlinkage long sys_umount(char __user * + goto dput_and_out; + + retval = -EPERM; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + goto dput_and_out; + + retval = do_umount(nd.mnt, flags); +@@ -505,7 +529,7 @@ asmlinkage long sys_oldumount(char __use + + static int mount_is_safe(struct nameidata *nd) + { +- if (capable(CAP_SYS_ADMIN)) ++ if (capable(CAP_VE_SYS_ADMIN)) + return 0; + return -EPERM; + #ifdef notyet +@@ -515,7 +539,7 @@ static int mount_is_safe(struct nameidat + if (current->uid != nd->dentry->d_inode->i_uid) + return -EPERM; + } +- if (permission(nd->dentry->d_inode, MAY_WRITE, nd)) ++ if (permission(nd->dentry->d_inode, MAY_WRITE, nd, NULL)) + return -EPERM; + return 0; + #endif +@@ -673,7 +697,7 @@ static int do_remount(struct nameidata * + int err; + struct super_block * sb = nd->mnt->mnt_sb; + +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + + if (!check_mnt(nd->mnt)) +@@ -682,6 +706,10 @@ static int do_remount(struct nameidata * + if (nd->dentry != nd->mnt->mnt_root) + return -EINVAL; + ++ /* do not allow to remount bind-mounts */ ++ if (nd->dentry != sb->s_root) ++ return -EINVAL; ++ + down_write(&sb->s_umount); + err = do_remount_sb(sb, flags, data, 0); + if (!err) +@@ -697,7 +725,7 @@ static int do_move_mount(struct nameidat + struct nameidata old_nd, parent_nd; + struct vfsmount *p; + int err = 0; +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; +@@ -764,15 +792,20 @@ static int do_new_mount(struct nameidata + int mnt_flags, char *name, void *data) + { + struct vfsmount *mnt; ++ struct file_system_type *fstype; + + if (!type || !memchr(type, 0, PAGE_SIZE)) + return -EINVAL; + + /* we need capabilities... 
*/ +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + +- mnt = do_kern_mount(type, flags, name, data); ++ fstype = get_fs_type(type); ++ if (fstype == NULL) ++ return -ENODEV; ++ mnt = do_kern_mount(fstype, flags, name, data); ++ put_filesystem(fstype); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + +@@ -809,6 +842,10 @@ int do_add_mount(struct vfsmount *newmnt + newmnt->mnt_flags = mnt_flags; + err = graft_tree(newmnt, nd); + ++ if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL) ++ /* unaccessible yet - no lock */ ++ newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL; ++ + if (err == 0 && fslist) { + /* add to the specified expiration list */ + spin_lock(&vfsmount_lock); +@@ -1213,7 +1250,7 @@ static void chroot_fs_refs(struct nameid + struct fs_struct *fs; + + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { +@@ -1226,7 +1263,7 @@ static void chroot_fs_refs(struct nameid + put_fs_struct(fs); + } else + task_unlock(p); +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + read_unlock(&tasklist_lock); + } + +@@ -1339,8 +1376,13 @@ static void __init init_mount_tree(void) + struct vfsmount *mnt; + struct namespace *namespace; + struct task_struct *g, *p; ++ struct file_system_type *fstype; + +- mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); ++ fstype = get_fs_type("rootfs"); ++ if (fstype == NULL) ++ panic("Can't create rootfs"); ++ mnt = do_kern_mount(fstype, 0, "rootfs", NULL); ++ put_filesystem(fstype); + if (IS_ERR(mnt)) + panic("Can't create rootfs"); + namespace = kmalloc(sizeof(*namespace), GFP_KERNEL); +@@ -1355,10 +1397,10 @@ static void __init init_mount_tree(void) + + init_task.namespace = namespace; + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + get_namespace(namespace); + p->namespace = namespace; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + + set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root); +@@ -1373,7 +1415,7 @@ void __init mnt_init(unsigned long mempa + int i; + + mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + + order = 0; + mount_hashtable = (struct list_head *) +diff -uprN linux-2.6.8.1.orig/fs/ncpfs/ioctl.c linux-2.6.8.1-ve022stab078/fs/ncpfs/ioctl.c +--- linux-2.6.8.1.orig/fs/ncpfs/ioctl.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ncpfs/ioctl.c 2006-05-11 13:05:35.000000000 +0400 +@@ -34,7 +34,7 @@ ncp_get_fs_info(struct ncp_server* serve + { + struct ncp_fs_info info; + +- if ((permission(inode, MAY_WRITE, NULL) != 0) ++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } +@@ -62,7 +62,7 @@ ncp_get_fs_info_v2(struct ncp_server* se + { + struct ncp_fs_info_v2 info2; + +- if ((permission(inode, MAY_WRITE, NULL) != 0) ++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } +@@ -190,7 +190,7 @@ int ncp_ioctl(struct inode *inode, struc + switch (cmd) { + case NCP_IOC_NCPREQUEST: + +- if ((permission(inode, MAY_WRITE, NULL) != 0) ++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } +@@ -254,7 +254,7 @@ int ncp_ioctl(struct inode *inode, struc + { + unsigned long tmp = 
server->m.mounted_uid; + +- if ( (permission(inode, MAY_READ, NULL) != 0) ++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -268,7 +268,7 @@ int ncp_ioctl(struct inode *inode, struc + { + struct ncp_setroot_ioctl sr; + +- if ( (permission(inode, MAY_READ, NULL) != 0) ++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -341,7 +341,7 @@ int ncp_ioctl(struct inode *inode, struc + + #ifdef CONFIG_NCPFS_PACKET_SIGNING + case NCP_IOC_SIGN_INIT: +- if ((permission(inode, MAY_WRITE, NULL) != 0) ++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -364,7 +364,7 @@ int ncp_ioctl(struct inode *inode, struc + return 0; + + case NCP_IOC_SIGN_WANTED: +- if ( (permission(inode, MAY_READ, NULL) != 0) ++ if ( (permission(inode, MAY_READ, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -377,7 +377,7 @@ int ncp_ioctl(struct inode *inode, struc + { + int newstate; + +- if ( (permission(inode, MAY_WRITE, NULL) != 0) ++ if ( (permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -398,7 +398,7 @@ int ncp_ioctl(struct inode *inode, struc + + #ifdef CONFIG_NCPFS_IOCTL_LOCKING + case NCP_IOC_LOCKUNLOCK: +- if ( (permission(inode, MAY_WRITE, NULL) != 0) ++ if ( (permission(inode, MAY_WRITE, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) + { + return -EACCES; +@@ -603,7 +603,7 @@ outrel: + #endif /* CONFIG_NCPFS_NLS */ + + case NCP_IOC_SETDENTRYTTL: +- if ((permission(inode, MAY_WRITE, NULL) != 0) && ++ if ((permission(inode, MAY_WRITE, NULL, NULL) != 0) && + (current->uid != server->m.mounted_uid)) + return -EACCES; + { +@@ -633,7 +633,7 @@ outrel: + so we have this out of switch */ + if (cmd == NCP_IOC_GETMOUNTUID) { + __kernel_uid_t uid = 0; +- if ((permission(inode, MAY_READ, NULL) != 0) ++ if ((permission(inode, MAY_READ, NULL, NULL) != 0) + && (current->uid != server->m.mounted_uid)) { + return -EACCES; + } +diff -uprN linux-2.6.8.1.orig/fs/nfs/dir.c linux-2.6.8.1-ve022stab078/fs/nfs/dir.c +--- linux-2.6.8.1.orig/fs/nfs/dir.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfs/dir.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1499,7 +1499,8 @@ out: + } + + int +-nfs_permission(struct inode *inode, int mask, struct nameidata *nd) ++nfs_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) + { + struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; + struct rpc_cred *cred; +@@ -1541,6 +1542,7 @@ nfs_permission(struct inode *inode, int + if (!NFS_PROTO(inode)->access) + goto out_notsup; + ++ /* Can NFS fill exec_perm atomically? Don't know... 
--SAW */ + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (cache->cred == cred + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) +@@ -1565,7 +1567,7 @@ out: + return res; + out_notsup: + nfs_revalidate_inode(NFS_SERVER(inode), inode); +- res = vfs_permission(inode, mask); ++ res = vfs_permission(inode, mask, exec_perm); + unlock_kernel(); + return res; + add_cache: +diff -uprN linux-2.6.8.1.orig/fs/nfs/direct.c linux-2.6.8.1-ve022stab078/fs/nfs/direct.c +--- linux-2.6.8.1.orig/fs/nfs/direct.c 2004-08-14 14:56:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfs/direct.c 2006-05-11 13:05:34.000000000 +0400 +@@ -72,8 +72,10 @@ nfs_get_user_pages(int rw, unsigned long + size_t array_size; + + /* set an arbitrary limit to prevent arithmetic overflow */ +- if (size > MAX_DIRECTIO_SIZE) ++ if (size > MAX_DIRECTIO_SIZE) { ++ *pages = NULL; + return -EFBIG; ++ } + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; +diff -uprN linux-2.6.8.1.orig/fs/nfs/file.c linux-2.6.8.1-ve022stab078/fs/nfs/file.c +--- linux-2.6.8.1.orig/fs/nfs/file.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfs/file.c 2006-05-11 13:05:28.000000000 +0400 +@@ -103,6 +103,9 @@ nfs_file_open(struct inode *inode, struc + static int + nfs_file_release(struct inode *inode, struct file *filp) + { ++ /* Ensure that dirty pages are flushed out with the right creds */ ++ if (filp->f_mode & FMODE_WRITE) ++ filemap_fdatawrite(filp->f_mapping); + return NFS_PROTO(inode)->file_release(inode, filp); + } + +diff -uprN linux-2.6.8.1.orig/fs/nfs/inode.c linux-2.6.8.1-ve022stab078/fs/nfs/inode.c +--- linux-2.6.8.1.orig/fs/nfs/inode.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode + + static struct inode *nfs_alloc_inode(struct super_block *sb); + static void nfs_destroy_inode(struct inode *); +-static void nfs_write_inode(struct inode *,int); ++static int nfs_write_inode(struct inode *,int); + static void nfs_delete_inode(struct inode *); + static void nfs_put_super(struct super_block *); + static void nfs_clear_inode(struct inode *); +@@ -110,12 +110,16 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fat + return nfs_fileid_to_ino_t(fattr->fileid); + } + +-static void ++static int + nfs_write_inode(struct inode *inode, int sync) + { + int flags = sync ? 
FLUSH_WAIT : 0; ++ int ret; + +- nfs_commit_inode(inode, 0, 0, flags); ++ ret = nfs_commit_inode(inode, 0, 0, flags); ++ if (ret < 0) ++ return ret; ++ return 0; + } + + static void +diff -uprN linux-2.6.8.1.orig/fs/nfs/nfsroot.c linux-2.6.8.1-ve022stab078/fs/nfs/nfsroot.c +--- linux-2.6.8.1.orig/fs/nfs/nfsroot.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfs/nfsroot.c 2006-05-11 13:05:40.000000000 +0400 +@@ -306,7 +306,7 @@ static int __init root_nfs_name(char *na + /* Override them by options set on kernel command-line */ + root_nfs_parse(name, buf); + +- cp = system_utsname.nodename; ++ cp = ve_utsname.nodename; + if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { + printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); + return -1; +diff -uprN linux-2.6.8.1.orig/fs/nfsctl.c linux-2.6.8.1-ve022stab078/fs/nfsctl.c +--- linux-2.6.8.1.orig/fs/nfsctl.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfsctl.c 2006-05-11 13:05:40.000000000 +0400 +@@ -23,8 +23,14 @@ static struct file *do_open(char *name, + { + struct nameidata nd; + int error; ++ struct file_system_type *fstype; + +- nd.mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); ++ fstype = get_fs_type("nfsd"); ++ if (fstype == NULL) ++ return ERR_PTR(-ENODEV); ++ ++ nd.mnt = do_kern_mount(fstype, 0, "nfsd", NULL); ++ put_filesystem(fstype); + + if (IS_ERR(nd.mnt)) + return (struct file *)nd.mnt; +diff -uprN linux-2.6.8.1.orig/fs/nfsd/nfsfh.c linux-2.6.8.1-ve022stab078/fs/nfsd/nfsfh.c +--- linux-2.6.8.1.orig/fs/nfsd/nfsfh.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfsd/nfsfh.c 2006-05-11 13:05:35.000000000 +0400 +@@ -56,7 +56,7 @@ int nfsd_acceptable(void *expv, struct d + /* make sure parents give x permission to user */ + int err; + parent = dget_parent(tdentry); +- err = permission(parent->d_inode, MAY_EXEC, NULL); ++ err = permission(parent->d_inode, MAY_EXEC, NULL, NULL); + if (err < 0) { + dput(parent); + break; +diff -uprN linux-2.6.8.1.orig/fs/nfsd/vfs.c linux-2.6.8.1-ve022stab078/fs/nfsd/vfs.c +--- linux-2.6.8.1.orig/fs/nfsd/vfs.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nfsd/vfs.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1592,12 +1592,13 @@ nfsd_permission(struct svc_export *exp, + inode->i_uid == current->fsuid) + return 0; + +- err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); ++ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), ++ NULL, NULL); + + /* Allow read access to binaries even when mode 111 */ + if (err == -EACCES && S_ISREG(inode->i_mode) && + acc == (MAY_READ | MAY_OWNER_OVERRIDE)) +- err = permission(inode, MAY_EXEC, NULL); ++ err = permission(inode, MAY_EXEC, NULL, NULL); + + return err? 
nfserrno(err) : 0; + } +diff -uprN linux-2.6.8.1.orig/fs/nls/nls_ascii.c linux-2.6.8.1-ve022stab078/fs/nls/nls_ascii.c +--- linux-2.6.8.1.orig/fs/nls/nls_ascii.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/nls/nls_ascii.c 2006-05-11 13:05:34.000000000 +0400 +@@ -13,7 +13,7 @@ + #include <linux/nls.h> + #include <linux/errno.h> + +-static wchar_t charset2uni[128] = { ++static wchar_t charset2uni[256] = { + /* 0x00*/ + 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, +@@ -56,7 +56,7 @@ static wchar_t charset2uni[128] = { + 0x007c, 0x007d, 0x007e, 0x007f, + }; + +-static unsigned char page00[128] = { ++static unsigned char page00[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ +@@ -75,11 +75,11 @@ static unsigned char page00[128] = { + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ + }; + +-static unsigned char *page_uni2charset[128] = { +- page00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++static unsigned char *page_uni2charset[256] = { ++ page00, + }; + +-static unsigned char charset2lower[128] = { ++static unsigned char charset2lower[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ +@@ -98,7 +98,7 @@ static unsigned char charset2lower[128] + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ + }; + +-static unsigned char charset2upper[128] = { ++static unsigned char charset2upper[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ +diff -uprN linux-2.6.8.1.orig/fs/ntfs/inode.h linux-2.6.8.1-ve022stab078/fs/ntfs/inode.h +--- linux-2.6.8.1.orig/fs/ntfs/inode.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ntfs/inode.h 2006-05-11 13:05:35.000000000 +0400 +@@ -285,7 +285,7 @@ extern void ntfs_truncate(struct inode * + + extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); + +-extern void ntfs_write_inode(struct inode *vi, int sync); ++extern int ntfs_write_inode(struct inode *vi, int sync); + + static inline void ntfs_commit_inode(struct inode *vi) + { +diff -uprN linux-2.6.8.1.orig/fs/ntfs/super.c linux-2.6.8.1-ve022stab078/fs/ntfs/super.c +--- linux-2.6.8.1.orig/fs/ntfs/super.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ntfs/super.c 2006-05-11 13:05:43.000000000 +0400 +@@ -2404,7 +2404,7 @@ iput_tmp_ino_err_out_now: + * method again... FIXME: Do we need to do this twice now because of + * attribute inodes? I think not, so leave as is for now... (AIA) + */ +- if (invalidate_inodes(sb)) { ++ if (invalidate_inodes(sb, 0)) { + ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " + "driver bug."); + /* Copied from fs/super.c. I just love this message. 
(-; */ +diff -uprN linux-2.6.8.1.orig/fs/open.c linux-2.6.8.1-ve022stab078/fs/open.c +--- linux-2.6.8.1.orig/fs/open.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/open.c 2006-05-11 13:05:43.000000000 +0400 +@@ -22,6 +22,7 @@ + #include <asm/uaccess.h> + #include <linux/fs.h> + #include <linux/pagemap.h> ++#include <linux/faudit.h> + + #include <asm/unistd.h> + +@@ -46,7 +47,21 @@ int vfs_statfs(struct super_block *sb, s + + EXPORT_SYMBOL(vfs_statfs); + +-static int vfs_statfs_native(struct super_block *sb, struct statfs *buf) ++int faudit_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ struct faudit_statfs_arg arg; ++ ++ arg.sb = sb; ++ arg.stat = buf; ++ ++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg) ++ != NOTIFY_DONE) ++ return arg.err; ++ return 0; ++} ++ ++static int vfs_statfs_native(struct super_block *sb, struct vfsmount *mnt, ++ struct statfs *buf) + { + struct kstatfs st; + int retval; +@@ -55,6 +70,10 @@ static int vfs_statfs_native(struct supe + if (retval) + return retval; + ++ retval = faudit_statfs(mnt->mnt_sb, &st); ++ if (retval) ++ return retval; ++ + if (sizeof(*buf) == sizeof(st)) + memcpy(buf, &st, sizeof(st)); + else { +@@ -89,7 +108,8 @@ static int vfs_statfs_native(struct supe + return 0; + } + +-static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf) ++static int vfs_statfs64(struct super_block *sb, struct vfsmount *mnt, ++ struct statfs64 *buf) + { + struct kstatfs st; + int retval; +@@ -98,6 +118,10 @@ static int vfs_statfs64(struct super_blo + if (retval) + return retval; + ++ retval = faudit_statfs(mnt->mnt_sb, &st); ++ if (retval) ++ return retval; ++ + if (sizeof(*buf) == sizeof(st)) + memcpy(buf, &st, sizeof(st)); + else { +@@ -124,7 +148,8 @@ asmlinkage long sys_statfs(const char __ + error = user_path_walk(path, &nd); + if (!error) { + struct statfs tmp; +- error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs_native(nd.dentry->d_inode->i_sb, ++ nd.mnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + path_release(&nd); +@@ -143,7 +168,8 @@ asmlinkage long sys_statfs64(const char + error = user_path_walk(path, &nd); + if (!error) { + struct statfs64 tmp; +- error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs64(nd.dentry->d_inode->i_sb, ++ nd.mnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + path_release(&nd); +@@ -162,7 +188,8 @@ asmlinkage long sys_fstatfs(unsigned int + file = fget(fd); + if (!file) + goto out; +- error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, ++ file->f_vfsmnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + fput(file); +@@ -183,7 +210,8 @@ asmlinkage long sys_fstatfs64(unsigned i + file = fget(fd); + if (!file) + goto out; +- error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp); ++ error = vfs_statfs64(file->f_dentry->d_inode->i_sb, ++ file->f_vfsmnt, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) + error = -EFAULT; + fput(file); +@@ -234,7 +262,7 @@ static inline long do_sys_truncate(const + if (!S_ISREG(inode->i_mode)) + goto dput_and_out; + +- error = permission(inode,MAY_WRITE,&nd); ++ error = permission(inode,MAY_WRITE,&nd,NULL); + if (error) + goto dput_and_out; + +@@ -388,7 +416,7 @@ asmlinkage long sys_utime(char __user * + goto dput_and_out; + + if (current->fsuid != inode->i_uid && +- (error = 
permission(inode,MAY_WRITE,&nd)) != 0) ++ (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0) + goto dput_and_out; + } + down(&inode->i_sem); +@@ -441,7 +469,7 @@ long do_utimes(char __user * filename, s + goto dput_and_out; + + if (current->fsuid != inode->i_uid && +- (error = permission(inode,MAY_WRITE,&nd)) != 0) ++ (error = permission(inode,MAY_WRITE,&nd,NULL)) != 0) + goto dput_and_out; + } + down(&inode->i_sem); +@@ -500,7 +528,7 @@ asmlinkage long sys_access(const char __ + + res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + if (!res) { +- res = permission(nd.dentry->d_inode, mode, &nd); ++ res = permission(nd.dentry->d_inode, mode, &nd, NULL); + /* SuS v2 requires we report a read only fs too */ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) +@@ -524,7 +552,7 @@ asmlinkage long sys_chdir(const char __u + if (error) + goto out; + +- error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); ++ error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL); + if (error) + goto dput_and_out; + +@@ -557,7 +585,7 @@ asmlinkage long sys_fchdir(unsigned int + if (!S_ISDIR(inode->i_mode)) + goto out_putf; + +- error = permission(inode, MAY_EXEC, NULL); ++ error = permission(inode, MAY_EXEC, NULL, NULL); + if (!error) + set_fs_pwd(current->fs, mnt, dentry); + out_putf: +@@ -575,7 +603,7 @@ asmlinkage long sys_chroot(const char __ + if (error) + goto out; + +- error = permission(nd.dentry->d_inode,MAY_EXEC,&nd); ++ error = permission(nd.dentry->d_inode,MAY_EXEC,&nd,NULL); + if (error) + goto dput_and_out; + +@@ -776,6 +804,9 @@ struct file *dentry_open(struct dentry * + struct inode *inode; + int error; + ++ if (!capable(CAP_SYS_RAWIO)) ++ flags &= ~O_DIRECT; ++ + error = -ENFILE; + f = get_empty_filp(); + if (!f) +@@ -1082,3 +1113,81 @@ int nonseekable_open(struct inode *inode + } + + EXPORT_SYMBOL(nonseekable_open); ++ ++long sys_lchmod(char __user * filename, mode_t mode) ++{ ++ struct nameidata nd; ++ struct inode * inode; ++ int error; ++ struct iattr newattrs; ++ ++ error = user_path_walk_link(filename, &nd); ++ if (error) ++ goto out; ++ inode = nd.dentry->d_inode; ++ ++ error = -EROFS; ++ if (IS_RDONLY(inode)) ++ goto dput_and_out; ++ ++ error = -EPERM; ++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) ++ goto dput_and_out; ++ ++ down(&inode->i_sem); ++ if (mode == (mode_t) -1) ++ mode = inode->i_mode; ++ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ ++dput_and_out: ++ path_release(&nd); ++out: ++ return error; ++} ++ ++long sys_lutime(char __user * filename, ++ struct utimbuf __user * times) ++{ ++ int error; ++ struct nameidata nd; ++ struct inode * inode; ++ struct iattr newattrs; ++ ++ error = user_path_walk_link(filename, &nd); ++ if (error) ++ goto out; ++ inode = nd.dentry->d_inode; ++ ++ error = -EROFS; ++ if (IS_RDONLY(inode)) ++ goto dput_and_out; ++ ++ /* Don't worry, the checks are done in inode_change_ok() */ ++ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; ++ if (times) { ++ error = get_user(newattrs.ia_atime.tv_sec, &times->actime); ++ newattrs.ia_atime.tv_nsec = 0; ++ if (!error) ++ error = get_user(newattrs.ia_mtime.tv_sec, ++ &times->modtime); ++ newattrs.ia_mtime.tv_nsec = 0; ++ if (error) ++ goto dput_and_out; ++ ++ newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; ++ } else { ++ if (current->fsuid != inode->i_uid && ++ (error = permission(inode, 
MAY_WRITE, NULL, NULL)) != 0) ++ goto dput_and_out; ++ } ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++dput_and_out: ++ path_release(&nd); ++out: ++ return error; ++} +diff -uprN linux-2.6.8.1.orig/fs/partitions/check.c linux-2.6.8.1-ve022stab078/fs/partitions/check.c +--- linux-2.6.8.1.orig/fs/partitions/check.c 2004-08-14 14:56:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/partitions/check.c 2006-05-11 13:05:40.000000000 +0400 +@@ -127,6 +127,7 @@ char *disk_name(struct gendisk *hd, int + + return buf; + } ++EXPORT_SYMBOL(disk_name); + + const char *bdevname(struct block_device *bdev, char *buf) + { +diff -uprN linux-2.6.8.1.orig/fs/pipe.c linux-2.6.8.1-ve022stab078/fs/pipe.c +--- linux-2.6.8.1.orig/fs/pipe.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/pipe.c 2006-05-11 13:05:39.000000000 +0400 +@@ -534,7 +534,7 @@ struct inode* pipe_new(struct inode* ino + { + unsigned long page; + +- page = __get_free_page(GFP_USER); ++ page = __get_free_page(GFP_USER_UBC); + if (!page) + return NULL; + +diff -uprN linux-2.6.8.1.orig/fs/proc/array.c linux-2.6.8.1-ve022stab078/fs/proc/array.c +--- linux-2.6.8.1.orig/fs/proc/array.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/array.c 2006-05-11 13:05:45.000000000 +0400 +@@ -73,6 +73,8 @@ + #include <linux/highmem.h> + #include <linux/file.h> + #include <linux/times.h> ++#include <linux/fairsched.h> ++#include <ub/beancounter.h> + + #include <asm/uaccess.h> + #include <asm/pgtable.h> +@@ -88,10 +90,13 @@ static inline char * task_name(struct ta + { + int i; + char * name; ++ char tcomm[sizeof(p->comm)]; ++ ++ get_task_comm(tcomm, p); + + ADDBUF(buf, "Name:\t"); +- name = p->comm; +- i = sizeof(p->comm); ++ name = tcomm; ++ i = sizeof(tcomm); + do { + unsigned char c = *name; + name++; +@@ -127,18 +132,19 @@ static const char *task_state_array[] = + "S (sleeping)", /* 1 */ + "D (disk sleep)", /* 2 */ + "T (stopped)", /* 4 */ +- "Z (zombie)", /* 8 */ +- "X (dead)" /* 16 */ ++ "T (tracing stop)", /* 8 */ ++ "Z (zombie)", /* 16 */ ++ "X (dead)" /* 32 */ + }; + + static inline const char * get_task_state(struct task_struct *tsk) + { +- unsigned int state = tsk->state & (TASK_RUNNING | +- TASK_INTERRUPTIBLE | +- TASK_UNINTERRUPTIBLE | +- TASK_ZOMBIE | +- TASK_DEAD | +- TASK_STOPPED); ++ unsigned int state = (tsk->state & (TASK_RUNNING | ++ TASK_INTERRUPTIBLE | ++ TASK_UNINTERRUPTIBLE | ++ TASK_STOPPED)) | ++ (tsk->exit_state & (EXIT_ZOMBIE | ++ EXIT_DEAD)); + const char **p = &task_state_array[0]; + + while (state) { +@@ -152,8 +158,13 @@ static inline char * task_state(struct t + { + struct group_info *group_info; + int g; ++ pid_t pid, ppid, tgid; ++ ++ pid = get_task_pid(p); ++ tgid = get_task_tgid(p); + + read_lock(&tasklist_lock); ++ ppid = get_task_ppid(p); + buffer += sprintf(buffer, + "State:\t%s\n" + "SleepAVG:\t%lu%%\n" +@@ -161,13 +172,19 @@ static inline char * task_state(struct t + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" ++#ifdef CONFIG_FAIRSCHED ++ "FNid:\t%d\n" ++#endif + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n", + get_task_state(p), + (p->sleep_avg/1024)*100/(1020000000/1024), +- p->tgid, +- p->pid, p->pid ? p->real_parent->pid : 0, +- p->pid && p->ptrace ? p->parent->pid : 0, ++ tgid, ++ pid, ppid, ++ p->pid && p->ptrace ? 
get_task_pid(p->parent) : 0, ++#ifdef CONFIG_FAIRSCHED ++ task_fairsched_node_id(p), ++#endif + p->uid, p->euid, p->suid, p->fsuid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); +@@ -186,6 +203,20 @@ static inline char * task_state(struct t + put_group_info(group_info); + + buffer += sprintf(buffer, "\n"); ++ ++#ifdef CONFIG_VE ++ buffer += sprintf(buffer, ++ "envID:\t%d\n" ++ "VPid:\t%d\n" ++ "PNState:\t%u\n" ++ "StopState:\t%u\n" ++ "SigSuspState:\t%u\n", ++ VE_TASK_INFO(p)->owner_env->veid, ++ virt_pid(p), ++ p->pn_state, ++ p->stopped_state, ++ p->sigsuspend_state); ++#endif + return buffer; + } + +@@ -231,7 +262,7 @@ static void collect_sigign_sigcatch(stru + + static inline char * task_sig(struct task_struct *p, char *buffer) + { +- sigset_t pending, shpending, blocked, ignored, caught; ++ sigset_t pending, shpending, blocked, ignored, caught, saved; + int num_threads = 0; + + sigemptyset(&pending); +@@ -239,6 +270,7 @@ static inline char * task_sig(struct tas + sigemptyset(&blocked); + sigemptyset(&ignored); + sigemptyset(&caught); ++ sigemptyset(&saved); + + /* Gather all the data with the appropriate locks held */ + read_lock(&tasklist_lock); +@@ -247,6 +279,7 @@ static inline char * task_sig(struct tas + pending = p->pending.signal; + shpending = p->signal->shared_pending.signal; + blocked = p->blocked; ++ saved = p->saved_sigset; + collect_sigign_sigcatch(p, &ignored, &caught); + num_threads = atomic_read(&p->signal->count); + spin_unlock_irq(&p->sighand->siglock); +@@ -261,6 +294,7 @@ static inline char * task_sig(struct tas + buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); + buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); + buffer = render_sigset_t("SigCgt:\t", &caught, buffer); ++ buffer = render_sigset_t("SigSvd:\t", &saved, buffer); + + return buffer; + } +@@ -275,6 +309,24 @@ static inline char *task_cap(struct task + cap_t(p->cap_effective)); + } + ++#ifdef CONFIG_USER_RESOURCE ++static inline char *task_show_ub(struct task_struct *p, char *buffer) ++{ ++ char ub_info[64]; ++ ++ print_ub_uid(get_task_ub(p), ub_info, sizeof(ub_info)); ++ buffer += sprintf(buffer, "TaskUB:\t%s\n", ub_info); ++ task_lock(p); ++ if (p->mm != NULL) ++ print_ub_uid(mm_ub(p->mm), ub_info, sizeof(ub_info)); ++ else ++ strcpy(ub_info, "N/A"); ++ task_unlock(p); ++ buffer += sprintf(buffer, "MMUB:\t%s\n", ub_info); ++ return buffer; ++} ++#endif ++ + extern char *task_mem(struct mm_struct *, char *); + int proc_pid_status(struct task_struct *task, char * buffer) + { +@@ -293,6 +345,9 @@ int proc_pid_status(struct task_struct * + #if defined(CONFIG_ARCH_S390) + buffer = task_show_regs(task, buffer); + #endif ++#ifdef CONFIG_USER_RESOURCE ++ buffer = task_show_ub(task, buffer); ++#endif + return buffer - orig; + } + +@@ -309,6 +364,9 @@ int proc_pid_stat(struct task_struct *ta + int num_threads = 0; + struct mm_struct *mm; + unsigned long long start_time; ++ char tcomm[sizeof(task->comm)]; ++ char mm_ub_info[64]; ++ char task_ub_info[64]; + + state = *get_task_state(task); + vsize = eip = esp = 0; +@@ -325,6 +383,7 @@ int proc_pid_stat(struct task_struct *ta + up_read(&mm->mmap_sem); + } + ++ get_task_comm(tcomm, task); + wchan = get_wchan(task); + + sigemptyset(&sigign); +@@ -338,12 +397,13 @@ int proc_pid_stat(struct task_struct *ta + } + if (task->signal) { + if (task->signal->tty) { +- tty_pgrp = task->signal->tty->pgrp; ++ tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID, task->signal->tty->pgrp); + tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); + } +- 
pgid = process_group(task); +- sid = task->signal->session; ++ pgid = get_task_pgid(task); ++ sid = get_task_sid(task); + } ++ ppid = get_task_ppid(task); + read_unlock(&tasklist_lock); + + /* scale priority and nice values from timeslices to -20..20 */ +@@ -351,18 +411,27 @@ int proc_pid_stat(struct task_struct *ta + priority = task_prio(task); + nice = task_nice(task); + +- read_lock(&tasklist_lock); +- ppid = task->pid ? task->real_parent->pid : 0; +- read_unlock(&tasklist_lock); +- + /* Temporary variable needed for gcc-2.96 */ + start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES); + ++#ifdef CONFIG_USER_RESOURCE ++ print_ub_uid(get_task_ub(task), task_ub_info, sizeof(task_ub_info)); ++ if (mm != NULL) ++ print_ub_uid(mm_ub(mm), mm_ub_info, sizeof(mm_ub_info)); ++ else ++ strcpy(mm_ub_info, "N/A"); ++#else ++ strcpy(task_ub_info, "0"); ++ strcpy(mm_ub_info, "0"); ++#endif ++ + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ + %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", +- task->pid, +- task->comm, ++%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu \ ++0 0 0 0 0 0 0 0 %d %u \ ++%s %s\n", ++ get_task_pid(task), ++ tcomm, + state, + ppid, + pgid, +@@ -382,7 +451,12 @@ int proc_pid_stat(struct task_struct *ta + nice, + num_threads, + jiffies_to_clock_t(task->it_real_value), ++#ifndef CONFIG_VE + start_time, ++#else ++ jiffies_64_to_clock_t(task->start_time - ++ get_exec_env()->init_entry->start_time), ++#endif + vsize, + mm ? mm->rss : 0, /* you might want to shift this left 3 */ + task->rlim[RLIMIT_RSS].rlim_cur, +@@ -405,7 +479,11 @@ int proc_pid_stat(struct task_struct *ta + task->exit_signal, + task_cpu(task), + task->rt_priority, +- task->policy); ++ task->policy, ++ virt_pid(task), ++ VEID(VE_TASK_INFO(task)->owner_env), ++ task_ub_info, ++ mm_ub_info); + if(mm) + mmput(mm); + return res; +diff -uprN linux-2.6.8.1.orig/fs/proc/base.c linux-2.6.8.1-ve022stab078/fs/proc/base.c +--- linux-2.6.8.1.orig/fs/proc/base.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/base.c 2006-05-11 13:05:40.000000000 +0400 +@@ -188,22 +188,25 @@ static int proc_fd_link(struct inode *in + struct files_struct *files; + struct file *file; + int fd = proc_type(inode) - PROC_TID_FD_DIR; ++ int err = -ENOENT; + + files = get_files_struct(task); + if (files) { + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { +- *mnt = mntget(file->f_vfsmnt); +- *dentry = dget(file->f_dentry); +- spin_unlock(&files->file_lock); +- put_files_struct(files); +- return 0; ++ if (d_root_check(file->f_dentry, file->f_vfsmnt)) { ++ err = -EACCES; ++ } else { ++ *mnt = mntget(file->f_vfsmnt); ++ *dentry = dget(file->f_dentry); ++ err = 0; ++ } + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } +- return -ENOENT; ++ return err; + } + + static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +@@ -220,13 +223,16 @@ static int proc_exe_link(struct inode *i + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { +- *mnt = mntget(vma->vm_file->f_vfsmnt); +- *dentry = dget(vma->vm_file->f_dentry); +- result = 0; ++ result = d_root_check(vma->vm_file->f_dentry, ++ vma->vm_file->f_vfsmnt); ++ if (!result) { ++ *mnt = mntget(vma->vm_file->f_vfsmnt); ++ *dentry = dget(vma->vm_file->f_dentry); ++ } + break; + } + vma = vma->vm_next; +- } ++ } + up_read(&mm->mmap_sem); + mmput(mm); + out: +@@ -244,10 +250,12 
@@ static int proc_cwd_link(struct inode *i + task_unlock(proc_task(inode)); + if (fs) { + read_lock(&fs->lock); +- *mnt = mntget(fs->pwdmnt); +- *dentry = dget(fs->pwd); ++ result = d_root_check(fs->pwd, fs->pwdmnt); ++ if (!result) { ++ *mnt = mntget(fs->pwdmnt); ++ *dentry = dget(fs->pwd); ++ } + read_unlock(&fs->lock); +- result = 0; + put_fs_struct(fs); + } + return result; +@@ -297,6 +305,11 @@ static int may_ptrace_attach(struct task + rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + goto out; ++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env())) ++ goto out; ++ /* optional: defensive measure */ ++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env())) ++ goto out; + if (security_ptrace(current, task)) + goto out; + +@@ -329,6 +342,8 @@ static int proc_pid_cmdline(struct task_ + struct mm_struct *mm = get_task_mm(task); + if (!mm) + goto out; ++ if (!mm->arg_end) ++ goto out_mm; /* Shh! No looking before we're done */ + + len = mm->arg_end - mm->arg_start; + +@@ -351,8 +366,8 @@ static int proc_pid_cmdline(struct task_ + res = strnlen(buffer, res); + } + } ++out_mm: + mmput(mm); +- + out: + return res; + } +@@ -443,9 +458,10 @@ out: + goto exit; + } + +-static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) ++static int proc_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) + { +- if (vfs_permission(inode, mask) != 0) ++ if (vfs_permission(inode, mask, exec_perm) != 0) + return -EACCES; + return proc_check_root(inode); + } +@@ -767,12 +783,6 @@ static struct inode_operations proc_pid_ + .follow_link = proc_pid_follow_link + }; + +-static int pid_alive(struct task_struct *p) +-{ +- BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid); +- return atomic_read(&p->pids[PIDTYPE_PID].pid.count); +-} +- + #define NUMBUF 10 + + static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +@@ -927,6 +937,10 @@ static struct inode *proc_pid_make_inode + struct inode * inode; + struct proc_inode *ei; + ++ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, ++ VE_OWNER_FSTYPE(sb->s_type))) ++ return NULL; ++ + /* We need a new inode */ + + inode = new_inode(sb); +@@ -1030,6 +1044,10 @@ static void pid_base_iput(struct dentry + spin_lock(&task->proc_lock); + if (task->proc_dentry == dentry) + task->proc_dentry = NULL; ++#ifdef CONFIG_VE ++ if (VE_TASK_INFO(task)->glob_proc_dentry == dentry) ++ VE_TASK_INFO(task)->glob_proc_dentry = NULL; ++#endif + spin_unlock(&task->proc_lock); + iput(inode); + } +@@ -1467,14 +1485,14 @@ static int proc_self_readlink(struct den + int buflen) + { + char tmp[30]; +- sprintf(tmp, "%d", current->tgid); ++ sprintf(tmp, "%d", get_task_tgid(current)); + return vfs_readlink(dentry,buffer,buflen,tmp); + } + + static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) + { + char tmp[30]; +- sprintf(tmp, "%d", current->tgid); ++ sprintf(tmp, "%d", get_task_tgid(current)); + return vfs_follow_link(nd,tmp); + } + +@@ -1499,24 +1517,33 @@ static struct inode_operations proc_self + * of PIDTYPE_PID. 
+ */ + +-struct dentry *proc_pid_unhash(struct task_struct *p) ++struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry) + { +- struct dentry *proc_dentry; +- +- proc_dentry = p->proc_dentry; + if (proc_dentry != NULL) { + + spin_lock(&dcache_lock); ++ spin_lock(&proc_dentry->d_lock); + if (!d_unhashed(proc_dentry)) { + dget_locked(proc_dentry); + __d_drop(proc_dentry); +- } else ++ spin_unlock(&proc_dentry->d_lock); ++ } else { ++ spin_unlock(&proc_dentry->d_lock); + proc_dentry = NULL; ++ } + spin_unlock(&dcache_lock); + } + return proc_dentry; + } + ++void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2]) ++{ ++ pd[0] = __proc_pid_unhash(p, p->proc_dentry); ++#ifdef CONFIG_VE ++ pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry); ++#endif ++} ++ + /** + * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries + * @proc_entry: directoy to prune. +@@ -1524,7 +1551,7 @@ struct dentry *proc_pid_unhash(struct ta + * Shrink the /proc directory that was used by the just killed thread. + */ + +-void proc_pid_flush(struct dentry *proc_dentry) ++void __proc_pid_flush(struct dentry *proc_dentry) + { + if(proc_dentry != NULL) { + shrink_dcache_parent(proc_dentry); +@@ -1532,12 +1559,21 @@ void proc_pid_flush(struct dentry *proc_ + } + } + ++void proc_pid_flush(struct dentry *proc_dentry[2]) ++{ ++ __proc_pid_flush(proc_dentry[0]); ++#ifdef CONFIG_VE ++ __proc_pid_flush(proc_dentry[1]); ++#endif ++} ++ + /* SMP-safe */ + struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) + { + struct task_struct *task; + struct inode *inode; + struct proc_inode *ei; ++ struct dentry *pd[2]; + unsigned tgid; + int died; + +@@ -1561,7 +1597,19 @@ struct dentry *proc_pid_lookup(struct in + goto out; + + read_lock(&tasklist_lock); +- task = find_task_by_pid(tgid); ++ task = find_task_by_pid_ve(tgid); ++ /* In theory we are allowed to lookup both /proc/VIRT_PID and ++ * /proc/GLOBAL_PID inside VE. However, current /proc implementation ++ * cannot maintain two references to one task, so that we have ++ * to prohibit /proc/GLOBAL_PID. ++ */ ++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) { ++ /* However, VE_ENTERed tasks are exception, they use global ++ * pids. ++ */ ++ if (virt_pid(task) != tgid) ++ task = NULL; ++ } + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); +@@ -1586,16 +1634,23 @@ struct dentry *proc_pid_lookup(struct in + died = 0; + d_add(dentry, inode); + spin_lock(&task->proc_lock); ++#ifdef CONFIG_VE ++ if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type))) ++ VE_TASK_INFO(task)->glob_proc_dentry = dentry; ++ else ++ task->proc_dentry = dentry; ++#else + task->proc_dentry = dentry; ++#endif + if (!pid_alive(task)) { +- dentry = proc_pid_unhash(task); ++ proc_pid_unhash(task, pd); + died = 1; + } + spin_unlock(&task->proc_lock); + + put_task_struct(task); + if (died) { +- proc_pid_flush(dentry); ++ proc_pid_flush(pd); + goto out; + } + return NULL; +@@ -1616,7 +1671,12 @@ static struct dentry *proc_task_lookup(s + goto out; + + read_lock(&tasklist_lock); +- task = find_task_by_pid(tid); ++ task = find_task_by_pid_ve(tid); ++ /* See comment above in similar place. 
*/ ++ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) { ++ if (virt_pid(task) != tid) ++ task = NULL; ++ } + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); +@@ -1656,7 +1716,8 @@ out: + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +-static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) ++static int get_tgid_list(int index, unsigned long version, unsigned int *tgids, ++ struct ve_struct *owner) + { + struct task_struct *p; + int nr_tgids = 0; +@@ -1665,18 +1726,23 @@ static int get_tgid_list(int index, unsi + read_lock(&tasklist_lock); + p = NULL; + if (version) { +- p = find_task_by_pid(version); +- if (!thread_group_leader(p)) ++ struct ve_struct *oldve; ++ ++ oldve = set_exec_env(owner); ++ p = find_task_by_pid_ve(version); ++ (void)set_exec_env(oldve); ++ ++ if (p != NULL && !thread_group_leader(p)) + p = NULL; + } + + if (p) + index = 0; + else +- p = next_task(&init_task); ++ p = __first_task_ve(owner); + +- for ( ; p != &init_task; p = next_task(p)) { +- int tgid = p->pid; ++ for ( ; p != NULL; p = __next_task_ve(owner, p)) { ++ int tgid = get_task_pid_ve(p, owner); + if (!pid_alive(p)) + continue; + if (--index >= 0) +@@ -1709,7 +1775,7 @@ static int get_tid_list(int index, unsig + * via next_thread(). + */ + if (pid_alive(task)) do { +- int tid = task->pid; ++ int tid = get_task_pid(task); + + if (--index >= 0) + continue; +@@ -1741,7 +1807,8 @@ int proc_pid_readdir(struct file * filp, + /* + * f_version caches the last tgid which was returned from readdir + */ +- nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array); ++ nr_tgids = get_tgid_list(nr, filp->f_version, tgid_array, ++ VE_OWNER_FSTYPE(filp->f_dentry->d_sb->s_type)); + + for (i = 0; i < nr_tgids; i++) { + int tgid = tgid_array[i]; +diff -uprN linux-2.6.8.1.orig/fs/proc/generic.c linux-2.6.8.1-ve022stab078/fs/proc/generic.c +--- linux-2.6.8.1.orig/fs/proc/generic.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/generic.c 2006-05-11 13:05:40.000000000 +0400 +@@ -10,7 +10,9 @@ + + #include <linux/errno.h> + #include <linux/time.h> ++#include <linux/fs.h> + #include <linux/proc_fs.h> ++#include <linux/ve_owner.h> + #include <linux/stat.h> + #include <linux/module.h> + #include <linux/mount.h> +@@ -27,6 +29,8 @@ static ssize_t proc_file_write(struct fi + size_t count, loff_t *ppos); + static loff_t proc_file_lseek(struct file *, loff_t, int); + ++static DECLARE_RWSEM(proc_tree_sem); ++ + int proc_match(int len, const char *name, struct proc_dir_entry *de) + { + if (de->namelen != len) +@@ -54,13 +58,25 @@ proc_file_read(struct file *file, char _ + ssize_t n, count; + char *start; + struct proc_dir_entry * dp; ++ unsigned long long pos; ++ ++ /* ++ * Gaah, please just use "seq_file" instead. The legacy /proc ++ * interfaces cut loff_t down to off_t for reads, and ignore ++ * the offset entirely for writes.. 
++ */ ++ pos = *ppos; ++ if (pos > MAX_NON_LFS) ++ return 0; ++ if (nbytes > MAX_NON_LFS - pos) ++ nbytes = MAX_NON_LFS - pos; + + dp = PDE(inode); + if (!(page = (char*) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + while ((nbytes > 0) && !eof) { +- count = min_t(ssize_t, PROC_BLOCK_SIZE, nbytes); ++ count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); + + start = NULL; + if (dp->get_info) { +@@ -202,32 +218,20 @@ proc_file_write(struct file *file, const + static loff_t + proc_file_lseek(struct file *file, loff_t offset, int orig) + { +- lock_kernel(); +- +- switch (orig) { +- case 0: +- if (offset < 0) +- goto out; +- file->f_pos = offset; +- unlock_kernel(); +- return(file->f_pos); +- case 1: +- if (offset + file->f_pos < 0) +- goto out; +- file->f_pos += offset; +- unlock_kernel(); +- return(file->f_pos); +- case 2: +- goto out; +- default: +- goto out; +- } +- +-out: +- unlock_kernel(); +- return -EINVAL; ++ loff_t retval = -EINVAL; ++ switch (orig) { ++ case 1: ++ offset += file->f_pos; ++ /* fallthrough */ ++ case 0: ++ if (offset < 0 || offset > MAX_NON_LFS) ++ break; ++ file->f_pos = retval = offset; ++ } ++ return retval; + } + ++#ifndef CONFIG_VE + static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) + { + struct inode *inode = dentry->d_inode; +@@ -248,9 +252,12 @@ static int proc_notify_change(struct den + out: + return error; + } ++#endif + + static struct inode_operations proc_file_inode_operations = { ++#ifndef CONFIG_VE + .setattr = proc_notify_change, ++#endif + }; + + /* +@@ -258,14 +265,14 @@ static struct inode_operations proc_file + * returns the struct proc_dir_entry for "/proc/tty/driver", and + * returns "serial" in residual. + */ +-static int xlate_proc_name(const char *name, +- struct proc_dir_entry **ret, const char **residual) ++static int __xlate_proc_name(struct proc_dir_entry *root, const char *name, ++ struct proc_dir_entry **ret, const char **residual) + { + const char *cp = name, *next; + struct proc_dir_entry *de; + int len; + +- de = &proc_root; ++ de = root; + while (1) { + next = strchr(cp, '/'); + if (!next) +@@ -285,6 +292,23 @@ static int xlate_proc_name(const char *n + return 0; + } + ++#ifndef CONFIG_VE ++#define xlate_proc_loc_name xlate_proc_name ++#else ++static int xlate_proc_loc_name(const char *name, ++ struct proc_dir_entry **ret, const char **residual) ++{ ++ return __xlate_proc_name(get_exec_env()->proc_root, ++ name, ret, residual); ++} ++#endif ++ ++static int xlate_proc_name(const char *name, ++ struct proc_dir_entry **ret, const char **residual) ++{ ++ return __xlate_proc_name(&proc_root, name, ret, residual); ++} ++ + static DEFINE_IDR(proc_inum_idr); + static spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */ + +@@ -363,31 +387,102 @@ static struct dentry_operations proc_den + struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) + { + struct inode *inode = NULL; +- struct proc_dir_entry * de; ++ struct proc_dir_entry *lde, *gde; + int error = -ENOENT; + + lock_kernel(); +- de = PDE(dir); +- if (de) { +- for (de = de->subdir; de ; de = de->next) { +- if (de->namelen != dentry->d_name.len) +- continue; +- if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { +- unsigned int ino = de->low_ino; ++ lde = LPDE(dir); ++ if (!lde) ++ goto out; + +- error = -EINVAL; +- inode = proc_get_inode(dir->i_sb, ino, de); ++ down_read(&proc_tree_sem); ++ for (lde = lde->subdir; lde ; lde = lde->next) { ++ if (lde->namelen != dentry->d_name.len) ++ continue; ++ if 
(!memcmp(dentry->d_name.name, lde->name, lde->namelen)) ++ break; ++ } ++#ifdef CONFIG_VE ++ gde = GPDE(dir); ++ if (gde != NULL) { ++ for (gde = gde->subdir; gde ; gde = gde->next) { ++ if (gde->namelen != dentry->d_name.len) ++ continue; ++ if (!memcmp(dentry->d_name.name, gde->name, gde->namelen)) + break; +- } + } + } +- unlock_kernel(); ++#else ++ gde = NULL; ++#endif ++ ++ /* ++ * There are following possible cases after lookup: ++ * ++ * lde gde ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * NULL NULL ENOENT ++ * loc NULL found in local tree ++ * loc glob found in both trees ++ * NULL glob found in global tree ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * We initialized inode as follows after lookup: ++ * ++ * inode->lde inode->gde ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * loc NULL in local tree ++ * loc glob both trees ++ * glob glob global tree ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * i.e. inode->lde is always initialized ++ */ ++ ++ if (lde == NULL && gde == NULL) ++ goto out_up; + ++ if (lde != NULL) { ++ inode = proc_get_inode(dir->i_sb, lde->low_ino, lde); ++ } else { ++ inode = proc_get_inode(dir->i_sb, gde->low_ino, gde); ++ } ++ /* ++ * We can sleep in proc_get_inode(), but since we have i_sem ++ * being taken, no one can setup GPDE/LPDE on this inode. ++ */ + if (inode) { ++#ifdef CONFIG_VE ++ GPDE(inode) = gde; ++ if (gde) { ++ atomic_inc(&gde->count); /* de_get() */ ++ /* we have taken a ref in proc_get_inode() already */ ++ __module_get(gde->owner); ++ } ++ /* if dentry is found in both trees and it is a directory ++ * then inode's nlink count must be altered, because local ++ * and global subtrees may differ. ++ * on the other hand, they may intersect, so actual nlink ++ * value is difficult to calculate - upper estimate is used ++ * instead of it. ++ * dentry found in global tree only must not be writable ++ * in non-super ve. 
++ */ ++ if (lde && gde && lde != gde && gde->nlink > 1) ++ inode->i_nlink += gde->nlink - 2; ++ if (lde == NULL && !ve_is_super( ++ VE_OWNER_FSTYPE(dir->i_sb->s_type))) ++ inode->i_mode &= ~S_IWUGO; ++#endif ++ up_read(&proc_tree_sem); ++ unlock_kernel(); + dentry->d_op = &proc_dentry_operations; + d_add(dentry, inode); + return NULL; + } ++out_up: ++ up_read(&proc_tree_sem); ++out: ++ unlock_kernel(); + return ERR_PTR(error); + } + +@@ -434,29 +529,58 @@ int proc_readdir(struct file * filp, + filp->f_pos++; + /* fall through */ + default: +- de = de->subdir; + i -= 2; +- for (;;) { +- if (!de) { +- ret = 1; +- goto out; +- } +- if (!i) +- break; +- de = de->next; +- i--; +- } ++ } + +- do { +- if (filldir(dirent, de->name, de->namelen, filp->f_pos, +- de->low_ino, de->mode >> 12) < 0) +- goto out; +- filp->f_pos++; +- de = de->next; +- } while (de); ++ down_read(&proc_tree_sem); ++ de = de->subdir; ++ for (; de != NULL; de = de->next) { ++ if (!i) ++ break; ++ i--; + } ++ ++ for (; de != NULL; de = de->next) { ++ if (filldir(dirent, de->name, de->namelen, filp->f_pos, ++ de->low_ino, de->mode >> 12) < 0) ++ goto out_up; ++ filp->f_pos++; ++ } ++#ifdef CONFIG_VE ++ de = GPDE(inode); ++ if (de == NULL) { ++ ret = 1; ++ goto out_up; ++ } ++ de = de->subdir; ++ ++ for (; de != NULL; de = de->next) { ++ struct proc_dir_entry *p; ++ /* check that we haven't filled this dir already */ ++ for (p = LPDE(inode)->subdir; p; p = p->next) { ++ if (de->namelen != p->namelen) ++ continue; ++ if (!memcmp(de->name, p->name, p->namelen)) ++ break; ++ } ++ if (p) ++ continue; ++ /* skip first i entries */ ++ if (i > 0) { ++ i--; ++ continue; ++ } ++ if (filldir(dirent, de->name, de->namelen, filp->f_pos, ++ de->low_ino, de->mode >> 12) < 0) ++ goto out_up; ++ filp->f_pos++; ++ } ++#endif + ret = 1; +-out: unlock_kernel(); ++out_up: ++ up_read(&proc_tree_sem); ++out: ++ unlock_kernel(); + return ret; + } + +@@ -475,7 +599,9 @@ static struct file_operations proc_dir_o + */ + static struct inode_operations proc_dir_inode_operations = { + .lookup = proc_lookup, ++#ifndef CONFIG_VE + .setattr = proc_notify_change, ++#endif + }; + + static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +@@ -504,6 +630,7 @@ static int proc_register(struct proc_dir + if (dp->proc_iops == NULL) + dp->proc_iops = &proc_file_inode_operations; + } ++ de_get(dir); + return 0; + } + +@@ -549,7 +676,7 @@ static struct proc_dir_entry *proc_creat + /* make sure name is valid */ + if (!name || !strlen(name)) goto out; + +- if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) ++ if (!(*parent) && xlate_proc_loc_name(name, parent, &fn) != 0) + goto out; + len = strlen(fn); + +@@ -558,6 +685,7 @@ static struct proc_dir_entry *proc_creat + + memset(ent, 0, sizeof(struct proc_dir_entry)); + memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); ++ atomic_set(&ent->count, 1); + ent->name = ((char *) ent) + sizeof(*ent); + ent->namelen = len; + ent->mode = mode; +@@ -571,6 +699,7 @@ struct proc_dir_entry *proc_symlink(cons + { + struct proc_dir_entry *ent; + ++ down_write(&proc_tree_sem); + ent = proc_create(&parent,name, + (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); + +@@ -588,6 +717,7 @@ struct proc_dir_entry *proc_symlink(cons + ent = NULL; + } + } ++ up_write(&proc_tree_sem); + return ent; + } + +@@ -596,6 +726,7 @@ struct proc_dir_entry *proc_mkdir_mode(c + { + struct proc_dir_entry *ent; + ++ down_write(&proc_tree_sem); + ent = proc_create(&parent, name, S_IFDIR | mode, 2); + if (ent) { + 
ent->proc_fops = &proc_dir_operations; +@@ -606,6 +737,7 @@ struct proc_dir_entry *proc_mkdir_mode(c + ent = NULL; + } + } ++ up_write(&proc_tree_sem); + return ent; + } + +@@ -615,7 +747,7 @@ struct proc_dir_entry *proc_mkdir(const + return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); + } + +-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, ++static struct proc_dir_entry *__create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent) + { + struct proc_dir_entry *ent; +@@ -647,6 +779,35 @@ struct proc_dir_entry *create_proc_entry + return ent; + } + ++struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, ++ struct proc_dir_entry *parent) ++{ ++ struct proc_dir_entry *ent; ++ const char *path = name; ++ ++ ent = NULL; ++ down_write(&proc_tree_sem); ++ if (parent || xlate_proc_loc_name(path, &parent, &name) == 0) ++ ent = __create_proc_entry(name, mode, parent); ++ up_write(&proc_tree_sem); ++ return ent; ++} ++ ++struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode, ++ struct proc_dir_entry *parent) ++{ ++ struct proc_dir_entry *ent; ++ const char *path = name; ++ ++ ent = NULL; ++ down_write(&proc_tree_sem); ++ if (parent || xlate_proc_name(path, &parent, &name) == 0) ++ ent = __create_proc_entry(name, mode, parent); ++ up_write(&proc_tree_sem); ++ return ent; ++} ++EXPORT_SYMBOL(create_proc_glob_entry); ++ + void free_proc_entry(struct proc_dir_entry *de) + { + unsigned int ino = de->low_ino; +@@ -665,15 +826,13 @@ void free_proc_entry(struct proc_dir_ent + * Remove a /proc entry and free it if it's not currently in use. + * If it is in use, we set the 'deleted' flag. + */ +-void remove_proc_entry(const char *name, struct proc_dir_entry *parent) ++static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent) + { + struct proc_dir_entry **p; + struct proc_dir_entry *de; + const char *fn = name; + int len; + +- if (!parent && xlate_proc_name(name, &parent, &fn) != 0) +- goto out; + len = strlen(fn); + for (p = &parent->subdir; *p; p=&(*p)->next ) { + if (!proc_match(len, fn, *p)) +@@ -681,20 +840,58 @@ void remove_proc_entry(const char *name, + de = *p; + *p = de->next; + de->next = NULL; ++ de_put(parent); + if (S_ISDIR(de->mode)) + parent->nlink--; + proc_kill_inodes(de); + de->nlink = 0; + WARN_ON(de->subdir); +- if (!atomic_read(&de->count)) +- free_proc_entry(de); +- else { +- de->deleted = 1; +- printk("remove_proc_entry: %s/%s busy, count=%d\n", +- parent->name, de->name, atomic_read(&de->count)); +- } ++ de->deleted = 1; ++ de_put(de); + break; + } +-out: +- return; ++} ++ ++static void __remove_proc_glob_entry(const char *name, struct proc_dir_entry *p) ++{ ++ const char *fn = name; ++ ++ if (!p && xlate_proc_name(name, &p, &fn) != 0) ++ return; ++ __remove_proc_entry(fn, p); ++} ++ ++void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ down_write(&proc_tree_sem); ++ __remove_proc_glob_entry(name, parent); ++ up_write(&proc_tree_sem); ++} ++ ++static void __remove_proc_loc_entry(const char *name, struct proc_dir_entry *p) ++{ ++ const char *fn = name; ++ ++ if (!p && xlate_proc_loc_name(name, &p, &fn) != 0) ++ return; ++ __remove_proc_entry(fn, p); ++} ++ ++void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ down_write(&proc_tree_sem); ++ __remove_proc_entry(name, parent); ++ up_write(&proc_tree_sem); ++} ++ ++/* used in cases when we don't know whether it is global or local proc tree */ ++void 
remove_proc_entry(const char *name, struct proc_dir_entry *parent) ++{ ++ down_write(&proc_tree_sem); ++ __remove_proc_loc_entry(name, parent); ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ __remove_proc_glob_entry(name, parent); ++#endif ++ up_write(&proc_tree_sem); + } +diff -uprN linux-2.6.8.1.orig/fs/proc/inode.c linux-2.6.8.1-ve022stab078/fs/proc/inode.c +--- linux-2.6.8.1.orig/fs/proc/inode.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/inode.c 2006-05-11 13:05:40.000000000 +0400 +@@ -8,6 +8,7 @@ + #include <linux/proc_fs.h> + #include <linux/kernel.h> + #include <linux/mm.h> ++#include <linux/ve_owner.h> + #include <linux/string.h> + #include <linux/stat.h> + #include <linux/file.h> +@@ -22,34 +23,25 @@ + + extern void free_proc_entry(struct proc_dir_entry *); + +-static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) +-{ +- if (de) +- atomic_inc(&de->count); +- return de; +-} +- + /* + * Decrements the use count and checks for deferred deletion. + */ +-static void de_put(struct proc_dir_entry *de) ++void de_put(struct proc_dir_entry *de) + { + if (de) { +- lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); +- unlock_kernel(); + return; + } + + if (atomic_dec_and_test(&de->count)) { +- if (de->deleted) { +- printk("de_put: deferred delete of %s\n", +- de->name); +- free_proc_entry(de); ++ if (!de->deleted) { ++ printk("de_put: entry %s is not removed yet\n", ++ de->name); ++ return; + } +- } +- unlock_kernel(); ++ free_proc_entry(de); ++ } + } + } + +@@ -67,12 +59,19 @@ static void proc_delete_inode(struct ino + put_task_struct(tsk); + + /* Let go of any associated proc directory entry */ +- de = PROC_I(inode)->pde; ++ de = LPDE(inode); + if (de) { + if (de->owner) + module_put(de->owner); + de_put(de); + } ++#ifdef CONFIG_VE ++ de = GPDE(inode); ++ if (de) { ++ module_put(de->owner); ++ de_put(de); ++ } ++#endif + clear_inode(inode); + } + +@@ -99,6 +98,9 @@ static struct inode *proc_alloc_inode(st + ei->pde = NULL; + inode = &ei->vfs_inode; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++#ifdef CONFIG_VE ++ GPDE(inode) = NULL; ++#endif + return inode; + } + +@@ -200,10 +202,13 @@ struct inode *proc_get_inode(struct supe + + WARN_ON(de && de->deleted); + ++ if (de != NULL && !try_module_get(de->owner)) ++ goto out_mod; ++ + inode = iget(sb, ino); + if (!inode) +- goto out_fail; +- ++ goto out_ino; ++ + PROC_I(inode)->pde = de; + if (de) { + if (de->mode) { +@@ -215,20 +220,20 @@ struct inode *proc_get_inode(struct supe + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; +- if (!try_module_get(de->owner)) +- goto out_fail; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) + inode->i_fop = de->proc_fops; + } + +-out: + return inode; + +-out_fail: ++out_ino: ++ if (de != NULL) ++ module_put(de->owner); ++out_mod: + de_put(de); +- goto out; ++ return NULL; + } + + int proc_fill_super(struct super_block *s, void *data, int silent) +@@ -251,6 +256,14 @@ int proc_fill_super(struct super_block * + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) + goto out_no_root; ++ ++#ifdef CONFIG_VE ++ LPDE(root_inode) = de_get(get_exec_env()->proc_root); ++ GPDE(root_inode) = &proc_root; ++#else ++ LPDE(root_inode) = &proc_root; ++#endif ++ + parse_options(data, &root_inode->i_uid, &root_inode->i_gid); + return 0; + +diff -uprN linux-2.6.8.1.orig/fs/proc/kmsg.c linux-2.6.8.1-ve022stab078/fs/proc/kmsg.c +--- 
linux-2.6.8.1.orig/fs/proc/kmsg.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/kmsg.c 2006-05-11 13:05:42.000000000 +0400 +@@ -11,6 +11,7 @@ + #include <linux/kernel.h> + #include <linux/poll.h> + #include <linux/fs.h> ++#include <linux/veprintk.h> + + #include <asm/uaccess.h> + #include <asm/io.h> +@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi + + static unsigned int kmsg_poll(struct file *file, poll_table *wait) + { +- poll_wait(file, &log_wait, wait); ++ poll_wait(file, &ve_log_wait, wait); + if (do_syslog(9, NULL, 0)) + return POLLIN | POLLRDNORM; + return 0; +diff -uprN linux-2.6.8.1.orig/fs/proc/proc_misc.c linux-2.6.8.1-ve022stab078/fs/proc/proc_misc.c +--- linux-2.6.8.1.orig/fs/proc/proc_misc.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/proc_misc.c 2006-05-11 13:05:49.000000000 +0400 +@@ -31,6 +31,7 @@ + #include <linux/pagemap.h> + #include <linux/swap.h> + #include <linux/slab.h> ++#include <linux/virtinfo.h> + #include <linux/smp.h> + #include <linux/signal.h> + #include <linux/module.h> +@@ -44,14 +45,15 @@ + #include <linux/jiffies.h> + #include <linux/sysrq.h> + #include <linux/vmalloc.h> ++#include <linux/version.h> ++#include <linux/compile.h> + #include <asm/uaccess.h> + #include <asm/pgtable.h> + #include <asm/io.h> + #include <asm/tlb.h> + #include <asm/div64.h> ++#include <linux/fairsched.h> + +-#define LOAD_INT(x) ((x) >> FSHIFT) +-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + /* + * Warning: stuff below (imported functions) assumes that its output will fit + * into one page. For some of those functions it may be wrong. Moreover, we +@@ -83,15 +85,33 @@ static int loadavg_read_proc(char *page, + { + int a, b, c; + int len; +- +- a = avenrun[0] + (FIXED_1/200); +- b = avenrun[1] + (FIXED_1/200); +- c = avenrun[2] + (FIXED_1/200); ++ unsigned long __nr_running; ++ int __nr_threads; ++ unsigned long *__avenrun; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); ++ ++ if (ve_is_super(ve)) { ++ __avenrun = &avenrun[0]; ++ __nr_running = nr_running(); ++ __nr_threads = nr_threads; ++ } ++#ifdef CONFIG_VE ++ else { ++ __avenrun = &ve->avenrun[0]; ++ __nr_running = nr_running_ve(ve); ++ __nr_threads = atomic_read(&ve->pcounter); ++ } ++#endif ++ a = __avenrun[0] + (FIXED_1/200); ++ b = __avenrun[1] + (FIXED_1/200); ++ c = __avenrun[2] + (FIXED_1/200); + len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), +- nr_running(), nr_threads, last_pid); ++ __nr_running, __nr_threads, last_pid); + return proc_calc_metrics(page, start, off, count, eof, len); + } + +@@ -139,6 +159,13 @@ static int uptime_read_proc(char *page, + u64 idle_jiffies = init_task.utime + init_task.stime; + + do_posix_clock_monotonic_gettime(&uptime); ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ set_normalized_timespec(&uptime, ++ uptime.tv_sec - get_exec_env()->start_timespec.tv_sec, ++ uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec); ++ } ++#endif + jiffies_to_timespec(idle_jiffies, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, +@@ -152,30 +179,34 @@ static int uptime_read_proc(char *page, + static int meminfo_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) + { +- struct sysinfo i; +- int len, committed; +- struct page_state ps; +- unsigned long inactive; +- unsigned long active; +- unsigned long free; +- unsigned long vmtot; ++ 
struct meminfo mi; ++ int len; ++ unsigned long dummy; + struct vmalloc_info vmi; + +- get_page_state(&ps); +- get_zone_counts(&active, &inactive, &free); ++ get_page_state(&mi.ps); ++ get_zone_counts(&mi.active, &mi.inactive, &dummy); + + /* + * display in kilobytes. + */ + #define K(x) ((x) << (PAGE_SHIFT - 10)) +- si_meminfo(&i); +- si_swapinfo(&i); +- committed = atomic_read(&vm_committed_space); ++ si_meminfo(&mi.si); ++ si_swapinfo(&mi.si); ++ mi.committed_space = atomic_read(&vm_committed_space); ++ mi.swapcache = total_swapcache_pages; ++ mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram; + +- vmtot = (VMALLOC_END-VMALLOC_START)>>10; ++ mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT; + vmi = get_vmalloc_info(); +- vmi.used >>= 10; +- vmi.largest_chunk >>= 10; ++ mi.vmalloc_used = vmi.used >> PAGE_SHIFT; ++ mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT; ++ ++#ifdef CONFIG_USER_RESOURCE ++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi) ++ & NOTIFY_FAIL) ++ return -ENOMSG; ++#endif + + /* + * Tagged format, for easy grepping and expansion. +@@ -198,36 +229,40 @@ static int meminfo_read_proc(char *page, + "Writeback: %8lu kB\n" + "Mapped: %8lu kB\n" + "Slab: %8lu kB\n" +- "Committed_AS: %8u kB\n" ++ "Committed_AS: %8lu kB\n" + "PageTables: %8lu kB\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", +- K(i.totalram), +- K(i.freeram), +- K(i.bufferram), +- K(get_page_cache_size()-total_swapcache_pages-i.bufferram), +- K(total_swapcache_pages), +- K(active), +- K(inactive), +- K(i.totalhigh), +- K(i.freehigh), +- K(i.totalram-i.totalhigh), +- K(i.freeram-i.freehigh), +- K(i.totalswap), +- K(i.freeswap), +- K(ps.nr_dirty), +- K(ps.nr_writeback), +- K(ps.nr_mapped), +- K(ps.nr_slab), +- K(committed), +- K(ps.nr_page_table_pages), +- vmtot, +- vmi.used, +- vmi.largest_chunk ++ K(mi.si.totalram), ++ K(mi.si.freeram), ++ K(mi.si.bufferram), ++ K(mi.cache), ++ K(mi.swapcache), ++ K(mi.active), ++ K(mi.inactive), ++ K(mi.si.totalhigh), ++ K(mi.si.freehigh), ++ K(mi.si.totalram-mi.si.totalhigh), ++ K(mi.si.freeram-mi.si.freehigh), ++ K(mi.si.totalswap), ++ K(mi.si.freeswap), ++ K(mi.ps.nr_dirty), ++ K(mi.ps.nr_writeback), ++ K(mi.ps.nr_mapped), ++ K(mi.ps.nr_slab), ++ K(mi.committed_space), ++ K(mi.ps.nr_page_table_pages), ++ K(mi.vmalloc_total), ++ K(mi.vmalloc_used), ++ K(mi.vmalloc_largest) + ); + ++#ifdef CONFIG_HUGETLB_PAGE ++#warning Virtualize hugetlb_report_meminfo ++#else + len += hugetlb_report_meminfo(page + len); ++#endif + + return proc_calc_metrics(page, start, off, count, eof, len); + #undef K +@@ -252,8 +287,15 @@ static int version_read_proc(char *page, + { + extern char *linux_banner; + int len; ++ struct new_utsname *utsname = &ve_utsname; + +- strcpy(page, linux_banner); ++ if (ve_is_super(get_exec_env())) ++ strcpy(page, linux_banner); ++ else ++ sprintf(page, "Linux version %s (" ++ LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" ++ LINUX_COMPILER ") %s\n", ++ utsname->release, utsname->version); + len = strlen(page); + return proc_calc_metrics(page, start, off, count, eof, len); + } +@@ -352,21 +394,14 @@ static struct file_operations proc_slabi + .release = seq_release, + }; + +-int show_stat(struct seq_file *p, void *v) ++static void show_stat_ve0(struct seq_file *p) + { +- int i; +- extern unsigned long total_forks; +- unsigned long jif; +- u64 sum = 0, user = 0, nice = 0, system = 0, +- idle = 0, iowait = 0, irq = 0, softirq = 0; +- +- jif = - wall_to_monotonic.tv_sec; +- if 
(wall_to_monotonic.tv_nsec) +- --jif; ++ int i, j; ++ struct page_state page_state; ++ u64 sum, user, nice, system, idle, iowait, irq, softirq; + ++ sum = user = nice = system = idle = iowait = irq = softirq = 0; + for_each_cpu(i) { +- int j; +- + user += kstat_cpu(i).cpustat.user; + nice += kstat_cpu(i).cpustat.nice; + system += kstat_cpu(i).cpustat.system; +@@ -386,8 +421,8 @@ int show_stat(struct seq_file *p, void * + (unsigned long long)jiffies_64_to_clock_t(iowait), + (unsigned long long)jiffies_64_to_clock_t(irq), + (unsigned long long)jiffies_64_to_clock_t(softirq)); +- for_each_online_cpu(i) { + ++ for_each_online_cpu(i) { + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; + nice = kstat_cpu(i).cpustat.nice; +@@ -396,6 +431,7 @@ int show_stat(struct seq_file *p, void * + iowait = kstat_cpu(i).cpustat.iowait; + irq = kstat_cpu(i).cpustat.irq; + softirq = kstat_cpu(i).cpustat.softirq; ++ + seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu\n", + i, + (unsigned long long)jiffies_64_to_clock_t(user), +@@ -412,6 +448,84 @@ int show_stat(struct seq_file *p, void * + for (i = 0; i < NR_IRQS; i++) + seq_printf(p, " %u", kstat_irqs(i)); + #endif ++ get_full_page_state(&page_state); ++ seq_printf(p, "\nswap %lu %lu", ++ page_state.pswpin, page_state.pswpout); ++} ++ ++#ifdef CONFIG_VE ++static void show_stat_ve(struct seq_file *p, struct ve_struct *env) ++{ ++ int i; ++ u64 user, nice, system; ++ cycles_t idle, iowait; ++ cpumask_t ve_cpus; ++ ++ ve_cpu_online_map(env, &ve_cpus); ++ ++ user = nice = system = idle = iowait = 0; ++ for_each_cpu_mask(i, ve_cpus) { ++ user += VE_CPU_STATS(env, i)->user; ++ nice += VE_CPU_STATS(env, i)->nice; ++ system += VE_CPU_STATS(env, i)->system; ++ idle += ve_sched_get_idle_time(env, i); ++ iowait += ve_sched_get_iowait_time(env, i); ++ } ++ ++ seq_printf(p, "cpu %llu %llu %llu %llu %llu 0 0\n", ++ (unsigned long long)jiffies_64_to_clock_t(user), ++ (unsigned long long)jiffies_64_to_clock_t(nice), ++ (unsigned long long)jiffies_64_to_clock_t(system), ++ (unsigned long long)cycles_to_clocks(idle), ++ (unsigned long long)cycles_to_clocks(iowait)); ++ ++ for_each_cpu_mask(i, ve_cpus) { ++ user = VE_CPU_STATS(env, i)->user; ++ nice = VE_CPU_STATS(env, i)->nice; ++ system = VE_CPU_STATS(env, i)->system; ++ idle = ve_sched_get_idle_time(env, i); ++ iowait = ve_sched_get_iowait_time(env, i); ++ ++ seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0\n", ++ i, ++ (unsigned long long)jiffies_64_to_clock_t(user), ++ (unsigned long long)jiffies_64_to_clock_t(nice), ++ (unsigned long long)jiffies_64_to_clock_t(system), ++ (unsigned long long)cycles_to_clocks(idle), ++ (unsigned long long)cycles_to_clocks(iowait)); ++ } ++ seq_printf(p, "intr 0"); ++ seq_printf(p, "\nswap %d %d", 0, 0); ++} ++#endif ++ ++int show_stat(struct seq_file *p, void *v) ++{ ++ extern unsigned long total_forks; ++ unsigned long seq, jif; ++ struct ve_struct *env; ++ unsigned long __nr_running, __nr_iowait; ++ ++ do { ++ seq = read_seqbegin(&xtime_lock); ++ jif = - wall_to_monotonic.tv_sec; ++ if (wall_to_monotonic.tv_nsec) ++ --jif; ++ } while (read_seqretry(&xtime_lock, seq)); ++ ++ env = get_exec_env(); ++ if (ve_is_super(env)) { ++ show_stat_ve0(p); ++ __nr_running = nr_running(); ++ __nr_iowait = nr_iowait(); ++ } ++#ifdef CONFIG_VE ++ else { ++ show_stat_ve(p, env); ++ __nr_running = nr_running_ve(env); ++ __nr_iowait = nr_iowait_ve(env); ++ } ++#endif + + seq_printf(p, + "\nctxt %llu\n" +@@ -422,8 +536,8 @@ int show_stat(struct seq_file *p, 
void * + nr_context_switches(), + (unsigned long)jif, + total_forks, +- nr_running(), +- nr_iowait()); ++ __nr_running, ++ __nr_iowait); + + return 0; + } +@@ -520,7 +634,8 @@ static int cmdline_read_proc(char *page, + { + int len; + +- len = sprintf(page, "%s\n", saved_command_line); ++ len = sprintf(page, "%s\n", ++ ve_is_super(get_exec_env()) ? saved_command_line : ""); + return proc_calc_metrics(page, start, off, count, eof, len); + } + +diff -uprN linux-2.6.8.1.orig/fs/proc/proc_tty.c linux-2.6.8.1-ve022stab078/fs/proc/proc_tty.c +--- linux-2.6.8.1.orig/fs/proc/proc_tty.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/proc_tty.c 2006-05-11 13:05:40.000000000 +0400 +@@ -6,6 +6,7 @@ + + #include <asm/uaccess.h> + ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/errno.h> + #include <linux/time.h> +@@ -111,24 +112,35 @@ static int show_tty_driver(struct seq_fi + /* iterator */ + static void *t_start(struct seq_file *m, loff_t *pos) + { +- struct list_head *p; ++ struct tty_driver *drv; ++ + loff_t l = *pos; +- list_for_each(p, &tty_drivers) ++ read_lock(&tty_driver_guard); ++ list_for_each_entry(drv, &tty_drivers, tty_drivers) { ++ if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env())) ++ continue; + if (!l--) +- return list_entry(p, struct tty_driver, tty_drivers); ++ return drv; ++ } + return NULL; + } + + static void *t_next(struct seq_file *m, void *v, loff_t *pos) + { +- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; ++ struct tty_driver *drv; ++ + (*pos)++; +- return p==&tty_drivers ? NULL : +- list_entry(p, struct tty_driver, tty_drivers); ++ drv = (struct tty_driver *)v; ++ list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) { ++ if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env())) ++ return drv; ++ } ++ return NULL; + } + + static void t_stop(struct seq_file *m, void *v) + { ++ read_unlock(&tty_driver_guard); + } + + static struct seq_operations tty_drivers_op = { +diff -uprN linux-2.6.8.1.orig/fs/proc/root.c linux-2.6.8.1-ve022stab078/fs/proc/root.c +--- linux-2.6.8.1.orig/fs/proc/root.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/proc/root.c 2006-05-11 13:05:42.000000000 +0400 +@@ -30,12 +30,14 @@ static struct super_block *proc_get_sb(s + return get_sb_single(fs_type, flags, data, proc_fill_super); + } + +-static struct file_system_type proc_fs_type = { ++struct file_system_type proc_fs_type = { + .name = "proc", + .get_sb = proc_get_sb, + .kill_sb = kill_anon_super, + }; + ++EXPORT_SYMBOL(proc_fs_type); ++ + extern int __init proc_init_inodecache(void); + void __init proc_root_init(void) + { +diff -uprN linux-2.6.8.1.orig/fs/qnx4/inode.c linux-2.6.8.1-ve022stab078/fs/qnx4/inode.c +--- linux-2.6.8.1.orig/fs/qnx4/inode.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/qnx4/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -78,7 +78,7 @@ static void qnx4_write_super(struct supe + unlock_kernel(); + } + +-static void qnx4_write_inode(struct inode *inode, int unused) ++static int qnx4_write_inode(struct inode *inode, int unused) + { + struct qnx4_inode_entry *raw_inode; + int block, ino; +@@ -87,12 +87,12 @@ static void qnx4_write_inode(struct inod + + QNX4DEBUG(("qnx4: write inode 1.\n")); + if (inode->i_nlink == 0) { +- return; ++ return 0; + } + if (!ino) { + printk("qnx4: bad inode number on dev %s: %d is out of range\n", + inode->i_sb->s_id, ino); +- return; ++ return -EIO; + } + QNX4DEBUG(("qnx4: write inode 2.\n")); + block = 
ino / QNX4_INODES_PER_BLOCK; +@@ -101,7 +101,7 @@ static void qnx4_write_inode(struct inod + printk("qnx4: major problem: unable to read inode from dev " + "%s\n", inode->i_sb->s_id); + unlock_kernel(); +- return; ++ return -EIO; + } + raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + + (ino % QNX4_INODES_PER_BLOCK); +@@ -117,6 +117,7 @@ static void qnx4_write_inode(struct inod + mark_buffer_dirty(bh); + brelse(bh); + unlock_kernel(); ++ return 0; + } + + #endif +diff -uprN linux-2.6.8.1.orig/fs/quota.c linux-2.6.8.1-ve022stab078/fs/quota.c +--- linux-2.6.8.1.orig/fs/quota.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/quota.c 2006-05-11 13:05:43.000000000 +0400 +@@ -94,26 +94,29 @@ static int check_quotactl_valid(struct s + if (cmd == Q_GETQUOTA || cmd == Q_XGETQUOTA) { + if (((type == USRQUOTA && current->euid != id) || + (type == GRPQUOTA && !in_egroup_p(id))) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + } + else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO && cmd != Q_XGETQSTAT) +- if (!capable(CAP_SYS_ADMIN)) ++ if (!capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + + return security_quotactl (cmd, type, id, sb); + } + +-static struct super_block *get_super_to_sync(int type) ++void sync_dquots(struct super_block *sb, int type) + { +- struct list_head *head; + int cnt, dirty; +- +-restart: ++ ++ if (sb) { ++ if (sb->s_qcop && sb->s_qcop->quota_sync) ++ sb->s_qcop->quota_sync(sb, type); ++ return; ++ } ++ + spin_lock(&sb_lock); +- list_for_each(head, &super_blocks) { +- struct super_block *sb = list_entry(head, struct super_block, s_list); +- ++restart: ++ list_for_each_entry(sb, &super_blocks, s_list) { + /* This test just improves performance so it needn't be reliable... 
*/ + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) +@@ -124,29 +127,14 @@ restart: + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); +- if (!sb->s_root) { +- drop_super(sb); ++ if (sb->s_root && sb->s_qcop->quota_sync) ++ sb->s_qcop->quota_sync(sb, type); ++ up_read(&sb->s_umount); ++ spin_lock(&sb_lock); ++ if (__put_super_and_need_restart(sb)) + goto restart; +- } +- return sb; + } + spin_unlock(&sb_lock); +- return NULL; +-} +- +-void sync_dquots(struct super_block *sb, int type) +-{ +- if (sb) { +- if (sb->s_qcop->quota_sync) +- sb->s_qcop->quota_sync(sb, type); +- } +- else { +- while ((sb = get_super_to_sync(type)) != 0) { +- if (sb->s_qcop->quota_sync) +- sb->s_qcop->quota_sync(sb, type); +- drop_super(sb); +- } +- } + } + + /* Copy parameters and call proper function */ +@@ -258,6 +246,250 @@ static int do_quotactl(struct super_bloc + return 0; + } + ++static struct super_block *quota_get_sb(const char __user *special) ++{ ++ struct super_block *sb; ++ struct block_device *bdev; ++ char *tmp; ++ ++ tmp = getname(special); ++ if (IS_ERR(tmp)) ++ return (struct super_block *)tmp; ++ bdev = lookup_bdev(tmp, FMODE_QUOTACTL); ++ putname(tmp); ++ if (IS_ERR(bdev)) ++ return (struct super_block *)bdev; ++ sb = get_super(bdev); ++ bdput(bdev); ++ if (!sb) ++ return ERR_PTR(-ENODEV); ++ return sb; ++} ++ ++#ifdef CONFIG_QUOTA_COMPAT ++ ++#define QC_QUOTAON 0x0100 /* enable quotas */ ++#define QC_QUOTAOFF 0x0200 /* disable quotas */ ++/* GETQUOTA, SETQUOTA and SETUSE which were at 0x0300-0x0500 has now other parameteres */ ++#define QC_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ ++#define QC_SETQLIM 0x0700 /* set limits */ ++/* GETSTATS at 0x0800 is now longer... */ ++#define QC_GETINFO 0x0900 /* get info about quotas - graces, flags... 
*/ ++#define QC_SETINFO 0x0A00 /* set info about quotas */ ++#define QC_SETGRACE 0x0B00 /* set inode and block grace */ ++#define QC_SETFLAGS 0x0C00 /* set flags for quota */ ++#define QC_GETQUOTA 0x0D00 /* get limits and usage */ ++#define QC_SETQUOTA 0x0E00 /* set limits and usage */ ++#define QC_SETUSE 0x0F00 /* set usage */ ++/* 0x1000 used by old RSQUASH */ ++#define QC_GETSTATS 0x1100 /* get collected stats */ ++#define QC_GETQUOTI 0x2B00 /* get limits and usage by index */ ++ ++struct compat_dqblk { ++ unsigned int dqb_ihardlimit; ++ unsigned int dqb_isoftlimit; ++ unsigned int dqb_curinodes; ++ unsigned int dqb_bhardlimit; ++ unsigned int dqb_bsoftlimit; ++ qsize_t dqb_curspace; ++ __kernel_time_t dqb_btime; ++ __kernel_time_t dqb_itime; ++}; ++ ++struct compat_dqinfo { ++ unsigned int dqi_bgrace; ++ unsigned int dqi_igrace; ++ unsigned int dqi_flags; ++ unsigned int dqi_blocks; ++ unsigned int dqi_free_blk; ++ unsigned int dqi_free_entry; ++}; ++ ++struct compat_dqstats { ++ __u32 lookups; ++ __u32 drops; ++ __u32 reads; ++ __u32 writes; ++ __u32 cache_hits; ++ __u32 allocated_dquots; ++ __u32 free_dquots; ++ __u32 syncs; ++ __u32 version; ++}; ++ ++asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); ++static long compat_quotactl(unsigned int cmds, unsigned int type, ++ const char __user *special, qid_t id, ++ void __user *addr) ++{ ++ struct super_block *sb; ++ long ret; ++ ++ sb = NULL; ++ switch (cmds) { ++ case QC_QUOTAON: ++ return sys_quotactl(QCMD(Q_QUOTAON, type), ++ special, id, addr); ++ ++ case QC_QUOTAOFF: ++ return sys_quotactl(QCMD(Q_QUOTAOFF, type), ++ special, id, addr); ++ ++ case QC_SYNC: ++ return sys_quotactl(QCMD(Q_SYNC, type), ++ special, id, addr); ++ ++ case QC_GETQUOTA: { ++ struct if_dqblk idq; ++ struct compat_dqblk cdq; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_GETQUOTA, id); ++ if (ret) ++ break; ++ ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); ++ if (ret) ++ break; ++ cdq.dqb_ihardlimit = idq.dqb_ihardlimit; ++ cdq.dqb_isoftlimit = idq.dqb_isoftlimit; ++ cdq.dqb_curinodes = idq.dqb_curinodes; ++ cdq.dqb_bhardlimit = idq.dqb_bhardlimit; ++ cdq.dqb_bsoftlimit = idq.dqb_bsoftlimit; ++ cdq.dqb_curspace = idq.dqb_curspace; ++ cdq.dqb_btime = idq.dqb_btime; ++ cdq.dqb_itime = idq.dqb_itime; ++ ret = 0; ++ if (copy_to_user(addr, &cdq, sizeof(cdq))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ case QC_SETQUOTA: ++ case QC_SETUSE: ++ case QC_SETQLIM: { ++ struct if_dqblk idq; ++ struct compat_dqblk cdq; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_SETQUOTA, id); ++ if (ret) ++ break; ++ ret = -EFAULT; ++ if (copy_from_user(&cdq, addr, sizeof(cdq))) ++ break; ++ idq.dqb_ihardlimit = cdq.dqb_ihardlimit; ++ idq.dqb_isoftlimit = cdq.dqb_isoftlimit; ++ idq.dqb_curinodes = cdq.dqb_curinodes; ++ idq.dqb_bhardlimit = cdq.dqb_bhardlimit; ++ idq.dqb_bsoftlimit = cdq.dqb_bsoftlimit; ++ idq.dqb_curspace = cdq.dqb_curspace; ++ idq.dqb_valid = 0; ++ if (cmds == QC_SETQUOTA || cmds == QC_SETQLIM) ++ idq.dqb_valid |= QIF_LIMITS; ++ if (cmds == QC_SETQUOTA || cmds == QC_SETUSE) ++ idq.dqb_valid |= QIF_USAGE; ++ ret = sb->s_qcop->set_dqblk(sb, type, id, &idq); ++ break; ++ } ++ ++ case QC_GETINFO: { ++ struct if_dqinfo iinf; ++ struct compat_dqinfo cinf; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = 
check_quotactl_valid(sb, type, Q_GETQUOTA, id); ++ if (ret) ++ break; ++ ret = sb->s_qcop->get_info(sb, type, &iinf); ++ if (ret) ++ break; ++ cinf.dqi_bgrace = iinf.dqi_bgrace; ++ cinf.dqi_igrace = iinf.dqi_igrace; ++ cinf.dqi_flags = 0; ++ if (iinf.dqi_flags & DQF_INFO_DIRTY) ++ cinf.dqi_flags |= 0x0010; ++ cinf.dqi_blocks = 0; ++ cinf.dqi_free_blk = 0; ++ cinf.dqi_free_entry = 0; ++ ret = 0; ++ if (copy_to_user(addr, &cinf, sizeof(cinf))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ case QC_SETINFO: ++ case QC_SETGRACE: ++ case QC_SETFLAGS: { ++ struct if_dqinfo iinf; ++ struct compat_dqinfo cinf; ++ ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_SETINFO, id); ++ if (ret) ++ break; ++ ret = -EFAULT; ++ if (copy_from_user(&cinf, addr, sizeof(cinf))) ++ break; ++ iinf.dqi_bgrace = cinf.dqi_bgrace; ++ iinf.dqi_igrace = cinf.dqi_igrace; ++ iinf.dqi_flags = cinf.dqi_flags; ++ iinf.dqi_valid = 0; ++ if (cmds == QC_SETINFO || cmds == QC_SETGRACE) ++ iinf.dqi_valid |= IIF_BGRACE | IIF_IGRACE; ++ if (cmds == QC_SETINFO || cmds == QC_SETFLAGS) ++ iinf.dqi_valid |= IIF_FLAGS; ++ ret = sb->s_qcop->set_info(sb, type, &iinf); ++ break; ++ } ++ ++ case QC_GETSTATS: { ++ struct compat_dqstats stat; ++ ++ memset(&stat, 0, sizeof(stat)); ++ stat.version = 6*10000+5*100+0; ++ ret = 0; ++ if (copy_to_user(addr, &stat, sizeof(stat))) ++ ret = -EFAULT; ++ break; ++ } ++ ++ case QC_GETQUOTI: ++ sb = quota_get_sb(special); ++ ret = PTR_ERR(sb); ++ if (IS_ERR(sb)) ++ break; ++ ret = check_quotactl_valid(sb, type, Q_GETINFO, id); ++ if (ret) ++ break; ++ ret = -ENOSYS; ++ if (!sb->s_qcop->get_quoti) ++ break; ++ ret = sb->s_qcop->get_quoti(sb, type, id, addr); ++ break; ++ ++ default: ++ ret = -ENOSYS; ++ break; ++ } ++ if (sb && !IS_ERR(sb)) ++ drop_super(sb); ++ return ret; ++} ++ ++#endif ++ + /* + * This is the system call interface. This communicates with + * the user-level programs. 
Currently this only supports diskquota +@@ -268,25 +500,20 @@ asmlinkage long sys_quotactl(unsigned in + { + uint cmds, type; + struct super_block *sb = NULL; +- struct block_device *bdev; +- char *tmp; + int ret; + + cmds = cmd >> SUBCMDSHIFT; + type = cmd & SUBCMDMASK; + ++#ifdef CONFIG_QUOTA_COMPAT ++ if (cmds >= 0x0100 && cmds < 0x3000) ++ return compat_quotactl(cmds, type, special, id, addr); ++#endif ++ + if (cmds != Q_SYNC || special) { +- tmp = getname(special); +- if (IS_ERR(tmp)) +- return PTR_ERR(tmp); +- bdev = lookup_bdev(tmp); +- putname(tmp); +- if (IS_ERR(bdev)) +- return PTR_ERR(bdev); +- sb = get_super(bdev); +- bdput(bdev); +- if (!sb) +- return -ENODEV; ++ sb = quota_get_sb(special); ++ if (IS_ERR(sb)) ++ return PTR_ERR(sb); + } + + ret = check_quotactl_valid(sb, type, cmds, id); +diff -uprN linux-2.6.8.1.orig/fs/ramfs/inode.c linux-2.6.8.1-ve022stab078/fs/ramfs/inode.c +--- linux-2.6.8.1.orig/fs/ramfs/inode.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ramfs/inode.c 2006-05-11 13:05:32.000000000 +0400 +@@ -128,7 +128,7 @@ static int ramfs_symlink(struct inode * + inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; +- error = page_symlink(inode, symname, l); ++ error = page_symlink(inode, symname, l, GFP_KERNEL); + if (!error) { + if (dir->i_mode & S_ISGID) + inode->i_gid = dir->i_gid; +diff -uprN linux-2.6.8.1.orig/fs/reiserfs/file.c linux-2.6.8.1-ve022stab078/fs/reiserfs/file.c +--- linux-2.6.8.1.orig/fs/reiserfs/file.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/reiserfs/file.c 2006-05-11 13:05:33.000000000 +0400 +@@ -535,7 +535,7 @@ error_exit: + + /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ + void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ +- int num_pages /* amount of pages */) { ++ size_t num_pages /* amount of pages */) { + int i; // loop counter + + for (i=0; i < num_pages ; i++) { +@@ -566,7 +566,7 @@ int reiserfs_copy_from_user_to_file_regi + int offset; // offset in page + + for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { +- int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page ++ size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page + struct page *page=prepared_pages[i]; // Current page we process. 
+ + fault_in_pages_readable( buf, count); +@@ -661,8 +661,8 @@ int reiserfs_submit_file_region_for_writ + struct reiserfs_transaction_handle *th, + struct inode *inode, + loff_t pos, /* Writing position offset */ +- int num_pages, /* Number of pages to write */ +- int write_bytes, /* number of bytes to write */ ++ size_t num_pages, /* Number of pages to write */ ++ size_t write_bytes, /* number of bytes to write */ + struct page **prepared_pages /* list of pages */ + ) + { +@@ -795,9 +795,9 @@ int reiserfs_check_for_tail_and_convert( + int reiserfs_prepare_file_region_for_write( + struct inode *inode /* Inode of the file */, + loff_t pos, /* position in the file */ +- int num_pages, /* number of pages to ++ size_t num_pages, /* number of pages to + prepare */ +- int write_bytes, /* Amount of bytes to be ++ size_t write_bytes, /* Amount of bytes to be + overwritten from + @pos */ + struct page **prepared_pages /* pointer to array +@@ -1176,10 +1176,9 @@ ssize_t reiserfs_file_write( struct file + while ( count > 0) { + /* This is the main loop in which we running until some error occures + or until we write all of the data. */ +- int num_pages;/* amount of pages we are going to write this iteration */ +- int write_bytes; /* amount of bytes to write during this iteration */ +- int blocks_to_allocate; /* how much blocks we need to allocate for +- this iteration */ ++ size_t num_pages;/* amount of pages we are going to write this iteration */ ++ size_t write_bytes; /* amount of bytes to write during this iteration */ ++ size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ + + /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/ + num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial +@@ -1193,7 +1192,7 @@ ssize_t reiserfs_file_write( struct file + /* If we were asked to write more data than we want to or if there + is not that much space, then we shorten amount of data to write + for this iteration. */ +- num_pages = min_t(int, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb)); ++ num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb)); + /* Also we should not forget to set size in bytes accordingly */ + write_bytes = (num_pages << PAGE_CACHE_SHIFT) - + (pos & (PAGE_CACHE_SIZE-1)); +@@ -1219,7 +1218,7 @@ ssize_t reiserfs_file_write( struct file + // But overwriting files on absolutelly full volumes would not + // be very efficient. Well, people are not supposed to fill + // 100% of disk space anyway. +- write_bytes = min_t(int, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1))); ++ write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1))); + num_pages = 1; + // No blocks were claimed before, so do it now. + reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)); +diff -uprN linux-2.6.8.1.orig/fs/reiserfs/inode.c linux-2.6.8.1-ve022stab078/fs/reiserfs/inode.c +--- linux-2.6.8.1.orig/fs/reiserfs/inode.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/reiserfs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1504,7 +1504,7 @@ int reiserfs_encode_fh(struct dentry *de + ** to properly mark inodes for datasync and such, but only actually + ** does something when called for a synchronous update. 
+ */ +-void reiserfs_write_inode (struct inode * inode, int do_sync) { ++int reiserfs_write_inode (struct inode * inode, int do_sync) { + struct reiserfs_transaction_handle th ; + int jbegin_count = 1 ; + +@@ -1512,7 +1512,7 @@ void reiserfs_write_inode (struct inode + reiserfs_warning (inode->i_sb, + "clm-6005: writing inode %lu on readonly FS", + inode->i_ino) ; +- return ; ++ return -EROFS; + } + /* memory pressure can sometimes initiate write_inode calls with sync == 1, + ** these cases are just when the system needs ram, not when the +@@ -1526,6 +1526,7 @@ void reiserfs_write_inode (struct inode + journal_end_sync(&th, inode->i_sb, jbegin_count) ; + reiserfs_write_unlock(inode->i_sb); + } ++ return 0; + } + + /* FIXME: no need any more. right? */ +diff -uprN linux-2.6.8.1.orig/fs/reiserfs/namei.c linux-2.6.8.1-ve022stab078/fs/reiserfs/namei.c +--- linux-2.6.8.1.orig/fs/reiserfs/namei.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/reiserfs/namei.c 2006-05-11 13:05:43.000000000 +0400 +@@ -799,6 +799,9 @@ static int reiserfs_rmdir (struct inode + struct reiserfs_dir_entry de; + + ++ inode = dentry->d_inode; ++ DQUOT_INIT(inode); ++ + /* we will be doing 2 balancings and update 2 stat data */ + jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2; + +@@ -814,8 +817,6 @@ static int reiserfs_rmdir (struct inode + goto end_rmdir; + } + +- inode = dentry->d_inode; +- + reiserfs_update_inode_transaction(inode) ; + reiserfs_update_inode_transaction(dir) ; + +@@ -878,6 +879,7 @@ static int reiserfs_unlink (struct inode + unsigned long savelink; + + inode = dentry->d_inode; ++ DQUOT_INIT(inode); + + /* in this transaction we can be doing at max two balancings and update + two stat datas */ +@@ -1146,6 +1148,8 @@ static int reiserfs_rename (struct inode + + old_inode = old_dentry->d_inode; + new_dentry_inode = new_dentry->d_inode; ++ if (new_dentry_inode) ++ DQUOT_INIT(new_dentry_inode); + + // make sure, that oldname still exists and points to an object we + // are going to rename +diff -uprN linux-2.6.8.1.orig/fs/reiserfs/xattr.c linux-2.6.8.1-ve022stab078/fs/reiserfs/xattr.c +--- linux-2.6.8.1.orig/fs/reiserfs/xattr.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/reiserfs/xattr.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1429,9 +1429,26 @@ check_capabilities: + } + + int +-reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd) ++reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) + { +- return __reiserfs_permission (inode, mask, nd, 1); ++ int ret; ++ ++ if (exec_perm != NULL) ++ down(&inode->i_sem); ++ ++ ret = __reiserfs_permission (inode, mask, nd, 1); ++ ++ if (exec_perm != NULL) { ++ if (!ret) { ++ exec_perm->set = 1; ++ exec_perm->mode = inode->i_mode; ++ exec_perm->uid = inode->i_uid; ++ exec_perm->gid = inode->i_gid; ++ } ++ up(&inode->i_sem); ++ } ++ return ret; + } + + int +diff -uprN linux-2.6.8.1.orig/fs/select.c linux-2.6.8.1-ve022stab078/fs/select.c +--- linux-2.6.8.1.orig/fs/select.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/select.c 2006-05-11 13:05:39.000000000 +0400 +@@ -24,6 +24,8 @@ + + #include <asm/uaccess.h> + ++#include <ub/ub_mem.h> ++ + #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) + #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) + +@@ -94,7 +96,8 @@ void __pollwait(struct file *filp, wait_ + if (!table || POLL_TABLE_FULL(table)) { + struct poll_table_page *new_table; + +- new_table = (struct poll_table_page 
*) __get_free_page(GFP_KERNEL); ++ new_table = (struct poll_table_page *) __get_free_page( ++ GFP_KERNEL_UBC); + if (!new_table) { + p->error = -ENOMEM; + __set_current_state(TASK_RUNNING); +@@ -275,7 +278,7 @@ EXPORT_SYMBOL(do_select); + + static void *select_bits_alloc(int size) + { +- return kmalloc(6 * size, GFP_KERNEL); ++ return ub_kmalloc(6 * size, GFP_KERNEL); + } + + static void select_bits_free(void *bits, int size) +@@ -484,7 +487,7 @@ asmlinkage long sys_poll(struct pollfd _ + err = -ENOMEM; + while(i!=0) { + struct poll_list *pp; +- pp = kmalloc(sizeof(struct poll_list)+ ++ pp = ub_kmalloc(sizeof(struct poll_list)+ + sizeof(struct pollfd)* + (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), + GFP_KERNEL); +diff -uprN linux-2.6.8.1.orig/fs/seq_file.c linux-2.6.8.1-ve022stab078/fs/seq_file.c +--- linux-2.6.8.1.orig/fs/seq_file.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/seq_file.c 2006-05-11 13:05:40.000000000 +0400 +@@ -311,6 +311,8 @@ int seq_path(struct seq_file *m, + if (m->count < m->size) { + char *s = m->buf + m->count; + char *p = d_path(dentry, mnt, s, m->size - m->count); ++ if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG) ++ return 0; + if (!IS_ERR(p)) { + while (s <= p) { + char c = *p++; +diff -uprN linux-2.6.8.1.orig/fs/simfs.c linux-2.6.8.1-ve022stab078/fs/simfs.c +--- linux-2.6.8.1.orig/fs/simfs.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/simfs.c 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,289 @@ ++/* ++ * fs/simfs.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/init.h> ++#include <linux/namei.h> ++#include <linux/err.h> ++#include <linux/module.h> ++#include <linux/mount.h> ++#include <linux/vzquota.h> ++#include <linux/statfs.h> ++#include <linux/virtinfo.h> ++#include <linux/faudit.h> ++#include <linux/genhd.h> ++ ++#include <asm/unistd.h> ++#include <asm/uaccess.h> ++ ++#define SIMFS_GET_LOWER_FS_SB(sb) sb->s_root->d_sb ++ ++static struct super_operations sim_super_ops; ++ ++static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct super_block *sb; ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (!inode->i_op->getattr) { ++ generic_fillattr(inode, stat); ++ if (!stat->blksize) { ++ unsigned blocks; ++ ++ sb = inode->i_sb; ++ blocks = (stat->size + sb->s_blocksize-1) >> ++ sb->s_blocksize_bits; ++ stat->blocks = (sb->s_blocksize / 512) * blocks; ++ stat->blksize = sb->s_blocksize; ++ } ++ } else { ++ int err; ++ ++ err = inode->i_op->getattr(mnt, dentry, stat); ++ if (err) ++ return err; ++ } ++ ++ sb = mnt->mnt_sb; ++ if (sb->s_op == &sim_super_ops) ++ stat->dev = sb->s_dev; ++ return 0; ++} ++ ++static void quota_get_stat(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ struct dq_stat qstat; ++ struct virt_info_quota q; ++ long free_file, adj_file; ++ s64 blk, free_blk, adj_blk; ++ int bsize_bits; ++ ++ q.super = sb; ++ q.qstat = &qstat; ++ err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q); ++ if (err != NOTIFY_OK) ++ return; ++ ++ bsize_bits = ffs(buf->f_bsize) - 1; ++ free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits; ++ if (free_blk < 0) ++ free_blk = 0; ++ /* ++ * In the regular case, we always set buf->f_bfree and buf->f_blocks to ++ * the values reported by quota. 
In case of real disk space shortage, ++ * we adjust the values. We want this adjustment to look as if the ++ * total disk space were reduced, not as if the usage were increased. ++ * -- SAW ++ */ ++ adj_blk = 0; ++ if (buf->f_bfree < free_blk) ++ adj_blk = free_blk - buf->f_bfree; ++ buf->f_bfree = (long)(free_blk - adj_blk); ++ ++ if (free_blk < buf->f_bavail) ++ buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */ ++ ++ blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk; ++ buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk; ++ ++ free_file = qstat.isoftlimit - qstat.icurrent; ++ if (free_file < 0) ++ free_file = 0; ++ if (buf->f_ffree == -1) ++ /* ++ * One filesystem uses -1 to represent the fact that it doesn't ++ * have a detached limit for inode number. ++ * May be, because -1 is a good pretendent for the maximum value ++ * of signed long type, may be, because it's just nice to have ++ * an exceptional case... Guess what that filesystem is :-) ++ * -- SAW ++ */ ++ buf->f_ffree = free_file; ++ adj_file = 0; ++ if (buf->f_ffree < free_file) ++ adj_file = free_file - buf->f_ffree; ++ buf->f_ffree = free_file - adj_file; ++ buf->f_files = qstat.isoftlimit - adj_file; ++} ++ ++static int sim_statfs(struct super_block *sb, struct kstatfs *buf) ++{ ++ int err; ++ struct super_block *lsb; ++ struct kstatfs statbuf; ++ ++ err = 0; ++ if (sb->s_op != &sim_super_ops) ++ return 0; ++ ++ lsb = SIMFS_GET_LOWER_FS_SB(sb); ++ ++ err = -ENOSYS; ++ if (lsb && lsb->s_op && lsb->s_op->statfs) ++ err = lsb->s_op->statfs(lsb, &statbuf); ++ if (err) ++ return err; ++ ++ quota_get_stat(sb, &statbuf); ++ buf->f_files = statbuf.f_files; ++ buf->f_ffree = statbuf.f_ffree; ++ buf->f_blocks = statbuf.f_blocks; ++ buf->f_bfree = statbuf.f_bfree; ++ buf->f_bavail = statbuf.f_bavail; ++ return 0; ++} ++ ++static int sim_systemcall(struct vnotifier_block *me, unsigned long n, ++ void *d, int old_ret) ++{ ++ int err; ++ ++ switch (n) { ++ case VIRTINFO_FAUDIT_STAT: { ++ struct faudit_stat_arg *arg; ++ ++ arg = (struct faudit_stat_arg *)d; ++ err = sim_getattr(arg->mnt, arg->dentry, arg->stat); ++ arg->err = err; ++ } ++ break; ++ case VIRTINFO_FAUDIT_STATFS: { ++ struct faudit_statfs_arg *arg; ++ ++ arg = (struct faudit_statfs_arg *)d; ++ err = sim_statfs(arg->sb, arg->stat); ++ arg->err = err; ++ } ++ break; ++ default: ++ return old_ret; ++ } ++ return (err ? 
NOTIFY_BAD : NOTIFY_OK); ++} ++ ++static struct inode *sim_quota_root(struct super_block *sb) ++{ ++ return sb->s_root->d_inode; ++} ++ ++void sim_put_super(struct super_block *sb) ++{ ++ struct virt_info_quota viq; ++ ++ viq.super = sb; ++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq); ++ bdput(sb->s_bdev); ++} ++ ++static struct super_operations sim_super_ops = { ++ .get_quota_root = sim_quota_root, ++ .put_super = sim_put_super, ++}; ++ ++static int sim_fill_super(struct super_block *s, void *data) ++{ ++ int err; ++ struct nameidata *nd; ++ ++ err = set_anon_super(s, NULL); ++ if (err) ++ goto out; ++ ++ err = 0; ++ nd = (struct nameidata *)data; ++ s->s_root = dget(nd->dentry); ++ s->s_op = &sim_super_ops; ++out: ++ return err; ++} ++ ++struct super_block *sim_get_sb(struct file_system_type *type, ++ int flags, const char *dev_name, void *opt) ++{ ++ int err; ++ struct nameidata nd; ++ struct super_block *sb; ++ struct block_device *bd; ++ struct virt_info_quota viq; ++ static struct hd_struct fake_hds; ++ ++ sb = ERR_PTR(-EINVAL); ++ if (opt == NULL) ++ goto out; ++ ++ err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); ++ sb = ERR_PTR(err); ++ if (err) ++ goto out; ++ ++ sb = sget(type, NULL, sim_fill_super, &nd); ++ if (IS_ERR(sb)) ++ goto out_path; ++ ++ bd = bdget(sb->s_dev); ++ if (!bd) ++ goto out_killsb; ++ ++ sb->s_bdev = bd; ++ bd->bd_part = &fake_hds; ++ viq.super = sb; ++ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq); ++out_path: ++ path_release(&nd); ++out: ++ return sb; ++ ++out_killsb: ++ up_write(&sb->s_umount); ++ deactivate_super(sb); ++ sb = ERR_PTR(-ENODEV); ++ goto out_path; ++} ++ ++static struct file_system_type sim_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "simfs", ++ .get_sb = sim_get_sb, ++ .kill_sb = kill_anon_super, ++}; ++ ++static struct vnotifier_block sim_syscalls = { ++ .notifier_call = sim_systemcall, ++}; ++ ++static int __init init_simfs(void) ++{ ++ int err; ++ ++ err = register_filesystem(&sim_fs_type); ++ if (err) ++ return err; ++ ++ virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls); ++ return 0; ++} ++ ++static void __exit exit_simfs(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls); ++ unregister_filesystem(&sim_fs_type); ++} ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(init_simfs); ++module_exit(exit_simfs); +diff -uprN linux-2.6.8.1.orig/fs/smbfs/dir.c linux-2.6.8.1-ve022stab078/fs/smbfs/dir.c +--- linux-2.6.8.1.orig/fs/smbfs/dir.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/smbfs/dir.c 2006-05-11 13:05:34.000000000 +0400 +@@ -431,6 +431,11 @@ smb_lookup(struct inode *dir, struct den + if (dentry->d_name.len > SMB_MAXNAMELEN) + goto out; + ++ /* Do not allow lookup of names with backslashes in */ ++ error = -EINVAL; ++ if (memchr(dentry->d_name.name, '\\', dentry->d_name.len)) ++ goto out; ++ + lock_kernel(); + error = smb_proc_getattr(dentry, &finfo); + #ifdef SMBFS_PARANOIA +diff -uprN linux-2.6.8.1.orig/fs/smbfs/file.c linux-2.6.8.1-ve022stab078/fs/smbfs/file.c +--- linux-2.6.8.1.orig/fs/smbfs/file.c 2004-08-14 14:56:13.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/smbfs/file.c 2006-05-11 13:05:35.000000000 +0400 +@@ -387,7 +387,8 @@ smb_file_release(struct inode *inode, st + * privileges, so we need our own check for this. 
+ */ + static int +-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) ++smb_file_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm) + { + int mode = inode->i_mode; + int error = 0; +diff -uprN linux-2.6.8.1.orig/fs/smbfs/inode.c linux-2.6.8.1-ve022stab078/fs/smbfs/inode.c +--- linux-2.6.8.1.orig/fs/smbfs/inode.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/smbfs/inode.c 2006-05-11 13:05:43.000000000 +0400 +@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info + { + VERBOSE("\n"); + shrink_dcache_sb(SB_of(server)); +- invalidate_inodes(SB_of(server)); ++ invalidate_inodes(SB_of(server), 0); + } + + /* +diff -uprN linux-2.6.8.1.orig/fs/smbfs/sock.c linux-2.6.8.1-ve022stab078/fs/smbfs/sock.c +--- linux-2.6.8.1.orig/fs/smbfs/sock.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/smbfs/sock.c 2006-05-11 13:05:44.000000000 +0400 +@@ -100,6 +100,7 @@ smb_close_socket(struct smb_sb_info *ser + + VERBOSE("closing socket %p\n", sock); + sock->sk->sk_data_ready = server->data_ready; ++ sock->sk->sk_user_data = NULL; + server->sock_file = NULL; + fput(file); + } +diff -uprN linux-2.6.8.1.orig/fs/stat.c linux-2.6.8.1-ve022stab078/fs/stat.c +--- linux-2.6.8.1.orig/fs/stat.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/stat.c 2006-05-11 13:05:40.000000000 +0400 +@@ -14,6 +14,7 @@ + #include <linux/fs.h> + #include <linux/namei.h> + #include <linux/security.h> ++#include <linux/faudit.h> + + #include <asm/uaccess.h> + #include <asm/unistd.h> +@@ -41,11 +42,19 @@ int vfs_getattr(struct vfsmount *mnt, st + { + struct inode *inode = dentry->d_inode; + int retval; ++ struct faudit_stat_arg arg; + + retval = security_inode_getattr(mnt, dentry); + if (retval) + return retval; + ++ arg.mnt = mnt; ++ arg.dentry = dentry; ++ arg.stat = stat; ++ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg) ++ != NOTIFY_DONE) ++ return arg.err; ++ + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +diff -uprN linux-2.6.8.1.orig/fs/super.c linux-2.6.8.1-ve022stab078/fs/super.c +--- linux-2.6.8.1.orig/fs/super.c 2004-08-14 14:55:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/super.c 2006-05-11 13:05:43.000000000 +0400 +@@ -23,6 +23,7 @@ + #include <linux/config.h> + #include <linux/module.h> + #include <linux/slab.h> ++#include <linux/ve_owner.h> + #include <linux/init.h> + #include <linux/smp_lock.h> + #include <linux/acct.h> +@@ -65,8 +66,10 @@ static struct super_block *alloc_super(v + } + INIT_LIST_HEAD(&s->s_dirty); + INIT_LIST_HEAD(&s->s_io); ++ INIT_LIST_HEAD(&s->s_inodes); + INIT_LIST_HEAD(&s->s_files); + INIT_LIST_HEAD(&s->s_instances); ++ INIT_LIST_HEAD(&s->s_dshrinkers); + INIT_HLIST_HEAD(&s->s_anon); + init_rwsem(&s->s_umount); + sema_init(&s->s_lock, 1); +@@ -116,6 +119,27 @@ int __put_super(struct super_block *sb) + return ret; + } + ++/* ++ * Drop a superblock's refcount. ++ * Returns non-zero if the superblock is about to be destroyed and ++ * at least is already removed from super_blocks list, so if we are ++ * making a loop through super blocks then we need to restart. ++ * The caller must hold sb_lock. ++ */ ++int __put_super_and_need_restart(struct super_block *sb) ++{ ++ /* check for race with generic_shutdown_super() */ ++ if (list_empty(&sb->s_list)) { ++ /* super block is removed, need to restart... 
*/ ++ __put_super(sb); ++ return 1; ++ } ++ /* can't be the last, since s_list is still in use */ ++ sb->s_count--; ++ BUG_ON(sb->s_count == 0); ++ return 0; ++} ++ + /** + * put_super - drop a temporary reference to superblock + * @s: superblock in question +@@ -205,14 +229,15 @@ void generic_shutdown_super(struct super + if (root) { + sb->s_root = NULL; + shrink_dcache_parent(root); +- shrink_dcache_anon(&sb->s_anon); ++ shrink_dcache_anon(sb); + dput(root); ++ dcache_shrinker_wait_sb(sb); + fsync_super(sb); + lock_super(sb); + lock_kernel(); + sb->s_flags &= ~MS_ACTIVE; + /* bad name - it should be evict_inodes() */ +- invalidate_inodes(sb); ++ invalidate_inodes(sb, 0); + + if (sop->write_super && sb->s_dirt) + sop->write_super(sb); +@@ -220,16 +245,16 @@ void generic_shutdown_super(struct super + sop->put_super(sb); + + /* Forget any remaining inodes */ +- if (invalidate_inodes(sb)) { +- printk("VFS: Busy inodes after unmount. " +- "Self-destruct in 5 seconds. Have a nice day...\n"); +- } ++ if (invalidate_inodes(sb, 1)) ++ printk("Self-destruct in 5 seconds. " ++ "Have a nice day...\n"); + + unlock_kernel(); + unlock_super(sb); + } + spin_lock(&sb_lock); +- list_del(&sb->s_list); ++ /* should be initialized for __put_super_and_need_restart() */ ++ list_del_init(&sb->s_list); + list_del(&sb->s_instances); + spin_unlock(&sb_lock); + up_write(&sb->s_umount); +@@ -282,7 +307,7 @@ retry: + } + s->s_type = type; + strlcpy(s->s_id, type->name, sizeof(s->s_id)); +- list_add(&s->s_list, super_blocks.prev); ++ list_add_tail(&s->s_list, &super_blocks); + list_add(&s->s_instances, &type->fs_supers); + spin_unlock(&sb_lock); + get_filesystem(type); +@@ -315,20 +340,22 @@ static inline void write_super(struct su + */ + void sync_supers(void) + { +- struct super_block * sb; +-restart: ++ struct super_block *sb; ++ + spin_lock(&sb_lock); +- sb = sb_entry(super_blocks.next); +- while (sb != sb_entry(&super_blocks)) ++restart: ++ list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_dirt) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + write_super(sb); +- drop_super(sb); +- goto restart; +- } else +- sb = sb_entry(sb->s_list.next); ++ up_read(&sb->s_umount); ++ spin_lock(&sb_lock); ++ if (__put_super_and_need_restart(sb)) ++ goto restart; ++ } ++ } + spin_unlock(&sb_lock); + } + +@@ -355,20 +382,16 @@ void sync_filesystems(int wait) + + down(&mutex); /* Could be down_interruptible */ + spin_lock(&sb_lock); +- for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); +- sb = sb_entry(sb->s_list.next)) { ++ list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_op->sync_fs) + continue; + if (sb->s_flags & MS_RDONLY) + continue; + sb->s_need_sync_fs = 1; + } +- spin_unlock(&sb_lock); + + restart: +- spin_lock(&sb_lock); +- for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); +- sb = sb_entry(sb->s_list.next)) { ++ list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_need_sync_fs) + continue; + sb->s_need_sync_fs = 0; +@@ -379,8 +402,11 @@ restart: + down_read(&sb->s_umount); + if (sb->s_root && (wait || sb->s_dirt)) + sb->s_op->sync_fs(sb, wait); +- drop_super(sb); +- goto restart; ++ up_read(&sb->s_umount); ++ /* restart only when sb is no longer on the list */ ++ spin_lock(&sb_lock); ++ if (__put_super_and_need_restart(sb)) ++ goto restart; + } + spin_unlock(&sb_lock); + up(&mutex); +@@ -396,20 +422,20 @@ restart: + + struct super_block * get_super(struct block_device *bdev) + { +- struct list_head *p; ++ struct super_block 
*sb; ++ + if (!bdev) + return NULL; + rescan: + spin_lock(&sb_lock); +- list_for_each(p, &super_blocks) { +- struct super_block *s = sb_entry(p); +- if (s->s_bdev == bdev) { +- s->s_count++; ++ list_for_each_entry(sb, &super_blocks, s_list) { ++ if (sb->s_bdev == bdev) { ++ sb->s_count++; + spin_unlock(&sb_lock); +- down_read(&s->s_umount); +- if (s->s_root) +- return s; +- drop_super(s); ++ down_read(&sb->s_umount); ++ if (sb->s_root) ++ return sb; ++ drop_super(sb); + goto rescan; + } + } +@@ -421,19 +447,18 @@ EXPORT_SYMBOL(get_super); + + struct super_block * user_get_super(dev_t dev) + { +- struct list_head *p; ++ struct super_block *sb; + + rescan: + spin_lock(&sb_lock); +- list_for_each(p, &super_blocks) { +- struct super_block *s = sb_entry(p); +- if (s->s_dev == dev) { +- s->s_count++; ++ list_for_each_entry(sb, &super_blocks, s_list) { ++ if (sb->s_dev == dev) { ++ sb->s_count++; + spin_unlock(&sb_lock); +- down_read(&s->s_umount); +- if (s->s_root) +- return s; +- drop_super(s); ++ down_read(&sb->s_umount); ++ if (sb->s_root) ++ return sb; ++ drop_super(sb); + goto rescan; + } + } +@@ -448,11 +473,20 @@ asmlinkage long sys_ustat(unsigned dev, + struct super_block *s; + struct ustat tmp; + struct kstatfs sbuf; +- int err = -EINVAL; ++ dev_t kdev; ++ int err; ++ ++ kdev = new_decode_dev(dev); ++#ifdef CONFIG_VE ++ err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ); ++ if (err) ++ goto out; ++#endif + +- s = user_get_super(new_decode_dev(dev)); +- if (s == NULL) +- goto out; ++ err = -EINVAL; ++ s = user_get_super(kdev); ++ if (s == NULL) ++ goto out; + err = vfs_statfs(s, &sbuf); + drop_super(s); + if (err) +@@ -566,6 +600,13 @@ void emergency_remount(void) + static struct idr unnamed_dev_idr; + static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */ + ++/* for compatibility with coreutils still unaware of new minor sizes */ ++int unnamed_dev_majors[] = { ++ 0, 144, 145, 146, 242, 243, 244, 245, ++ 246, 247, 248, 249, 250, 251, 252, 253 ++}; ++EXPORT_SYMBOL(unnamed_dev_majors); ++ + int set_anon_super(struct super_block *s, void *data) + { + int dev; +@@ -583,13 +624,13 @@ int set_anon_super(struct super_block *s + else if (error) + return -EAGAIN; + +- if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { ++ if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) { + spin_lock(&unnamed_dev_lock); + idr_remove(&unnamed_dev_idr, dev); + spin_unlock(&unnamed_dev_lock); + return -EMFILE; + } +- s->s_dev = MKDEV(0, dev & MINORMASK); ++ s->s_dev = make_unnamed_dev(dev); + return 0; + } + +@@ -597,8 +638,9 @@ EXPORT_SYMBOL(set_anon_super); + + void kill_anon_super(struct super_block *sb) + { +- int slot = MINOR(sb->s_dev); ++ int slot; + ++ slot = unnamed_dev_idx(sb->s_dev); + generic_shutdown_super(sb); + spin_lock(&unnamed_dev_lock); + idr_remove(&unnamed_dev_idr, slot); +@@ -754,17 +796,14 @@ struct super_block *get_sb_single(struct + EXPORT_SYMBOL(get_sb_single); + + struct vfsmount * +-do_kern_mount(const char *fstype, int flags, const char *name, void *data) ++do_kern_mount(struct file_system_type *type, int flags, ++ const char *name, void *data) + { +- struct file_system_type *type = get_fs_type(fstype); + struct super_block *sb = ERR_PTR(-ENOMEM); + struct vfsmount *mnt; + int error; + char *secdata = NULL; + +- if (!type) +- return ERR_PTR(-ENODEV); +- + mnt = alloc_vfsmnt(name); + if (!mnt) + goto out; +@@ -795,7 +834,6 @@ do_kern_mount(const char *fstype, int fl + mnt->mnt_parent = mnt; + mnt->mnt_namespace = current->namespace; + up_write(&sb->s_umount); +- 
put_filesystem(type); + return mnt; + out_sb: + up_write(&sb->s_umount); +@@ -806,7 +844,6 @@ out_free_secdata: + out_mnt: + free_vfsmnt(mnt); + out: +- put_filesystem(type); + return (struct vfsmount *)sb; + } + +@@ -814,7 +851,7 @@ EXPORT_SYMBOL_GPL(do_kern_mount); + + struct vfsmount *kern_mount(struct file_system_type *type) + { +- return do_kern_mount(type->name, 0, type->name, NULL); ++ return do_kern_mount(type, 0, type->name, NULL); + } + + EXPORT_SYMBOL(kern_mount); +diff -uprN linux-2.6.8.1.orig/fs/sysfs/bin.c linux-2.6.8.1-ve022stab078/fs/sysfs/bin.c +--- linux-2.6.8.1.orig/fs/sysfs/bin.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/bin.c 2006-05-11 13:05:42.000000000 +0400 +@@ -162,6 +162,11 @@ int sysfs_create_bin_file(struct kobject + struct dentry * parent; + int error = 0; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif ++ + if (!kobj || !attr) + return -EINVAL; + +@@ -195,6 +200,10 @@ int sysfs_create_bin_file(struct kobject + + int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) + { ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + sysfs_hash_and_remove(kobj->dentry,attr->attr.name); + return 0; + } +diff -uprN linux-2.6.8.1.orig/fs/sysfs/dir.c linux-2.6.8.1-ve022stab078/fs/sysfs/dir.c +--- linux-2.6.8.1.orig/fs/sysfs/dir.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/dir.c 2006-05-11 13:05:42.000000000 +0400 +@@ -63,13 +63,17 @@ int sysfs_create_dir(struct kobject * ko + struct dentry * parent; + int error = 0; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + if (!kobj) + return -EINVAL; + + if (kobj->parent) + parent = kobj->parent->dentry; +- else if (sysfs_mount && sysfs_mount->mnt_sb) +- parent = sysfs_mount->mnt_sb->s_root; ++ else if (visible_sysfs_mount && visible_sysfs_mount->mnt_sb) ++ parent = visible_sysfs_mount->mnt_sb->s_root; + else + return -EFAULT; + +@@ -113,9 +117,14 @@ void sysfs_remove_subdir(struct dentry * + void sysfs_remove_dir(struct kobject * kobj) + { + struct list_head * node; +- struct dentry * dentry = dget(kobj->dentry); ++ struct dentry * dentry; + +- if (!dentry) ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return; ++#endif ++ dentry = dget(kobj->dentry); ++ if (!dentry) + return; + + pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); +@@ -129,6 +138,7 @@ restart: + + node = node->next; + pr_debug(" o %s (%d): ",d->d_name.name,atomic_read(&d->d_count)); ++ spin_lock(&d->d_lock); + if (!d_unhashed(d) && (d->d_inode)) { + d = dget_locked(d); + pr_debug("removing"); +@@ -137,6 +147,7 @@ restart: + * Unlink and unhash. 
+ */ + __d_drop(d); ++ spin_unlock(&d->d_lock); + spin_unlock(&dcache_lock); + /* release the target kobject in case of + * a symlink +@@ -151,6 +162,7 @@ restart: + /* re-acquired dcache_lock, need to restart */ + goto restart; + } ++ spin_unlock(&d->d_lock); + } + spin_unlock(&dcache_lock); + up(&dentry->d_inode->i_sem); +@@ -167,6 +179,10 @@ int sysfs_rename_dir(struct kobject * ko + int error = 0; + struct dentry * new_dentry, * parent; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + if (!strcmp(kobject_name(kobj), new_name)) + return -EINVAL; + +diff -uprN linux-2.6.8.1.orig/fs/sysfs/file.c linux-2.6.8.1-ve022stab078/fs/sysfs/file.c +--- linux-2.6.8.1.orig/fs/sysfs/file.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/file.c 2006-05-11 13:05:42.000000000 +0400 +@@ -228,13 +228,14 @@ static ssize_t + sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) + { + struct sysfs_buffer * buffer = file->private_data; ++ ssize_t len; + +- count = fill_write_buffer(buffer,buf,count); +- if (count > 0) +- count = flush_write_buffer(file,buffer,count); +- if (count > 0) +- *ppos += count; +- return count; ++ len = fill_write_buffer(buffer, buf, count); ++ if (len > 0) ++ len = flush_write_buffer(file, buffer, len); ++ if (len > 0) ++ *ppos += len; ++ return len; + } + + static int check_perm(struct inode * inode, struct file * file) +@@ -375,6 +376,10 @@ int sysfs_add_file(struct dentry * dir, + + int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) + { ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + if (kobj && attr) + return sysfs_add_file(kobj->dentry,attr); + return -EINVAL; +@@ -395,6 +400,10 @@ int sysfs_update_file(struct kobject * k + struct dentry * victim; + int res = -ENOENT; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + down(&dir->d_inode->i_sem); + victim = sysfs_get_dentry(dir, attr->name); + if (!IS_ERR(victim)) { +@@ -432,6 +441,10 @@ int sysfs_update_file(struct kobject * k + + void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) + { ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return; ++#endif + sysfs_hash_and_remove(kobj->dentry,attr->name); + } + +diff -uprN linux-2.6.8.1.orig/fs/sysfs/group.c linux-2.6.8.1-ve022stab078/fs/sysfs/group.c +--- linux-2.6.8.1.orig/fs/sysfs/group.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/group.c 2006-05-11 13:05:42.000000000 +0400 +@@ -45,6 +45,10 @@ int sysfs_create_group(struct kobject * + struct dentry * dir; + int error; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + if (grp->name) { + error = sysfs_create_subdir(kobj,grp->name,&dir); + if (error) +@@ -65,6 +69,10 @@ void sysfs_remove_group(struct kobject * + { + struct dentry * dir; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return; ++#endif + if (grp->name) + dir = sysfs_get_dentry(kobj->dentry,grp->name); + else +diff -uprN linux-2.6.8.1.orig/fs/sysfs/inode.c linux-2.6.8.1-ve022stab078/fs/sysfs/inode.c +--- linux-2.6.8.1.orig/fs/sysfs/inode.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/inode.c 2006-05-11 13:05:42.000000000 +0400 +@@ -8,10 +8,17 @@ + + #undef DEBUG + ++#include <linux/config.h> + #include <linux/pagemap.h> + #include <linux/namei.h> + #include <linux/backing-dev.h> +-extern struct super_block * sysfs_sb; ++ ++#ifndef CONFIG_VE ++extern struct 
super_block *sysfs_sb; ++#define visible_sysfs_sb sysfs_sb ++#else ++#define visible_sysfs_sb (get_exec_env()->sysfs_sb) ++#endif + + static struct address_space_operations sysfs_aops = { + .readpage = simple_readpage, +@@ -26,7 +33,7 @@ static struct backing_dev_info sysfs_bac + + struct inode * sysfs_new_inode(mode_t mode) + { +- struct inode * inode = new_inode(sysfs_sb); ++ struct inode * inode = new_inode(visible_sysfs_sb); + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; +diff -uprN linux-2.6.8.1.orig/fs/sysfs/mount.c linux-2.6.8.1-ve022stab078/fs/sysfs/mount.c +--- linux-2.6.8.1.orig/fs/sysfs/mount.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/mount.c 2006-05-11 13:05:42.000000000 +0400 +@@ -7,6 +7,7 @@ + #include <linux/fs.h> + #include <linux/mount.h> + #include <linux/pagemap.h> ++#include <linux/module.h> + #include <linux/init.h> + + #include "sysfs.h" +@@ -17,6 +18,15 @@ + struct vfsmount *sysfs_mount; + struct super_block * sysfs_sb = NULL; + ++void prepare_sysfs(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->sysfs_mnt = sysfs_mount; ++ sysfs_mount = (struct vfsmount *)SYSFS_MAGIC; ++ /* ve0.sysfs_sb is setup by sysfs_fill_super() */ ++#endif ++} ++ + static struct super_operations sysfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +@@ -31,7 +41,7 @@ static int sysfs_fill_super(struct super + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SYSFS_MAGIC; + sb->s_op = &sysfs_ops; +- sysfs_sb = sb; ++ visible_sysfs_sb = sb; + + inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); + if (inode) { +@@ -60,12 +70,14 @@ static struct super_block *sysfs_get_sb( + return get_sb_single(fs_type, flags, data, sysfs_fill_super); + } + +-static struct file_system_type sysfs_fs_type = { ++struct file_system_type sysfs_fs_type = { + .name = "sysfs", + .get_sb = sysfs_get_sb, + .kill_sb = kill_litter_super, + }; + ++EXPORT_SYMBOL(sysfs_fs_type); ++ + int __init sysfs_init(void) + { + int err; +@@ -79,5 +91,6 @@ int __init sysfs_init(void) + sysfs_mount = NULL; + } + } ++ prepare_sysfs(); + return err; + } +diff -uprN linux-2.6.8.1.orig/fs/sysfs/symlink.c linux-2.6.8.1-ve022stab078/fs/sysfs/symlink.c +--- linux-2.6.8.1.orig/fs/sysfs/symlink.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/symlink.c 2006-05-11 13:05:42.000000000 +0400 +@@ -65,6 +65,10 @@ int sysfs_create_link(struct kobject * k + struct dentry * d; + int error = 0; + ++#ifdef CONFIG_VE ++ if (!get_exec_env()->sysfs_sb) ++ return 0; ++#endif + down(&dentry->d_inode->i_sem); + d = sysfs_get_dentry(dentry,name); + if (!IS_ERR(d)) { +@@ -90,6 +94,10 @@ int sysfs_create_link(struct kobject * k + + void sysfs_remove_link(struct kobject * kobj, char * name) + { ++#ifdef CONFIG_VE ++ if(!get_exec_env()->sysfs_sb) ++ return; ++#endif + sysfs_hash_and_remove(kobj->dentry,name); + } + +diff -uprN linux-2.6.8.1.orig/fs/sysfs/sysfs.h linux-2.6.8.1-ve022stab078/fs/sysfs/sysfs.h +--- linux-2.6.8.1.orig/fs/sysfs/sysfs.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysfs/sysfs.h 2006-05-11 13:05:42.000000000 +0400 +@@ -1,5 +1,13 @@ + +-extern struct vfsmount * sysfs_mount; ++#ifndef CONFIG_VE ++extern struct vfsmount *sysfs_mount; ++extern struct super_block *sysfs_sb; ++#define visible_sysfs_mount sysfs_mount ++#define visible_sysfs_sb sysfs_sb ++#else ++#define visible_sysfs_mount (get_exec_env()->sysfs_mnt) ++#define visible_sysfs_sb (get_exec_env()->sysfs_sb) ++#endif + + extern struct 
inode * sysfs_new_inode(mode_t mode); + extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); +diff -uprN linux-2.6.8.1.orig/fs/sysv/inode.c linux-2.6.8.1-ve022stab078/fs/sysv/inode.c +--- linux-2.6.8.1.orig/fs/sysv/inode.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysv/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -260,13 +260,14 @@ static struct buffer_head * sysv_update_ + return bh; + } + +-void sysv_write_inode(struct inode * inode, int wait) ++int sysv_write_inode(struct inode * inode, int wait) + { + struct buffer_head *bh; + lock_kernel(); + bh = sysv_update_inode(inode); + brelse(bh); + unlock_kernel(); ++ return 0; + } + + int sysv_sync_inode(struct inode * inode) +diff -uprN linux-2.6.8.1.orig/fs/sysv/namei.c linux-2.6.8.1-ve022stab078/fs/sysv/namei.c +--- linux-2.6.8.1.orig/fs/sysv/namei.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysv/namei.c 2006-05-11 13:05:32.000000000 +0400 +@@ -114,7 +114,7 @@ static int sysv_symlink(struct inode * d + goto out; + + sysv_set_inode(inode, 0); +- err = page_symlink(inode, symname, l); ++ err = page_symlink(inode, symname, l, GFP_KERNEL); + if (err) + goto out_fail; + +diff -uprN linux-2.6.8.1.orig/fs/sysv/sysv.h linux-2.6.8.1-ve022stab078/fs/sysv/sysv.h +--- linux-2.6.8.1.orig/fs/sysv/sysv.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/sysv/sysv.h 2006-05-11 13:05:35.000000000 +0400 +@@ -134,7 +134,7 @@ extern unsigned long sysv_count_free_blo + extern void sysv_truncate(struct inode *); + + /* inode.c */ +-extern void sysv_write_inode(struct inode *, int); ++extern int sysv_write_inode(struct inode *, int); + extern int sysv_sync_inode(struct inode *); + extern int sysv_sync_file(struct file *, struct dentry *, int); + extern void sysv_set_inode(struct inode *, dev_t); +diff -uprN linux-2.6.8.1.orig/fs/udf/file.c linux-2.6.8.1-ve022stab078/fs/udf/file.c +--- linux-2.6.8.1.orig/fs/udf/file.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/udf/file.c 2006-05-11 13:05:35.000000000 +0400 +@@ -188,7 +188,7 @@ int udf_ioctl(struct inode *inode, struc + { + int result = -EINVAL; + +- if ( permission(inode, MAY_READ, NULL) != 0 ) ++ if ( permission(inode, MAY_READ, NULL, NULL) != 0 ) + { + udf_debug("no permission to access inode %lu\n", + inode->i_ino); +diff -uprN linux-2.6.8.1.orig/fs/udf/inode.c linux-2.6.8.1-ve022stab078/fs/udf/inode.c +--- linux-2.6.8.1.orig/fs/udf/inode.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/udf/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -1313,11 +1313,13 @@ udf_convert_permissions(struct fileEntry + * Written, tested, and released. 
+ */ + +-void udf_write_inode(struct inode * inode, int sync) ++int udf_write_inode(struct inode * inode, int sync) + { ++ int ret; + lock_kernel(); +- udf_update_inode(inode, sync); ++ ret = udf_update_inode(inode, sync); + unlock_kernel(); ++ return ret; + } + + int udf_sync_inode(struct inode * inode) +diff -uprN linux-2.6.8.1.orig/fs/udf/udfdecl.h linux-2.6.8.1-ve022stab078/fs/udf/udfdecl.h +--- linux-2.6.8.1.orig/fs/udf/udfdecl.h 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/udf/udfdecl.h 2006-05-11 13:05:35.000000000 +0400 +@@ -100,7 +100,7 @@ extern void udf_read_inode(struct inode + extern void udf_put_inode(struct inode *); + extern void udf_delete_inode(struct inode *); + extern void udf_clear_inode(struct inode *); +-extern void udf_write_inode(struct inode *, int); ++extern int udf_write_inode(struct inode *, int); + extern long udf_block_map(struct inode *, long); + extern int8_t inode_bmap(struct inode *, int, lb_addr *, uint32_t *, lb_addr *, uint32_t *, uint32_t *, struct buffer_head **); + extern int8_t udf_add_aext(struct inode *, lb_addr *, int *, lb_addr, uint32_t, struct buffer_head **, int); +diff -uprN linux-2.6.8.1.orig/fs/ufs/inode.c linux-2.6.8.1-ve022stab078/fs/ufs/inode.c +--- linux-2.6.8.1.orig/fs/ufs/inode.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ufs/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -788,11 +788,13 @@ static int ufs_update_inode(struct inode + return 0; + } + +-void ufs_write_inode (struct inode * inode, int wait) ++int ufs_write_inode (struct inode * inode, int wait) + { ++ int ret; + lock_kernel(); +- ufs_update_inode (inode, wait); ++ ret = ufs_update_inode (inode, wait); + unlock_kernel(); ++ return ret; + } + + int ufs_sync_inode (struct inode *inode) +diff -uprN linux-2.6.8.1.orig/fs/ufs/namei.c linux-2.6.8.1-ve022stab078/fs/ufs/namei.c +--- linux-2.6.8.1.orig/fs/ufs/namei.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/ufs/namei.c 2006-05-11 13:05:32.000000000 +0400 +@@ -156,7 +156,7 @@ static int ufs_symlink (struct inode * d + /* slow symlink */ + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &ufs_aops; +- err = page_symlink(inode, symname, l); ++ err = page_symlink(inode, symname, l, GFP_KERNEL); + if (err) + goto out_fail; + } else { +diff -uprN linux-2.6.8.1.orig/fs/umsdos/inode.c linux-2.6.8.1-ve022stab078/fs/umsdos/inode.c +--- linux-2.6.8.1.orig/fs/umsdos/inode.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/umsdos/inode.c 2006-05-11 13:05:35.000000000 +0400 +@@ -312,11 +312,12 @@ out: + /* + * Update the disk with the inode content + */ +-void UMSDOS_write_inode (struct inode *inode, int wait) ++int UMSDOS_write_inode (struct inode *inode, int wait) + { + struct iattr newattrs; ++ int ret; + +- fat_write_inode (inode, wait); ++ ret = fat_write_inode (inode, wait); + newattrs.ia_mtime = inode->i_mtime; + newattrs.ia_atime = inode->i_atime; + newattrs.ia_ctime = inode->i_ctime; +@@ -330,6 +331,7 @@ void UMSDOS_write_inode (struct inode *i + * UMSDOS_notify_change (inode, &newattrs); + + * inode->i_state &= ~I_DIRTY; / * FIXME: this doesn't work. We need to remove ourselves from list on dirty inodes. 
/mn/ */ ++ return ret; + } + + +diff -uprN linux-2.6.8.1.orig/fs/umsdos/namei.c linux-2.6.8.1-ve022stab078/fs/umsdos/namei.c +--- linux-2.6.8.1.orig/fs/umsdos/namei.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/umsdos/namei.c 2006-05-11 13:05:32.000000000 +0400 +@@ -499,7 +499,7 @@ static int umsdos_symlink_x (struct inod + } + + len = strlen (symname) + 1; +- ret = page_symlink(dentry->d_inode, symname, len); ++ ret = page_symlink(dentry->d_inode, symname, len, GFP_KERNEL); + if (ret < 0) + goto out_unlink; + out: +diff -uprN linux-2.6.8.1.orig/fs/vzdq_file.c linux-2.6.8.1-ve022stab078/fs/vzdq_file.c +--- linux-2.6.8.1.orig/fs/vzdq_file.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdq_file.c 2006-05-11 13:05:44.000000000 +0400 +@@ -0,0 +1,851 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo quota files as proc entry implementation. ++ * It is required for std quota tools to work correctly as they are expecting ++ * aquota.user and aquota.group files. ++ */ ++ ++#include <linux/ctype.h> ++#include <linux/slab.h> ++#include <linux/list.h> ++#include <linux/module.h> ++#include <linux/proc_fs.h> ++#include <linux/sysctl.h> ++#include <linux/mount.h> ++#include <linux/namespace.h> ++#include <linux/quotaio_v2.h> ++#include <asm/uaccess.h> ++ ++#include <linux/ve.h> ++#include <linux/ve_proto.h> ++#include <linux/vzdq_tree.h> ++#include <linux/vzquota.h> ++ ++/* ---------------------------------------------------------------------- ++ * ++ * File read operation ++ * ++ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c, ++ * perhaps) abuse vz_quota_sem. ++ * Taking a global semaphore for lengthy and user-controlled operations inside ++ * VPSs is not a good idea in general. ++ * In this case, the reasons for taking this semaphore are completely unclear, ++ * especially taking into account that the only function that has comments ++ * about the necessity to be called under this semaphore ++ * (create_proc_quotafile) is actually called OUTSIDE it. 
++ * ++ * --------------------------------------------------------------------- */ ++ ++#define DQBLOCK_SIZE 1024 ++#define DQUOTBLKNUM 21U ++#define DQTREE_DEPTH 4 ++#define TREENUM_2_BLKNUM(num) (((num) + 1) << 1) ++#define ISINDBLOCK(num) ((num)%2 != 0) ++#define FIRST_DATABLK 2 /* first even number */ ++#define LAST_IND_LEVEL (DQTREE_DEPTH - 1) ++#define CONVERT_LEVEL(level) ((level) * (QUOTAID_EBITS/QUOTAID_BBITS)) ++#define GETLEVINDX(ind, lev) (((ind) >> QUOTAID_BBITS*(lev)) \ ++ & QUOTATREE_BMASK) ++ ++#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH) ++#error xBITS and DQTREE_DEPTH does not correspond ++#endif ++ ++#define BLOCK_NOT_FOUND 1 ++ ++/* data for quota file -- one per proc entry */ ++struct quotatree_data { ++ struct list_head list; ++ struct vz_quota_master *qmblk; ++ int type; /* type of the tree */ ++}; ++ ++/* serialized by vz_quota_sem */ ++static LIST_HEAD(qf_data_head); ++ ++static const u_int32_t vzquota_magics[] = V2_INITQMAGICS; ++static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS; ++ ++static inline loff_t get_depoff(int depth) ++{ ++ loff_t res = 1; ++ while (depth) { ++ res += (1 << ((depth - 1)*QUOTAID_EBITS + 1)); ++ depth--; ++ } ++ return res; ++} ++ ++static inline loff_t get_blknum(loff_t num, int depth) ++{ ++ loff_t res; ++ res = (num << 1) + get_depoff(depth); ++ return res; ++} ++ ++static int get_depth(loff_t num) ++{ ++ int i; ++ for (i = 0; i < DQTREE_DEPTH; i++) { ++ if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1 ++ || num < get_depoff(i + 1))) ++ return i; ++ } ++ return -1; ++} ++ ++static inline loff_t get_offset(loff_t num) ++{ ++ loff_t res, tmp; ++ ++ tmp = get_depth(num); ++ if (tmp < 0) ++ return -1; ++ num -= get_depoff(tmp); ++ BUG_ON(num < 0); ++ res = num >> 1; ++ ++ return res; ++} ++ ++static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level) ++{ ++ /* return maximum available block num */ ++ return tree->levels[level].freenum; ++} ++ ++static inline loff_t get_block_num(struct quotatree_tree *tree) ++{ ++ loff_t ind_blk_num, quot_blk_num, max_ind, max_quot; ++ ++ quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1); ++ max_quot = TREENUM_2_BLKNUM(quot_blk_num); ++ ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1)); ++ max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL) ++ : get_blknum(ind_blk_num, 0); ++ ++ return (max_ind > max_quot) ? 
max_ind + 1 : max_quot + 1; ++} ++ ++/* Write quota file header */ ++static int read_header(void *buf, struct quotatree_tree *tree, ++ struct dq_info *dq_ugid_info, int type) ++{ ++ struct v2_disk_dqheader *dqh; ++ struct v2_disk_dqinfo *dq_disk_info; ++ ++ dqh = buf; ++ dq_disk_info = buf + sizeof(struct v2_disk_dqheader); ++ ++ dqh->dqh_magic = vzquota_magics[type]; ++ dqh->dqh_version = vzquota_versions[type]; ++ ++ dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire; ++ dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire; ++ dq_disk_info->dqi_flags = 0; /* no flags */ ++ dq_disk_info->dqi_blocks = get_block_num(tree); ++ dq_disk_info->dqi_free_blk = 0; /* first block in the file */ ++ dq_disk_info->dqi_free_entry = FIRST_DATABLK; ++ ++ return 0; ++} ++ ++static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf) ++{ ++ int i, j, lev_num; ++ ++ lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1; ++ for (i = 0; i < BLOCK_SIZE/sizeof(u_int32_t); i++) { ++ struct quotatree_node *next, *parent; ++ ++ parent = p; ++ next = p; ++ for (j = lev_num; j >= 0; j--) { ++ if (!next->blocks[GETLEVINDX(i,j)]) { ++ buf[i] = 0; ++ goto bad_branch; ++ } ++ parent = next; ++ next = next->blocks[GETLEVINDX(i,j)]; ++ } ++ buf[i] = (depth == DQTREE_DEPTH - 1) ? ++ TREENUM_2_BLKNUM(parent->num) ++ : get_blknum(next->num, depth + 1); ++ ++ bad_branch: ++ ; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Write index block to disk (or buffer) ++ * @buf has length 256*sizeof(u_int32_t) bytes ++ */ ++static int read_index_block(int num, u_int32_t *buf, ++ struct quotatree_tree *tree) ++{ ++ struct quotatree_node *p; ++ u_int32_t index; ++ loff_t off; ++ int depth, res; ++ ++ res = BLOCK_NOT_FOUND; ++ index = 0; ++ depth = get_depth(num); ++ off = get_offset(num); ++ if (depth < 0 || off < 0) ++ return -EINVAL; ++ ++ list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh, ++ list) { ++ if (p->num >= off) ++ res = 0; ++ if (p->num != off) ++ continue; ++ get_block_child(depth, p, buf); ++ break; ++ } ++ ++ return res; ++} ++ ++static inline void convert_quot_format(struct v2_disk_dqblk *dq, ++ struct vz_quota_ugid *vzq) ++{ ++ dq->dqb_id = vzq->qugid_id; ++ dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit; ++ dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit; ++ dq->dqb_curinodes = vzq->qugid_stat.icurrent; ++ dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE; ++ dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE; ++ dq->dqb_curspace = vzq->qugid_stat.bcurrent; ++ dq->dqb_btime = vzq->qugid_stat.btime; ++ dq->dqb_itime = vzq->qugid_stat.itime; ++} ++ ++static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree) ++{ ++ int res, i, entries = 0; ++ struct v2_disk_dqdbheader *dq_header; ++ struct quotatree_node *p; ++ struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader); ++ ++ res = BLOCK_NOT_FOUND; ++ dq_header = buf; ++ memset(dq_header, 0, sizeof(*dq_header)); ++ ++ list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh), ++ list) { ++ if (TREENUM_2_BLKNUM(p->num) >= num) ++ res = 0; ++ if (TREENUM_2_BLKNUM(p->num) != num) ++ continue; ++ ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ if (!p->blocks[i]) ++ continue; ++ convert_quot_format(blk + entries, ++ (struct vz_quota_ugid *)p->blocks[i]); ++ entries++; ++ res = 0; ++ } ++ break; ++ } ++ dq_header->dqdh_entries = entries; ++ ++ return res; ++} ++ ++static int read_block(int num, void *buf, struct quotatree_tree *tree, ++ struct dq_info *dq_ugid_info, int magic) ++{ ++ int res; 
++ ++ memset(buf, 0, DQBLOCK_SIZE); ++ if (!num) ++ res = read_header(buf, tree, dq_ugid_info, magic); ++ else if (ISINDBLOCK(num)) ++ res = read_index_block(num, (u_int32_t*)buf, tree); ++ else ++ res = read_dquot(num, buf, tree); ++ ++ return res; ++} ++ ++/* ++ * FIXME: this function can handle quota files up to 2GB only. ++ */ ++static int read_proc_quotafile(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ off_t blk_num, blk_off, buf_off; ++ char *tmp; ++ size_t buf_size; ++ struct quotatree_data *qtd; ++ struct quotatree_tree *tree; ++ struct dq_info *dqi; ++ int res; ++ ++ tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ ++ qtd = data; ++ down(&vz_quota_sem); ++ down(&qtd->qmblk->dq_sem); ++ ++ res = 0; ++ tree = QUGID_TREE(qtd->qmblk, qtd->type); ++ if (!tree) { ++ *eof = 1; ++ goto out_dq; ++ } ++ ++ dqi = &qtd->qmblk->dq_ugid_info[qtd->type]; ++ ++ buf_off = 0; ++ buf_size = count; ++ blk_num = off / DQBLOCK_SIZE; ++ blk_off = off % DQBLOCK_SIZE; ++ ++ while (buf_size > 0) { ++ off_t len; ++ ++ len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size); ++ res = read_block(blk_num, tmp, tree, dqi, qtd->type); ++ if (res < 0) ++ goto out_err; ++ if (res == BLOCK_NOT_FOUND) { ++ *eof = 1; ++ break; ++ } ++ memcpy(page + buf_off, tmp + blk_off, len); ++ ++ blk_num++; ++ buf_size -= len; ++ blk_off = 0; ++ buf_off += len; ++ } ++ res = buf_off; ++ ++out_err: ++ *start = NULL + count; ++out_dq: ++ up(&qtd->qmblk->dq_sem); ++ up(&vz_quota_sem); ++ kfree(tmp); ++ ++ return res; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota/QID/aquota.* files ++ * ++ * FIXME: this code lacks serialization of read/readdir/lseek. ++ * However, this problem should be fixed after the mainstream issue of what ++ * appears to be non-atomic read and update of file position in sys_read. 
++ * ++ * --------------------------------------------------------------------- */ ++ ++static inline unsigned long vzdq_aquot_getino(dev_t dev) ++{ ++ return 0xec000000UL + dev; ++} ++ ++static inline dev_t vzdq_aquot_getidev(struct inode *inode) ++{ ++ return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link; ++} ++ ++static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev) ++{ ++ PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev; ++} ++ ++static ssize_t vzdq_aquotf_read(struct file *file, ++ char __user *buf, size_t size, loff_t *ppos) ++{ ++ char *page; ++ size_t bufsize; ++ ssize_t l, l2, copied; ++ char *start; ++ struct inode *inode; ++ struct block_device *bdev; ++ struct super_block *sb; ++ struct quotatree_data data; ++ int eof, err; ++ ++ err = -ENOMEM; ++ page = (char *)__get_free_page(GFP_KERNEL); ++ if (page == NULL) ++ goto out_err; ++ ++ err = -ENODEV; ++ inode = file->f_dentry->d_inode; ++ bdev = bdget(vzdq_aquot_getidev(inode)); ++ if (bdev == NULL) ++ goto out_err; ++ sb = get_super(bdev); ++ bdput(bdev); ++ if (sb == NULL) ++ goto out_err; ++ data.qmblk = vzquota_find_qmblk(sb); ++ data.type = PROC_I(inode)->type - 1; ++ drop_super(sb); ++ if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD) ++ goto out_err; ++ ++ copied = 0; ++ l = l2 = 0; ++ while (1) { ++ bufsize = min(size, (size_t)PAGE_SIZE); ++ if (bufsize <= 0) ++ break; ++ ++ l = read_proc_quotafile(page, &start, *ppos, bufsize, ++ &eof, &data); ++ if (l <= 0) ++ break; ++ ++ l2 = copy_to_user(buf, page, l); ++ copied += l - l2; ++ if (l2) ++ break; ++ ++ buf += l; ++ size -= l; ++ *ppos += (unsigned long)start; ++ l = l2 = 0; ++ } ++ ++ qmblk_put(data.qmblk); ++ free_page((unsigned long)page); ++ if (copied) ++ return copied; ++ else if (l2) /* last copy_to_user failed */ ++ return -EFAULT; ++ else /* read error or EOF */ ++ return l; ++ ++out_err: ++ if (page != NULL) ++ free_page((unsigned long)page); ++ return err; ++} ++ ++static struct file_operations vzdq_aquotf_file_operations = { ++ .read = &vzdq_aquotf_read, ++}; ++ ++static struct inode_operations vzdq_aquotf_inode_operations = { ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota/QID directory ++ * ++ * --------------------------------------------------------------------- */ ++ ++static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler) ++{ ++ loff_t n; ++ int err; ++ ++ n = file->f_pos; ++ for (err = 0; !err; n++) { ++ switch (n) { ++ case 0: ++ err = (*filler)(data, ".", 1, n, ++ file->f_dentry->d_inode->i_ino, ++ DT_DIR); ++ break; ++ case 1: ++ err = (*filler)(data, "..", 2, n, ++ parent_ino(file->f_dentry), DT_DIR); ++ break; ++ case 2: ++ err = (*filler)(data, "aquota.user", 11, n, ++ file->f_dentry->d_inode->i_ino ++ + USRQUOTA + 1, ++ DT_REG); ++ break; ++ case 3: ++ err = (*filler)(data, "aquota.group", 12, n, ++ file->f_dentry->d_inode->i_ino ++ + GRPQUOTA + 1, ++ DT_REG); ++ break; ++ default: ++ goto out; ++ } ++ } ++out: ++ file->f_pos = n; ++ return err; ++} ++ ++struct vzdq_aquotq_lookdata { ++ dev_t dev; ++ int type; ++}; ++ ++static int vzdq_aquotq_looktest(struct inode *inode, void *data) ++{ ++ struct vzdq_aquotq_lookdata *d; ++ ++ d = data; ++ return inode->i_op == &vzdq_aquotf_inode_operations && ++ vzdq_aquot_getidev(inode) == d->dev && ++ PROC_I(inode)->type == d->type + 1; ++} ++ ++static int vzdq_aquotq_lookset(struct inode *inode, void *data) ++{ ++ struct vzdq_aquotq_lookdata *d; ++ ++ d = data; ++ 
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1; ++ inode->i_mode = S_IFREG | S_IRUSR; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_op = &vzdq_aquotf_inode_operations; ++ inode->i_fop = &vzdq_aquotf_file_operations; ++ PROC_I(inode)->type = d->type + 1; ++ vzdq_aquot_setidev(inode, d->dev); ++ return 0; ++} ++ ++static struct dentry *vzdq_aquotq_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ struct vzdq_aquotq_lookdata d; ++ int k; ++ ++ if (dentry->d_name.len == 11) { ++ if (memcmp(dentry->d_name.name, "aquota.user", 11)) ++ goto out; ++ k = USRQUOTA; ++ } else if (dentry->d_name.len == 12) { ++ if (memcmp(dentry->d_name.name, "aquota.group", 11)) ++ goto out; ++ k = GRPQUOTA; ++ } else ++ goto out; ++ d.dev = vzdq_aquot_getidev(dir); ++ d.type = k; ++ inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1, ++ vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d); ++ if (inode == NULL) ++ goto out; ++ unlock_new_inode(inode); ++ d_add(dentry, inode); ++ return NULL; ++ ++out: ++ return ERR_PTR(-ENOENT); ++} ++ ++static struct file_operations vzdq_aquotq_file_operations = { ++ .read = &generic_read_dir, ++ .readdir = &vzdq_aquotq_readdir, ++}; ++ ++static struct inode_operations vzdq_aquotq_inode_operations = { ++ .lookup = &vzdq_aquotq_lookup, ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * /proc/vz/vzaquota directory ++ * ++ * --------------------------------------------------------------------- */ ++ ++struct vzdq_aquot_de { ++ struct list_head list; ++ struct vfsmount *mnt; ++}; ++ ++static int vzdq_aquot_buildmntlist(struct ve_struct *ve, ++ struct list_head *head) ++{ ++ struct vfsmount *rmnt, *mnt; ++ struct vzdq_aquot_de *p; ++ int err; ++ ++#ifdef CONFIG_VE ++ rmnt = mntget(ve->fs_rootmnt); ++#else ++ read_lock(¤t->fs->lock); ++ rmnt = mntget(current->fs->rootmnt); ++ read_unlock(¤t->fs->lock); ++#endif ++ mnt = rmnt; ++ down_read(&rmnt->mnt_namespace->sem); ++ while (1) { ++ list_for_each_entry(p, head, list) { ++ if (p->mnt->mnt_sb == mnt->mnt_sb) ++ goto skip; ++ } ++ ++ err = -ENOMEM; ++ p = kmalloc(sizeof(*p), GFP_KERNEL); ++ if (p == NULL) ++ goto out; ++ p->mnt = mntget(mnt); ++ list_add_tail(&p->list, head); ++ ++skip: ++ err = 0; ++ if (list_empty(&mnt->mnt_mounts)) { ++ while (1) { ++ if (mnt == rmnt) ++ goto out; ++ if (mnt->mnt_child.next != ++ &mnt->mnt_parent->mnt_mounts) ++ break; ++ mnt = mnt->mnt_parent; ++ } ++ mnt = list_entry(mnt->mnt_child.next, ++ struct vfsmount, mnt_child); ++ } else ++ mnt = list_first_entry(&mnt->mnt_mounts, ++ struct vfsmount, mnt_child); ++ } ++out: ++ up_read(&rmnt->mnt_namespace->sem); ++ mntput(rmnt); ++ return err; ++} ++ ++static void vzdq_aquot_releasemntlist(struct ve_struct *ve, ++ struct list_head *head) ++{ ++ struct vzdq_aquot_de *p; ++ ++ while (!list_empty(head)) { ++ p = list_first_entry(head, typeof(*p), list); ++ mntput(p->mnt); ++ list_del(&p->list); ++ kfree(p); ++ } ++} ++ ++static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler) ++{ ++ struct ve_struct *ve, *old_ve; ++ struct list_head mntlist; ++ struct vzdq_aquot_de *de; ++ struct super_block *sb; ++ struct vz_quota_master *qmblk; ++ loff_t i, n; ++ char buf[24]; ++ int l, err; ++ ++ i = 0; ++ n = file->f_pos; ++ ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type); ++ old_ve = set_exec_env(ve); ++ ++ INIT_LIST_HEAD(&mntlist); ++#ifdef 
CONFIG_VE ++ /* ++ * The only reason of disabling readdir for the host system is that ++ * this readdir can be slow and CPU consuming with large number of VPSs ++ * (or just mount points). ++ */ ++ err = ve_is_super(ve); ++#else ++ err = 0; ++#endif ++ if (!err) { ++ err = vzdq_aquot_buildmntlist(ve, &mntlist); ++ if (err) ++ goto out_err; ++ } ++ ++ if (i >= n) { ++ if ((*filler)(data, ".", 1, i, ++ file->f_dentry->d_inode->i_ino, DT_DIR)) ++ goto out_fill; ++ } ++ i++; ++ ++ if (i >= n) { ++ if ((*filler)(data, "..", 2, i, ++ parent_ino(file->f_dentry), DT_DIR)) ++ goto out_fill; ++ } ++ i++; ++ ++ list_for_each_entry (de, &mntlist, list) { ++ sb = de->mnt->mnt_sb; ++#ifdef CONFIG_VE ++ if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL)) ++ continue; ++#endif ++ qmblk = vzquota_find_qmblk(sb); ++ if (qmblk == NULL || qmblk == VZ_QUOTA_BAD) ++ continue; ++ ++ qmblk_put(qmblk); ++ i++; ++ if (i <= n) ++ continue; ++ ++ l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev)); ++ if ((*filler)(data, buf, l, i - 1, ++ vzdq_aquot_getino(sb->s_dev), DT_DIR)) ++ break; ++ } ++ ++out_fill: ++ err = 0; ++ file->f_pos = i; ++out_err: ++ vzdq_aquot_releasemntlist(ve, &mntlist); ++ set_exec_env(old_ve); ++ return err; ++} ++ ++static int vzdq_aquotd_looktest(struct inode *inode, void *data) ++{ ++ return inode->i_op == &vzdq_aquotq_inode_operations && ++ vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data; ++} ++ ++static int vzdq_aquotd_lookset(struct inode *inode, void *data) ++{ ++ dev_t dev; ++ ++ dev = (dev_t)(unsigned long)data; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; ++ inode->i_ino = vzdq_aquot_getino(dev); ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 2; ++ inode->i_op = &vzdq_aquotq_inode_operations; ++ inode->i_fop = &vzdq_aquotq_file_operations; ++ vzdq_aquot_setidev(inode, dev); ++ return 0; ++} ++ ++static struct dentry *vzdq_aquotd_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct ve_struct *ve, *old_ve; ++ const unsigned char *s; ++ int l; ++ dev_t dev; ++ struct inode *inode; ++ ++ ve = VE_OWNER_FSTYPE(dir->i_sb->s_type); ++ old_ve = set_exec_env(ve); ++#ifdef CONFIG_VE ++ /* ++ * Lookup is much lighter than readdir, so it can be allowed for the ++ * host system. But it would be strange to be able to do lookup only ++ * without readdir... 
++ */ ++ if (ve_is_super(ve)) ++ goto out; ++#endif ++ ++ dev = 0; ++ l = dentry->d_name.len; ++ if (l <= 0) ++ goto out; ++ for (s = dentry->d_name.name; l > 0; s++, l--) { ++ if (!isxdigit(*s)) ++ goto out; ++ if (dev & ~(~0UL >> 4)) ++ goto out; ++ dev <<= 4; ++ if (isdigit(*s)) ++ dev += *s - '0'; ++ else if (islower(*s)) ++ dev += *s - 'a' + 10; ++ else ++ dev += *s - 'A' + 10; ++ } ++ dev = new_decode_dev(dev); ++ ++#ifdef CONFIG_VE ++ if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL)) ++ goto out; ++#endif ++ ++ inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev), ++ vzdq_aquotd_looktest, vzdq_aquotd_lookset, ++ (void *)(unsigned long)dev); ++ if (inode == NULL) ++ goto out; ++ unlock_new_inode(inode); ++ ++ d_add(dentry, inode); ++ set_exec_env(old_ve); ++ return NULL; ++ ++out: ++ set_exec_env(old_ve); ++ return ERR_PTR(-ENOENT); ++} ++ ++static struct file_operations vzdq_aquotd_file_operations = { ++ .read = &generic_read_dir, ++ .readdir = &vzdq_aquotd_readdir, ++}; ++ ++static struct inode_operations vzdq_aquotd_inode_operations = { ++ .lookup = &vzdq_aquotd_lookup, ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Initialization and deinitialization ++ * ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * FIXME: creation of proc entries here is unsafe with respect to module ++ * unloading. ++ */ ++void vzaquota_init(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_glob_entry("vz/vzaquota", ++ S_IFDIR | S_IRUSR | S_IXUSR, NULL); ++ if (de != NULL) { ++ de->proc_iops = &vzdq_aquotd_inode_operations; ++ de->proc_fops = &vzdq_aquotd_file_operations; ++ } else ++ printk("VZDQ: vz/vzaquota creation failed\n"); ++#if defined(CONFIG_SYSCTL) ++ de = create_proc_glob_entry("sys/fs/quota", ++ S_IFDIR | S_IRUSR | S_IXUSR, NULL); ++ if (de == NULL) ++ printk("VZDQ: sys/fs/quota creation failed\n"); ++#endif ++} ++ ++void vzaquota_fini(void) ++{ ++} +diff -uprN linux-2.6.8.1.orig/fs/vzdq_mgmt.c linux-2.6.8.1-ve022stab078/fs/vzdq_mgmt.c +--- linux-2.6.8.1.orig/fs/vzdq_mgmt.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdq_mgmt.c 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,735 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/string.h> ++#include <linux/list.h> ++#include <asm/semaphore.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/writeback.h> ++#include <linux/gfp.h> ++#include <asm/uaccess.h> ++#include <linux/proc_fs.h> ++#include <linux/quota.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++ ++ ++/* ---------------------------------------------------------------------- ++ * Switching quota on. 
++ * --------------------------------------------------------------------- */ ++ ++/* ++ * check limits copied from user ++ */ ++int vzquota_check_sane_limits(struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ ++ /* softlimit must be less then hardlimit */ ++ if (qstat->bsoftlimit > qstat->bhardlimit) ++ goto out; ++ ++ if (qstat->isoftlimit > qstat->ihardlimit) ++ goto out; ++ ++ err = 0; ++out: ++ return err; ++} ++ ++/* ++ * check usage values copied from user ++ */ ++int vzquota_check_sane_values(struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ ++ /* expiration time must not be set if softlimit was not exceeded */ ++ if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0) ++ goto out; ++ ++ if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0) ++ goto out; ++ ++ err = vzquota_check_sane_limits(qstat); ++out: ++ return err; ++} ++ ++/* ++ * create new quota master block ++ * this function should: ++ * - copy limits and usage parameters from user buffer; ++ * - allock, initialize quota block and insert it to hash; ++ */ ++static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -EFAULT; ++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) ++ goto out; ++ ++ err = -EINVAL; ++ if (quota_id == 0) ++ goto out; ++ ++ if (vzquota_check_sane_values(&qstat.dq_stat)) ++ goto out; ++ err = 0; ++ qmblk = vzquota_alloc_master(quota_id, &qstat); ++ ++ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */ ++ err = PTR_ERR(qmblk); ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++/** ++ * vzquota_on - turn quota on ++ * ++ * This function should: ++ * - find and get refcnt of directory entry for quota root and corresponding ++ * mountpoint; ++ * - find corresponding quota block and mark it with given path; ++ * - check quota tree; ++ * - initialize quota for the tree root. ++ */ ++static int vzquota_on(unsigned int quota_id, const char *quota_root) ++{ ++ int err; ++ struct nameidata nd; ++ struct vz_quota_master *qmblk; ++ struct super_block *dqsb; ++ ++ dqsb = NULL; ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; ++ ++ err = user_path_walk(quota_root, &nd); ++ if (err) ++ goto out; ++ /* init path must be a directory */ ++ err = -ENOTDIR; ++ if (!S_ISDIR(nd.dentry->d_inode->i_mode)) ++ goto out_path; ++ ++ qmblk->dq_root_dentry = nd.dentry; ++ qmblk->dq_root_mnt = nd.mnt; ++ qmblk->dq_sb = nd.dentry->d_inode->i_sb; ++ err = vzquota_get_super(qmblk->dq_sb); ++ if (err) ++ goto out_super; ++ ++ /* ++ * Serialization with quota initialization and operations is performed ++ * through generation check: generation is memorized before qmblk is ++ * found and compared under inode_qmblk_lock with assignment. ++ * ++ * Note that the dentry tree is shrunk only for high-level logical ++ * serialization, purely as a courtesy to the user: to have consistent ++ * quota statistics, files should be closed etc. on quota on. 
++ */ ++ err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode, ++ qmblk); ++ if (err) ++ goto out_init; ++ qmblk->dq_state = VZDQ_WORKING; ++ ++ up(&vz_quota_sem); ++ return 0; ++ ++out_init: ++ dqsb = qmblk->dq_sb; ++out_super: ++ /* clear for qmblk_put/quota_free_master */ ++ qmblk->dq_sb = NULL; ++ qmblk->dq_root_dentry = NULL; ++ qmblk->dq_root_mnt = NULL; ++out_path: ++ path_release(&nd); ++out: ++ if (dqsb) ++ vzquota_put_super(dqsb); ++ up(&vz_quota_sem); ++ return err; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Switching quota off. ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * destroy quota block by ID ++ */ ++static int vzquota_destroy(unsigned int quota_id) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ struct dentry *dentry; ++ struct vfsmount *mnt; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state == VZDQ_WORKING) ++ goto out; /* quota_off first */ ++ ++ list_del_init(&qmblk->dq_hash); ++ dentry = qmblk->dq_root_dentry; ++ qmblk->dq_root_dentry = NULL; ++ mnt = qmblk->dq_root_mnt; ++ qmblk->dq_root_mnt = NULL; ++ ++ if (qmblk->dq_sb) ++ vzquota_put_super(qmblk->dq_sb); ++ up(&vz_quota_sem); ++ ++ qmblk_put(qmblk); ++ dput(dentry); ++ mntput(mnt); ++ return 0; ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/** ++ * vzquota_off - turn quota off ++ */ ++ ++static int __vzquota_sync_list(struct list_head *lh, ++ struct vz_quota_master *qmblk, ++ enum writeback_sync_modes sync_mode) ++{ ++ struct writeback_control wbc; ++ LIST_HEAD(list); ++ struct vz_quota_ilink *qlnk; ++ struct inode *inode; ++ int err; ++ ++ memset(&wbc, 0, sizeof(wbc)); ++ wbc.sync_mode = sync_mode; ++ ++ err = 0; ++ while (!list_empty(lh) && !err) { ++ if (need_resched()) { ++ inode_qmblk_unlock(qmblk->dq_sb); ++ schedule(); ++ inode_qmblk_lock(qmblk->dq_sb); ++ } ++ ++ qlnk = list_first_entry(lh, struct vz_quota_ilink, list); ++ list_move(&qlnk->list, &list); ++ ++ inode = igrab(QLNK_INODE(qlnk)); ++ if (!inode) ++ continue; ++ ++ inode_qmblk_unlock(qmblk->dq_sb); ++ ++ wbc.nr_to_write = LONG_MAX; ++ err = sync_inode(inode, &wbc); ++ iput(inode); ++ ++ inode_qmblk_lock(qmblk->dq_sb); ++ } ++ ++ list_splice(&list, lh); ++ return err; ++} ++ ++static int vzquota_sync_list(struct list_head *lh, ++ struct vz_quota_master *qmblk) ++{ ++ int err; ++ ++ err = __vzquota_sync_list(lh, qmblk, WB_SYNC_NONE); ++ if (err) ++ return err; ++ ++ err = __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++static int vzquota_sync_inodes(struct vz_quota_master *qmblk) ++{ ++ int err; ++ LIST_HEAD(qlnk_list); ++ ++ list_splice_init(&qmblk->dq_ilink_list, &qlnk_list); ++ err = vzquota_sync_list(&qlnk_list, qmblk); ++ if (!err && !list_empty(&qmblk->dq_ilink_list)) ++ err = -EBUSY; ++ list_splice(&qlnk_list, &qmblk->dq_ilink_list); ++ ++ return err; ++} ++ ++static int vzquota_off(unsigned int quota_id) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EALREADY; ++ if (qmblk->dq_state != VZDQ_WORKING) ++ goto out; ++ ++ inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */ ++ err = vzquota_sync_inodes(qmblk); ++ if (err) ++ goto out_unlock; ++ inode_qmblk_unlock(qmblk->dq_sb); ++ ++ err = 
vzquota_off_qmblk(qmblk->dq_sb, qmblk); ++ if (err) ++ goto out; ++ ++ /* vzquota_destroy will free resources */ ++ qmblk->dq_state = VZDQ_STOPING; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++ ++out_unlock: ++ inode_qmblk_unlock(qmblk->dq_sb); ++ goto out; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Other VZQUOTA ioctl's. ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * this function should: ++ * - set new limits/buffer under quota master block lock ++ * - if new softlimit less then usage, then set expiration time ++ * - no need to alloc ugid hash table - we'll do that on demand ++ */ ++int vzquota_update_limit(struct dq_stat *_qstat, ++ struct dq_stat *qstat) ++{ ++ int err; ++ ++ err = -EINVAL; ++ if (vzquota_check_sane_limits(qstat)) ++ goto out; ++ ++ err = 0; ++ ++ /* limits */ ++ _qstat->bsoftlimit = qstat->bsoftlimit; ++ _qstat->bhardlimit = qstat->bhardlimit; ++ /* ++ * If the soft limit is exceeded, administrator can override the moment ++ * when the grace period for limit exceeding ends. ++ * Specifying the moment may be useful if the soft limit is set to be ++ * lower than the current usage. In the latter case, if the grace ++ * period end isn't specified, the grace period will start from the ++ * moment of the first write operation. ++ * There is a race with the user level. Soft limit may be already ++ * exceeded before the limit change, and grace period end calculated by ++ * the kernel will be overriden. User level may check if the limit is ++ * already exceeded, but check and set calls are not atomic. ++ * This race isn't dangerous. Under normal cicrumstances, the ++ * difference between the grace period end calculated by the kernel and ++ * the user level should be not greater than as the difference between ++ * the moments of check and set calls, i.e. not bigger than the quota ++ * timer resolution - 1 sec. ++ */ ++ if (qstat->btime != (time_t)0 && ++ _qstat->bcurrent >= _qstat->bsoftlimit) ++ _qstat->btime = qstat->btime; ++ ++ _qstat->isoftlimit = qstat->isoftlimit; ++ _qstat->ihardlimit = qstat->ihardlimit; ++ if (qstat->itime != (time_t)0 && ++ _qstat->icurrent >= _qstat->isoftlimit) ++ _qstat->itime = qstat->itime; ++ ++out: ++ return err; ++} ++ ++/* ++ * set new quota limits. ++ * this function should: ++ * copy new limits from user level ++ * - find quota block ++ * - set new limits and flags. ++ */ ++static int vzquota_setlimit(unsigned int quota_id, ++ struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); /* for hash list protection */ ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&qstat, u_qstat, sizeof(qstat))) ++ goto out; ++ ++ qmblk_data_write_lock(qmblk); ++ err = vzquota_update_limit(&qmblk->dq_stat, &qstat.dq_stat); ++ if (err == 0) ++ qmblk->dq_info = qstat.dq_info; ++ qmblk_data_write_unlock(qmblk); ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/* ++ * get quota limits. 
++ * very simple - just return stat buffer to user ++ */ ++static int vzquota_getstat(unsigned int quota_id, ++ struct vz_quota_stat *u_qstat) ++{ ++ int err; ++ struct vz_quota_stat qstat; ++ struct vz_quota_master *qmblk; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ qmblk_data_read_lock(qmblk); ++ /* copy whole buffer under lock */ ++ memcpy(&qstat.dq_stat, &qmblk->dq_stat, sizeof(qstat.dq_stat)); ++ memcpy(&qstat.dq_info, &qmblk->dq_info, sizeof(qstat.dq_info)); ++ qmblk_data_read_unlock(qmblk); ++ ++ err = copy_to_user(u_qstat, &qstat, sizeof(qstat)); ++ if (err) ++ err = -EFAULT; ++ ++out: ++ up(&vz_quota_sem); ++ return err; ++} ++ ++/* ++ * This is a system call to turn per-VE disk quota on. ++ * Note this call is allowed to run ONLY from VE0 ++ */ ++long do_vzquotactl(int cmd, unsigned int quota_id, ++ struct vz_quota_stat *qstat, const char *ve_root) ++{ ++ int ret; ++ ++ ret = -EPERM; ++ /* access allowed only from root of VE0 */ ++ if (!capable(CAP_SYS_RESOURCE) || ++ !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ switch (cmd) { ++ case VZ_DQ_CREATE: ++ ret = vzquota_create(quota_id, qstat); ++ break; ++ case VZ_DQ_DESTROY: ++ ret = vzquota_destroy(quota_id); ++ break; ++ case VZ_DQ_ON: ++ ret = vzquota_on(quota_id, ve_root); ++ break; ++ case VZ_DQ_OFF: ++ ret = vzquota_off(quota_id); ++ break; ++ case VZ_DQ_SETLIMIT: ++ ret = vzquota_setlimit(quota_id, qstat); ++ break; ++ case VZ_DQ_GETSTAT: ++ ret = vzquota_getstat(quota_id, qstat); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Proc filesystem routines ++ * ---------------------------------------------------------------------*/ ++ ++#if defined(CONFIG_PROC_FS) ++ ++#define QUOTA_UINT_LEN 15 ++#define QUOTA_TIME_LEN_FMT_UINT "%11u" ++#define QUOTA_NUM_LEN_FMT_UINT "%15u" ++#define QUOTA_NUM_LEN_FMT_ULL "%15Lu" ++#define QUOTA_TIME_LEN_FMT_STR "%11s" ++#define QUOTA_NUM_LEN_FMT_STR "%15s" ++#define QUOTA_PROC_MAX_LINE_LEN 2048 ++ ++/* ++ * prints /proc/ve_dq header line ++ */ ++static int print_proc_header(char * buffer) ++{ ++ return sprintf(buffer, ++ "%-11s" ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_NUM_LEN_FMT_STR ++ QUOTA_TIME_LEN_FMT_STR ++ QUOTA_TIME_LEN_FMT_STR ++ "\n", ++ "qid: path", ++ "usage", "softlimit", "hardlimit", "time", "expire"); ++} ++ ++/* ++ * prints proc master record id, dentry path ++ */ ++static int print_proc_master_id(char * buffer, char * path_buf, ++ struct vz_quota_master * qp) ++{ ++ char *path; ++ int over; ++ ++ path = NULL; ++ switch (qp->dq_state) { ++ case VZDQ_WORKING: ++ if (!path_buf) { ++ path = ""; ++ break; ++ } ++ path = d_path(qp->dq_root_dentry, ++ qp->dq_root_mnt, path_buf, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ path = ""; ++ break; ++ } ++ /* do not print large path, truncate it */ ++ over = strlen(path) - ++ (QUOTA_PROC_MAX_LINE_LEN - 3 - 3 - ++ QUOTA_UINT_LEN); ++ if (over > 0) { ++ path += over - 3; ++ path[0] = path[1] = path[3] = '.'; ++ } ++ break; ++ case VZDQ_STARTING: ++ path = "-- started --"; ++ break; ++ case VZDQ_STOPING: ++ path = "-- stopped --"; ++ break; ++ } ++ ++ return sprintf(buffer, "%u: %s\n", qp->dq_id, path); ++} ++ ++/* ++ * prints struct vz_quota_stat data ++ */ ++static int print_proc_stat(char * buffer, struct dq_stat *qs, ++ struct dq_info *qi) ++{ ++ return sprintf(buffer, ++ "%11s" ++ QUOTA_NUM_LEN_FMT_ULL ++ 
QUOTA_NUM_LEN_FMT_ULL ++ QUOTA_NUM_LEN_FMT_ULL ++ QUOTA_TIME_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ "\n" ++ "%11s" ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_NUM_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ QUOTA_TIME_LEN_FMT_UINT ++ "\n", ++ "1k-blocks", ++ qs->bcurrent >> 10, ++ qs->bsoftlimit >> 10, ++ qs->bhardlimit >> 10, ++ (unsigned int)qs->btime, ++ (unsigned int)qi->bexpire, ++ "inodes", ++ qs->icurrent, ++ qs->isoftlimit, ++ qs->ihardlimit, ++ (unsigned int)qs->itime, ++ (unsigned int)qi->iexpire); ++} ++ ++ ++/* ++ * for /proc filesystem output ++ */ ++static int vzquota_read_proc(char *page, char **start, off_t off, int count, ++ int *eof, void *data) ++{ ++ int len, i; ++ off_t printed = 0; ++ char *p = page; ++ struct vz_quota_master *qp; ++ struct vz_quota_ilink *ql2; ++ struct list_head *listp; ++ char *path_buf; ++ ++ path_buf = (char*)__get_free_page(GFP_KERNEL); ++ if (path_buf == NULL) ++ return -ENOMEM; ++ ++ len = print_proc_header(p); ++ printed += len; ++ if (off < printed) /* keep header in output */ { ++ *start = p + off; ++ p += len; ++ } ++ ++ down(&vz_quota_sem); ++ ++ /* traverse master hash table for all records */ ++ for (i = 0; i < vzquota_hash_size; i++) { ++ list_for_each(listp, &vzquota_hash_table[i]) { ++ qp = list_entry(listp, ++ struct vz_quota_master, dq_hash); ++ ++ /* Skip other VE's information if not root of VE0 */ ++ if ((!capable(CAP_SYS_ADMIN) || ++ !capable(CAP_SYS_RESOURCE))) { ++ ql2 = INODE_QLNK(current->fs->root->d_inode); ++ if (ql2 == NULL || qp != ql2->qmblk) ++ continue; ++ } ++ /* ++ * Now print the next record ++ */ ++ len = 0; ++ /* we print quotaid and path only in VE0 */ ++ if (capable(CAP_SYS_ADMIN)) ++ len += print_proc_master_id(p+len,path_buf, qp); ++ len += print_proc_stat(p+len, &qp->dq_stat, ++ &qp->dq_info); ++ printed += len; ++ /* skip unnecessary lines */ ++ if (printed <= off) ++ continue; ++ p += len; ++ /* provide start offset */ ++ if (*start == NULL) ++ *start = p + (off - printed); ++ /* have we printed all requested size? */ ++ if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN || ++ (p - *start) >= count) ++ goto out; ++ } ++ } ++ ++ *eof = 1; /* checked all hash */ ++out: ++ up(&vz_quota_sem); ++ ++ len = 0; ++ if (*start != NULL) { ++ len = (p - *start); ++ if (len > count) ++ len = count; ++ } ++ ++ if (path_buf) ++ free_page((unsigned long) path_buf); ++ ++ return len; ++} ++ ++/* ++ * Register procfs read callback ++ */ ++int vzquota_proc_init(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL); ++ if (de == NULL) { ++ /* create "vz" subdirectory, if not exist */ ++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ if (de == NULL) ++ goto out_err; ++ de = create_proc_entry("vzquota", S_IFREG|S_IRUSR, de); ++ if (de == NULL) ++ goto out_err; ++ } ++ de->read_proc = vzquota_read_proc; ++ de->data = NULL; ++ return 0; ++out_err: ++ return -EBUSY; ++} ++ ++void vzquota_proc_release(void) ++{ ++ /* Unregister procfs read callback */ ++ remove_proc_entry("vz/vzquota", NULL); ++} ++ ++#endif +diff -uprN linux-2.6.8.1.orig/fs/vzdq_ops.c linux-2.6.8.1-ve022stab078/fs/vzdq_ops.c +--- linux-2.6.8.1.orig/fs/vzdq_ops.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdq_ops.c 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,563 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/types.h> ++#include <asm/semaphore.h> ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/quota.h> ++#include <linux/vzquota.h> ++ ++ ++/* ---------------------------------------------------------------------- ++ * Quota superblock operations - helper functions. ++ * --------------------------------------------------------------------- */ ++ ++static inline void vzquota_incr_inodes(struct dq_stat *dqstat, ++ unsigned long number) ++{ ++ dqstat->icurrent += number; ++} ++ ++static inline void vzquota_incr_space(struct dq_stat *dqstat, ++ __u64 number) ++{ ++ dqstat->bcurrent += number; ++} ++ ++static inline void vzquota_decr_inodes(struct dq_stat *dqstat, ++ unsigned long number) ++{ ++ if (dqstat->icurrent > number) ++ dqstat->icurrent -= number; ++ else ++ dqstat->icurrent = 0; ++ if (dqstat->icurrent < dqstat->isoftlimit) ++ dqstat->itime = (time_t) 0; ++} ++ ++static inline void vzquota_decr_space(struct dq_stat *dqstat, ++ __u64 number) ++{ ++ if (dqstat->bcurrent > number) ++ dqstat->bcurrent -= number; ++ else ++ dqstat->bcurrent = 0; ++ if (dqstat->bcurrent < dqstat->bsoftlimit) ++ dqstat->btime = (time_t) 0; ++} ++ ++/* ++ * better printk() message or use /proc/vzquotamsg interface ++ * similar to /proc/kmsg ++ */ ++static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag, ++ const char *fmt) ++{ ++ if (dq_info->flags & flag) /* warning already printed for this ++ masterblock */ ++ return; ++ printk(fmt, dq_id); ++ dq_info->flags |= flag; ++} ++ ++/* ++ * ignore_hardlimit - ++ * ++ * Intended to allow superuser of VE0 to overwrite hardlimits. ++ * ++ * ignore_hardlimit() has a very bad feature: ++ * ++ * writepage() operation for writable mapping of a file with holes ++ * may trigger get_block() with wrong current and as a consequence, ++ * opens a possibility to overcommit hardlimits ++ */ ++/* for the reason above, it is disabled now */ ++static inline int ignore_hardlimit(struct dq_info *dqstat) ++{ ++#if 0 ++ return ve_is_super(get_exec_env()) && ++ capable(CAP_SYS_RESOURCE) && ++ (dqstat->options & VZ_QUOTA_OPT_RSQUASH); ++#else ++ return 0; ++#endif ++} ++ ++static int vzquota_check_inodes(struct dq_info *dq_info, ++ struct dq_stat *dqstat, ++ unsigned long number, int dq_id) ++{ ++ if (number == 0) ++ return QUOTA_OK; ++ ++ if (dqstat->icurrent + number > dqstat->ihardlimit && ++ !ignore_hardlimit(dq_info)) { ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES, ++ "VZ QUOTA: file hardlimit reached for id=%d\n"); ++ return NO_QUOTA; ++ } ++ ++ if (dqstat->icurrent + number > dqstat->isoftlimit) { ++ if (dqstat->itime == (time_t)0) { ++ vzquota_warn(dq_info, dq_id, 0, ++ "VZ QUOTA: file softlimit exceeded " ++ "for id=%d\n"); ++ dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire; ++ } else if (CURRENT_TIME_SECONDS >= dqstat->itime && ++ !ignore_hardlimit(dq_info)) { ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES, ++ "VZ QUOTA: file softlimit expired " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_space(struct dq_info *dq_info, ++ struct dq_stat *dqstat, ++ __u64 number, int dq_id, char prealloc) ++{ ++ if (number == 0) ++ return QUOTA_OK; ++ ++ if (dqstat->bcurrent + number > dqstat->bhardlimit && ++ !ignore_hardlimit(dq_info)) { ++ if (!prealloc) ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE, ++ "VZ QUOTA: disk hardlimit reached " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ ++ if 
(dqstat->bcurrent + number > dqstat->bsoftlimit) { ++ if (dqstat->btime == (time_t)0) { ++ if (!prealloc) { ++ vzquota_warn(dq_info, dq_id, 0, ++ "VZ QUOTA: disk softlimit exceeded " ++ "for id=%d\n"); ++ dqstat->btime = CURRENT_TIME_SECONDS ++ + dq_info->bexpire; ++ } else { ++ /* ++ * Original Linux quota doesn't allow ++ * preallocation to exceed softlimit so ++ * exceeding will be always printed ++ */ ++ return NO_QUOTA; ++ } ++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime && ++ !ignore_hardlimit(dq_info)) { ++ if (!prealloc) ++ vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE, ++ "VZ QUOTA: disk quota " ++ "softlimit expired " ++ "for id=%d\n"); ++ return NO_QUOTA; ++ } ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ int type, unsigned long number) ++{ ++ struct dq_info *dqinfo; ++ struct dq_stat *dqstat; ++ ++ if (qugid[type] == NULL) ++ return QUOTA_OK; ++ if (qugid[type] == VZ_QUOTA_UGBAD) ++ return NO_QUOTA; ++ ++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ++ return QUOTA_OK; ++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)) ++ return QUOTA_OK; ++ if (number == 0) ++ return QUOTA_OK; ++ ++ dqinfo = &qmblk->dq_ugid_info[type]; ++ dqstat = &qugid[type]->qugid_stat; ++ ++ if (dqstat->ihardlimit != 0 && ++ dqstat->icurrent + number > dqstat->ihardlimit) ++ return NO_QUOTA; ++ ++ if (dqstat->isoftlimit != 0 && ++ dqstat->icurrent + number > dqstat->isoftlimit) { ++ if (dqstat->itime == (time_t)0) ++ dqstat->itime = CURRENT_TIME_SECONDS + dqinfo->iexpire; ++ else if (CURRENT_TIME_SECONDS >= dqstat->itime) ++ return NO_QUOTA; ++ } ++ ++ return QUOTA_OK; ++} ++ ++static int vzquota_check_ugid_space(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ int type, __u64 number, char prealloc) ++{ ++ struct dq_info *dqinfo; ++ struct dq_stat *dqstat; ++ ++ if (qugid[type] == NULL) ++ return QUOTA_OK; ++ if (qugid[type] == VZ_QUOTA_UGBAD) ++ return NO_QUOTA; ++ ++ if (type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ++ return QUOTA_OK; ++ if (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)) ++ return QUOTA_OK; ++ if (number == 0) ++ return QUOTA_OK; ++ ++ dqinfo = &qmblk->dq_ugid_info[type]; ++ dqstat = &qugid[type]->qugid_stat; ++ ++ if (dqstat->bhardlimit != 0 && ++ dqstat->bcurrent + number > dqstat->bhardlimit) ++ return NO_QUOTA; ++ ++ if (dqstat->bsoftlimit != 0 && ++ dqstat->bcurrent + number > dqstat->bsoftlimit) { ++ if (dqstat->btime == (time_t)0) { ++ if (!prealloc) ++ dqstat->btime = CURRENT_TIME_SECONDS ++ + dqinfo->bexpire; ++ else ++ /* ++ * Original Linux quota doesn't allow ++ * preallocation to exceed softlimit so ++ * exceeding will be always printed ++ */ ++ return NO_QUOTA; ++ } else if (CURRENT_TIME_SECONDS >= dqstat->btime) ++ return NO_QUOTA; ++ } ++ ++ return QUOTA_OK; ++} ++ ++/* ---------------------------------------------------------------------- ++ * Quota superblock operations ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * S_NOQUOTA note. ++ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for ++ * - quota file (absent in our case) ++ * - after explicit DQUOT_DROP (earlier than clear_inode) in functions like ++ * filesystem-specific new_inode, before the inode gets outside links. 
++ * For the latter case, the only quota operation where care about S_NOQUOTA ++ * might be required is vzquota_drop, but there S_NOQUOTA has already been ++ * checked in DQUOT_DROP(). ++ * So, S_NOQUOTA may be ignored for now in the VZDQ code. ++ * ++ * The above note is not entirely correct. ++ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from ++ * delete_inode if new_inode fails (for example, because of inode quota ++ * limits), so S_NOQUOTA check is needed in free_inode. ++ * This seems to be the dark corner of the current quota API. ++ */ ++ ++/* ++ * Initialize quota operations for the specified inode. ++ */ ++static int vzquota_initialize(struct inode *inode, int type) ++{ ++ vzquota_inode_init_call(inode); ++ return 0; /* ignored by caller */ ++} ++ ++/* ++ * Release quota for the specified inode. ++ */ ++static int vzquota_drop(struct inode *inode) ++{ ++ vzquota_inode_drop_call(inode); ++ return 0; /* ignored by caller */ ++} ++ ++/* ++ * Allocate block callback. ++ * ++ * If (prealloc) disk quota exceeding warning is not printed. ++ * See Linux quota to know why. ++ * ++ * Return: ++ * QUOTA_OK == 0 on SUCCESS ++ * NO_QUOTA == 1 if allocation should fail ++ */ ++static int vzquota_alloc_space(struct inode *inode, ++ qsize_t number, int prealloc) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ int ret = QUOTA_OK; ++ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid[MAXQUOTAS]; ++#endif ++ ++ /* checking first */ ++ ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat, ++ number, qmblk->dq_id, prealloc); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt]; ++ ret = vzquota_check_ugid_space(qmblk, qugid, ++ cnt, number, prealloc); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++ } ++ /* check ok, may increment */ ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ if (qugid[cnt] == NULL) ++ continue; ++ vzquota_incr_space(&qugid[cnt]->qugid_stat, number); ++ } ++#endif ++ vzquota_incr_space(&qmblk->dq_stat, number); ++ vzquota_data_unlock(inode, &data); ++ } ++ ++ inode_add_bytes(inode, number); ++ might_sleep(); ++ return QUOTA_OK; ++ ++no_quota: ++ vzquota_data_unlock(inode, &data); ++ return NO_QUOTA; ++} ++ ++/* ++ * Allocate inodes callback. 
++ * ++ * Return: ++ * QUOTA_OK == 0 on SUCCESS ++ * NO_QUOTA == 1 if allocation should fail ++ */ ++static int vzquota_alloc_inode(const struct inode *inode, unsigned long number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ int ret = QUOTA_OK; ++ ++ qmblk = vzquota_inode_data((struct inode *)inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid *qugid[MAXQUOTAS]; ++#endif ++ ++ /* checking first */ ++ ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat, ++ number, qmblk->dq_id); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt]; ++ ret = vzquota_check_ugid_inodes(qmblk, qugid, ++ cnt, number); ++ if (ret == NO_QUOTA) ++ goto no_quota; ++ } ++ /* check ok, may increment */ ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ if (qugid[cnt] == NULL) ++ continue; ++ vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number); ++ } ++#endif ++ vzquota_incr_inodes(&qmblk->dq_stat, number); ++ vzquota_data_unlock((struct inode *)inode, &data); ++ } ++ ++ might_sleep(); ++ return QUOTA_OK; ++ ++no_quota: ++ vzquota_data_unlock((struct inode *)inode, &data); ++ return NO_QUOTA; ++} ++ ++/* ++ * Free space callback. ++ */ ++static int vzquota_free_space(struct inode *inode, qsize_t number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; /* isn't checked by the caller */ ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid; ++#endif ++ ++ vzquota_decr_space(&qmblk->dq_stat, number); ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid = INODE_QLNK(inode)->qugid[cnt]; ++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD) ++ continue; ++ vzquota_decr_space(&qugid->qugid_stat, number); ++ } ++#endif ++ vzquota_data_unlock(inode, &data); ++ } ++ inode_sub_bytes(inode, number); ++ might_sleep(); ++ return QUOTA_OK; ++} ++ ++/* ++ * Free inodes callback. 
++ */ ++static int vzquota_free_inode(const struct inode *inode, unsigned long number) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ if (IS_NOQUOTA(inode)) ++ return QUOTA_OK; ++ ++ qmblk = vzquota_inode_data((struct inode *)inode, &data); ++ if (qmblk == VZ_QUOTA_BAD) ++ return NO_QUOTA; ++ if (qmblk != NULL) { ++#ifdef CONFIG_VZ_QUOTA_UGID ++ int cnt; ++ struct vz_quota_ugid * qugid; ++#endif ++ ++ vzquota_decr_inodes(&qmblk->dq_stat, number); ++#ifdef CONFIG_VZ_QUOTA_UGID ++ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { ++ qugid = INODE_QLNK(inode)->qugid[cnt]; ++ if (qugid == NULL || qugid == VZ_QUOTA_UGBAD) ++ continue; ++ vzquota_decr_inodes(&qugid->qugid_stat, number); ++ } ++#endif ++ vzquota_data_unlock((struct inode *)inode, &data); ++ } ++ might_sleep(); ++ return QUOTA_OK; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ ++/* ++ * helper function for quota_transfer ++ * check that we can add inode to this quota_id ++ */ ++static int vzquota_transfer_check(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid[], ++ unsigned int type, __u64 size) ++{ ++ if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK || ++ vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK) ++ return -1; ++ return 0; ++} ++ ++int vzquota_transfer_usage(struct inode *inode, ++ int mask, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct vz_quota_ugid *qugid_old; ++ __u64 space; ++ int i; ++ ++ space = inode_get_bytes(inode); ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (!(mask & (1 << i))) ++ continue; ++ if (vzquota_transfer_check(qlnk->qmblk, qlnk->qugid, i, space)) ++ return -1; ++ } ++ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ if (!(mask & (1 << i))) ++ continue; ++ qugid_old = INODE_QLNK(inode)->qugid[i]; ++ vzquota_decr_space(&qugid_old->qugid_stat, space); ++ vzquota_decr_inodes(&qugid_old->qugid_stat, 1); ++ vzquota_incr_space(&qlnk->qugid[i]->qugid_stat, space); ++ vzquota_incr_inodes(&qlnk->qugid[i]->qugid_stat, 1); ++ } ++ return 0; ++} ++ ++/* ++ * Transfer the inode between diffent user/group quotas. ++ */ ++static int vzquota_transfer(struct inode *inode, struct iattr *iattr) ++{ ++ return vzquota_inode_transfer_call(inode, iattr) ? ++ NO_QUOTA : QUOTA_OK; ++} ++ ++#else /* CONFIG_VZ_QUOTA_UGID */ ++ ++static int vzquota_transfer(struct inode *inode, struct iattr *iattr) ++{ ++ return QUOTA_OK; ++} ++ ++#endif ++ ++/* ++ * Called under following semaphores: ++ * old_d->d_inode->i_sb->s_vfs_rename_sem ++ * old_d->d_inode->i_sem ++ * new_d->d_inode->i_sem ++ * [not verified --SAW] ++ */ ++static int vzquota_rename(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ return vzquota_rename_check(inode, old_dir, new_dir) ? ++ NO_QUOTA : QUOTA_OK; ++} ++ ++/* ++ * Structure of superblock diskquota operations. ++ */ ++struct dquot_operations vz_quota_operations = { ++ initialize: vzquota_initialize, ++ drop: vzquota_drop, ++ alloc_space: vzquota_alloc_space, ++ alloc_inode: vzquota_alloc_inode, ++ free_space: vzquota_free_space, ++ free_inode: vzquota_free_inode, ++ transfer: vzquota_transfer, ++ rename: vzquota_rename ++}; +diff -uprN linux-2.6.8.1.orig/fs/vzdq_tree.c linux-2.6.8.1-ve022stab078/fs/vzdq_tree.c +--- linux-2.6.8.1.orig/fs/vzdq_tree.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdq_tree.c 2006-05-11 13:05:44.000000000 +0400 +@@ -0,0 +1,286 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ * This file contains Virtuozzo quota tree implementation ++ */ ++ ++#include <linux/errno.h> ++#include <linux/slab.h> ++#include <linux/vzdq_tree.h> ++ ++struct quotatree_tree *quotatree_alloc(void) ++{ ++ int l; ++ struct quotatree_tree *tree; ++ ++ tree = kmalloc(sizeof(struct quotatree_tree), GFP_KERNEL); ++ if (tree == NULL) ++ goto out; ++ ++ for (l = 0; l < QUOTATREE_DEPTH; l++) { ++ INIT_LIST_HEAD(&tree->levels[l].usedlh); ++ INIT_LIST_HEAD(&tree->levels[l].freelh); ++ tree->levels[l].freenum = 0; ++ } ++ tree->root = NULL; ++ tree->leaf_num = 0; ++out: ++ return tree; ++} ++ ++static struct quotatree_node * ++quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level, ++ struct quotatree_find_state *st) ++{ ++ void **block; ++ struct quotatree_node *parent; ++ int l, index; ++ ++ parent = NULL; ++ block = (void **)&tree->root; ++ l = 0; ++ while (l < level && *block != NULL) { ++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK; ++ parent = *block; ++ block = parent->blocks + index; ++ l++; ++ } ++ if (st != NULL) { ++ st->block = block; ++ st->level = l; ++ } ++ ++ return parent; ++} ++ ++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st) ++{ ++ quotatree_follow(tree, id, QUOTATREE_DEPTH, st); ++ if (st->level == QUOTATREE_DEPTH) ++ return *st->block; ++ else ++ return NULL; ++} ++ ++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index) ++{ ++ int i, count; ++ struct quotatree_node *p; ++ void *leaf; ++ ++ if (QTREE_LEAFNUM(tree) <= index) ++ return NULL; ++ ++ count = 0; ++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ leaf = p->blocks[i]; ++ if (leaf == NULL) ++ continue; ++ if (count == index) ++ return leaf; ++ count++; ++ } ++ } ++ return NULL; ++} ++ ++/* returns data leaf (vz_quota_ugid) after _existent_ ugid (@id) ++ * in the tree... 
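++ *
++ * A minimal iteration sketch (essentially what vzquota_get_next() in
++ * fs/vzdq_ugid.c builds on), assuming the caller serializes tree access
++ * (qmblk->dq_sem in the ugid code):
++ *	leaf = quotatree_find(tree, id, &st);
++ *	while (leaf != NULL)
++ *		leaf = quotatree_get_next(tree,
++ *			((struct vz_quota_ugid *)leaf)->qugid_id);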
*/ ++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id) ++{ ++ int off; ++ struct quotatree_node *parent, *p; ++ struct list_head *lh; ++ ++ /* get parent refering correct quota tree node of the last level */ ++ parent = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL); ++ if (!parent) ++ return NULL; ++ ++ off = (id & QUOTATREE_BMASK) + 1; /* next ugid */ ++ lh = &parent->list; ++ do { ++ p = list_entry(lh, struct quotatree_node, list); ++ for ( ; off < QUOTATREE_BSIZE; off++) ++ if (p->blocks[off]) ++ return p->blocks[off]; ++ off = 0; ++ lh = lh->next; ++ } while (lh != &QTREE_LEAFLVL(tree)->usedlh); ++ ++ return NULL; ++} ++ ++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st, void *data) ++{ ++ struct quotatree_node *p; ++ int l, index; ++ ++ while (st->level < QUOTATREE_DEPTH) { ++ l = st->level; ++ if (!list_empty(&tree->levels[l].freelh)) { ++ p = list_entry(tree->levels[l].freelh.next, ++ struct quotatree_node, list); ++ list_del(&p->list); ++ } else { ++ p = kmalloc(sizeof(struct quotatree_node), GFP_NOFS | __GFP_NOFAIL); ++ if (p == NULL) ++ return -ENOMEM; ++ /* save block number in the l-level ++ * it uses for quota file generation */ ++ p->num = tree->levels[l].freenum++; ++ } ++ list_add(&p->list, &tree->levels[l].usedlh); ++ memset(p->blocks, 0, sizeof(p->blocks)); ++ *st->block = p; ++ ++ index = (id >> QUOTATREE_BSHIFT(l)) & QUOTATREE_BMASK; ++ st->block = p->blocks + index; ++ st->level++; ++ } ++ tree->leaf_num++; ++ *st->block = data; ++ ++ return 0; ++} ++ ++static struct quotatree_node * ++quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id, ++ int level) ++{ ++ struct quotatree_node *parent; ++ struct quotatree_find_state st; ++ ++ parent = quotatree_follow(tree, id, level, &st); ++ if (st.level == QUOTATREE_DEPTH) ++ tree->leaf_num--; ++ *st.block = NULL; ++ return parent; ++} ++ ++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id) ++{ ++ struct quotatree_node *p; ++ int level, i; ++ ++ p = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH); ++ for (level = QUOTATREE_DEPTH - 1; level >= QUOTATREE_CDEPTH; level--) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) ++ if (p->blocks[i] != NULL) ++ return; ++ list_move(&p->list, &tree->levels[level].freelh); ++ p = quotatree_remove_ptr(tree, id, level); ++ } ++} ++ ++#if 0 ++static void quotatree_walk(struct quotatree_tree *tree, ++ struct quotatree_node *node_start, ++ quotaid_t id_start, ++ int level_start, int level_end, ++ int (*callback)(struct quotatree_tree *, ++ quotaid_t id, ++ int level, ++ void *ptr, ++ void *data), ++ void *data) ++{ ++ struct quotatree_node *p; ++ int l, shift, index; ++ quotaid_t id; ++ struct quotatree_find_state st; ++ ++ p = node_start; ++ l = level_start; ++ shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS; ++ id = id_start; ++ index = 0; ++ ++ /* ++ * Invariants: ++ * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS; ++ * id & ((1 << shift) - 1) == 0 ++ * p is l-level node corresponding to id ++ */ ++ do { ++ if (!p) ++ break; ++ ++ if (l < level_end) { ++ for (; index < QUOTATREE_BSIZE; index++) ++ if (p->blocks[index] != NULL) ++ break; ++ if (index < QUOTATREE_BSIZE) { ++ /* descend */ ++ p = p->blocks[index]; ++ l++; ++ shift -= QUOTAID_BBITS; ++ id += (quotaid_t)index << shift; ++ index = 0; ++ continue; ++ } ++ } ++ ++ if ((*callback)(tree, id, l, p, data)) ++ break; ++ ++ /* ascend and to the next node */ ++ p = quotatree_follow(tree, id, l, &st); ++ ++ index = ((id >> shift) & QUOTATREE_BMASK) + 1; ++ 
l--; ++ shift += QUOTAID_BBITS; ++ id &= ~(((quotaid_t)1 << shift) - 1); ++ } while (l >= level_start); ++} ++#endif ++ ++static void free_list(struct list_head *node_list) ++{ ++ struct quotatree_node *p, *tmp; ++ ++ list_for_each_entry_safe(p, tmp, node_list, list) { ++ list_del(&p->list); ++ kfree(p); ++ } ++} ++ ++static inline void quotatree_free_nodes(struct quotatree_tree *tree) ++{ ++ int i; ++ ++ for (i = 0; i < QUOTATREE_DEPTH; i++) { ++ free_list(&tree->levels[i].usedlh); ++ free_list(&tree->levels[i].freelh); ++ } ++} ++ ++static void quotatree_free_leafs(struct quotatree_tree *tree, ++ void (*dtor)(void *)) ++{ ++ int i; ++ struct quotatree_node *p; ++ ++ list_for_each_entry(p, &QTREE_LEAFLVL(tree)->usedlh, list) { ++ for (i = 0; i < QUOTATREE_BSIZE; i++) { ++ if (p->blocks[i] == NULL) ++ continue; ++ ++ dtor(p->blocks[i]); ++ } ++ } ++} ++ ++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *)) ++{ ++ quotatree_free_leafs(tree, dtor); ++ quotatree_free_nodes(tree); ++ kfree(tree); ++} +diff -uprN linux-2.6.8.1.orig/fs/vzdq_ugid.c linux-2.6.8.1-ve022stab078/fs/vzdq_ugid.c +--- linux-2.6.8.1.orig/fs/vzdq_ugid.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdq_ugid.c 2006-05-11 13:05:44.000000000 +0400 +@@ -0,0 +1,1130 @@ ++/* ++ * Copyright (C) 2002 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo UID/GID disk quota implementation ++ */ ++ ++#include <linux/config.h> ++#include <linux/string.h> ++#include <linux/slab.h> ++#include <linux/list.h> ++#include <linux/smp_lock.h> ++#include <linux/rcupdate.h> ++#include <asm/uaccess.h> ++#include <linux/proc_fs.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/quota.h> ++#include <linux/quotaio_v2.h> ++#include <linux/virtinfo.h> ++ ++#include <linux/vzctl.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++ ++/* ++ * XXX ++ * may be something is needed for sb->s_dquot->info[]? ++ */ ++ ++#define USRQUOTA_MASK (1 << USRQUOTA) ++#define GRPQUOTA_MASK (1 << GRPQUOTA) ++#define QTYPE2MASK(type) (1 << (type)) ++ ++static kmem_cache_t *vz_quota_ugid_cachep; ++ ++/* guard to protect vz_quota_master from destroy in quota_on/off. 
Also protects ++ * list on the hash table */ ++extern struct semaphore vz_quota_sem; ++ ++inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid) ++{ ++ if (qugid != VZ_QUOTA_UGBAD) ++ atomic_inc(&qugid->qugid_count); ++ return qugid; ++} ++ ++/* we don't limit users with zero limits */ ++static inline int vzquota_fake_stat(struct dq_stat *stat) ++{ ++ return stat->bhardlimit == 0 && stat->bsoftlimit == 0 && ++ stat->ihardlimit == 0 && stat->isoftlimit == 0; ++} ++ ++/* callback function for quotatree_free() */ ++static inline void vzquota_free_qugid(void *ptr) ++{ ++ kmem_cache_free(vz_quota_ugid_cachep, ptr); ++} ++ ++/* ++ * destroy ugid, if it have zero refcount, limits and usage ++ * must be called under qmblk->dq_sem ++ */ ++void vzquota_put_ugid(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid) ++{ ++ if (qugid == VZ_QUOTA_UGBAD) ++ return; ++ qmblk_data_read_lock(qmblk); ++ if (atomic_dec_and_test(&qugid->qugid_count) && ++ (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 && ++ vzquota_fake_stat(&qugid->qugid_stat) && ++ qugid->qugid_stat.bcurrent == 0 && ++ qugid->qugid_stat.icurrent == 0) { ++ quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type), ++ qugid->qugid_id); ++ qmblk->dq_ugid_count--; ++ vzquota_free_qugid(qugid); ++ } ++ qmblk_data_read_unlock(qmblk); ++} ++ ++/* ++ * Get ugid block by its index, like it would present in array. ++ * In reality, this is not array - this is leafs chain of the tree. ++ * NULL if index is out of range. ++ * qmblk semaphore is required to protect the tree. ++ */ ++static inline struct vz_quota_ugid * ++vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type) ++{ ++ return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index); ++} ++ ++/* ++ * get next element from ugid "virtual array" ++ * ugid must be in current array and this array may not be changed between ++ * two accesses (quaranteed by "stopped" quota state and quota semaphore) ++ * qmblk semaphore is required to protect the tree ++ */ ++static inline struct vz_quota_ugid * ++vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid) ++{ ++ return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type), ++ qugid->qugid_id); ++} ++ ++/* ++ * requires dq_sem ++ */ ++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags) ++{ ++ struct vz_quota_ugid *qugid; ++ struct quotatree_tree *tree; ++ struct quotatree_find_state st; ++ ++ tree = QUGID_TREE(qmblk, type); ++ qugid = quotatree_find(tree, quota_id, &st); ++ if (qugid) ++ goto success; ++ ++ /* caller does not want alloc */ ++ if (flags & VZDQUG_FIND_DONT_ALLOC) ++ goto fail; ++ ++ if (flags & VZDQUG_FIND_FAKE) ++ goto doit; ++ ++ /* check limit */ ++ if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max) ++ goto fail; ++ ++ /* see comment at VZDQUG_FIXED_SET define */ ++ if (qmblk->dq_flags & VZDQUG_FIXED_SET) ++ goto fail; ++ ++doit: ++ /* alloc new structure */ ++ qugid = kmem_cache_alloc(vz_quota_ugid_cachep, ++ SLAB_NOFS | __GFP_NOFAIL); ++ if (qugid == NULL) ++ goto fail; ++ ++ /* initialize new structure */ ++ qugid->qugid_id = quota_id; ++ memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat)); ++ qugid->qugid_type = type; ++ atomic_set(&qugid->qugid_count, 0); ++ ++ /* insert in tree */ ++ if (quotatree_insert(tree, quota_id, &st, qugid) < 0) ++ goto fail_insert; ++ qmblk->dq_ugid_count++; ++ ++success: ++ vzquota_get_ugid(qugid); ++ return qugid; ++ ++fail_insert: ++ vzquota_free_qugid(qugid); 
++fail: ++ return VZ_QUOTA_UGBAD; ++} ++ ++/* ++ * takes dq_sem, may schedule ++ */ ++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags) ++{ ++ struct vz_quota_ugid *qugid; ++ ++ down(&qmblk->dq_sem); ++ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags); ++ up(&qmblk->dq_sem); ++ ++ return qugid; ++} ++ ++/* ++ * destroy all ugid records on given quota master ++ */ ++void vzquota_kill_ugid(struct vz_quota_master *qmblk) ++{ ++ BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) || ++ (qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL)); ++ ++ if (qmblk->dq_uid_tree != NULL) { ++ quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid); ++ quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid); ++ } ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * Management interface to ugid quota for (super)users. ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems ++ * ++ * This function finds a quota master block corresponding to the root of ++ * a virtual filesystem. ++ * Returns a quota master block with reference taken, or %NULL if not under ++ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation ++ * operations will fail). ++ * ++ * Note: this function uses vzquota_inode_qmblk(). ++ * The latter is a rather confusing function: it returns qmblk that used to be ++ * on the inode some time ago (without guarantee that it still has any ++ * relations to the inode). So, vzquota_find_qmblk() leaves it up to the ++ * caller to think whether the inode could have changed its qmblk and what to ++ * do in that case. 
++ * Currently, the callers appear to not care :( ++ */ ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb) ++{ ++ struct inode *qrinode; ++ struct vz_quota_master *qmblk; ++ ++ qmblk = NULL; ++ qrinode = NULL; ++ if (sb->s_op->get_quota_root != NULL) ++ qrinode = sb->s_op->get_quota_root(sb); ++ if (qrinode != NULL) ++ qmblk = vzquota_inode_qmblk(qrinode); ++ return qmblk; ++} ++ ++static int vzquota_initialize2(struct inode *inode, int type) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_drop2(struct inode *inode) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_alloc_space2(struct inode *inode, ++ qsize_t number, int prealloc) ++{ ++ inode_add_bytes(inode, number); ++ return QUOTA_OK; ++} ++ ++static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_free_space2(struct inode *inode, qsize_t number) ++{ ++ inode_sub_bytes(inode, number); ++ return QUOTA_OK; ++} ++ ++static int vzquota_free_inode2(const struct inode *inode, unsigned long number) ++{ ++ return QUOTA_OK; ++} ++ ++static int vzquota_transfer2(struct inode *inode, struct iattr *iattr) ++{ ++ return QUOTA_OK; ++} ++ ++struct dquot_operations vz_quota_operations2 = { ++ initialize: vzquota_initialize2, ++ drop: vzquota_drop2, ++ alloc_space: vzquota_alloc_space2, ++ alloc_inode: vzquota_alloc_inode2, ++ free_space: vzquota_free_space2, ++ free_inode: vzquota_free_inode2, ++ transfer: vzquota_transfer2 ++}; ++ ++static int vz_quota_on(struct super_block *sb, int type, ++ int format_id, char *path) ++{ ++ struct vz_quota_master *qmblk; ++ int mask, mask2; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ mask = 0; ++ mask2 = 0; ++ sb->dq_op = &vz_quota_operations2; ++ sb->s_qcop = &vz_quotactl_operations; ++ if (type == USRQUOTA) { ++ mask = DQUOT_USR_ENABLED; ++ mask2 = VZDQ_USRQUOTA; ++ } ++ if (type == GRPQUOTA) { ++ mask = DQUOT_GRP_ENABLED; ++ mask2 = VZDQ_GRPQUOTA; ++ } ++ err = -EBUSY; ++ if (qmblk->dq_flags & mask2) ++ goto out; ++ ++ err = 0; ++ qmblk->dq_flags |= mask2; ++ sb->s_dquot.flags |= mask; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_quota_off(struct super_block *sb, int type) ++{ ++ struct vz_quota_master *qmblk; ++ int mask2; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ mask2 = 0; ++ if (type == USRQUOTA) ++ mask2 = VZDQ_USRQUOTA; ++ if (type == GRPQUOTA) ++ mask2 = VZDQ_GRPQUOTA; ++ err = -EINVAL; ++ if (!(qmblk->dq_flags & mask2)) ++ goto out; ++ ++ qmblk->dq_flags &= ~mask2; ++ err = 0; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_quota_sync(struct super_block *sb, int type) ++{ ++ return 0; /* vz quota is always uptodate */ ++} ++ ++static int vz_get_dqblk(struct super_block *sb, int type, ++ qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *ugid; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = 0; ++ ugid = vzquota_find_ugid(qmblk, id, type, 
VZDQUG_FIND_DONT_ALLOC); ++ if (ugid != VZ_QUOTA_UGBAD) { ++ qmblk_data_read_lock(qmblk); ++ di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10; ++ di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10; ++ di->dqb_curspace = ugid->qugid_stat.bcurrent; ++ di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit; ++ di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit; ++ di->dqb_curinodes = ugid->qugid_stat.icurrent; ++ di->dqb_btime = ugid->qugid_stat.btime; ++ di->dqb_itime = ugid->qugid_stat.itime; ++ qmblk_data_read_unlock(qmblk); ++ di->dqb_valid = QIF_ALL; ++ vzquota_put_ugid(qmblk, ugid); ++ } else { ++ memset(di, 0, sizeof(*di)); ++ di->dqb_valid = QIF_ALL; ++ } ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++/* must be called under vz_quota_sem */ ++static int __vz_set_dqblk(struct vz_quota_master *qmblk, ++ int type, qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_ugid *ugid; ++ ++ ugid = vzquota_find_ugid(qmblk, id, type, 0); ++ if (ugid == VZ_QUOTA_UGBAD) ++ return -ESRCH; ++ ++ qmblk_data_write_lock(qmblk); ++ /* ++ * Subtle compatibility breakage. ++ * ++ * Some old non-vz kernel quota didn't start grace period ++ * if the new soft limit happens to be below the usage. ++ * Non-vz kernel quota in 2.4.20 starts the grace period ++ * (if it hasn't been started). ++ * Current non-vz kernel performs even more complicated ++ * manipulations... ++ * ++ * Also, current non-vz kernels have inconsistency related to ++ * the grace time start. In regular operations the grace period ++ * is started if the usage is greater than the soft limit (and, ++ * strangely, is cancelled if the usage is less). ++ * However, set_dqblk starts the grace period if the usage is greater ++ * or equal to the soft limit. ++ * ++ * Here we try to mimic the behavior of the current non-vz kernel. 
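++ *
++ * Concretely, for the block limits handled below: if the new soft limit is
++ * nonzero and the current usage is at or above it, and QIF_BTIME was not
++ * supplied, the grace period is (re)started as btime = now + bexpire; if the
++ * soft limit is zero or the usage is below it, btime is reset to 0.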
++ */ ++ if (di->dqb_valid & QIF_BLIMITS) { ++ ugid->qugid_stat.bhardlimit = ++ (__u64)di->dqb_bhardlimit << 10; ++ ugid->qugid_stat.bsoftlimit = ++ (__u64)di->dqb_bsoftlimit << 10; ++ if (di->dqb_bsoftlimit == 0 || ++ ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit) ++ ugid->qugid_stat.btime = 0; ++ else if (!(di->dqb_valid & QIF_BTIME)) ++ ugid->qugid_stat.btime = CURRENT_TIME_SECONDS ++ + qmblk->dq_ugid_info[type].bexpire; ++ else ++ ugid->qugid_stat.btime = di->dqb_btime; ++ } ++ if (di->dqb_valid & QIF_ILIMITS) { ++ ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit; ++ ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit; ++ if (di->dqb_isoftlimit == 0 || ++ ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit) ++ ugid->qugid_stat.itime = 0; ++ else if (!(di->dqb_valid & QIF_ITIME)) ++ ugid->qugid_stat.itime = CURRENT_TIME_SECONDS ++ + qmblk->dq_ugid_info[type].iexpire; ++ else ++ ugid->qugid_stat.itime = di->dqb_itime; ++ } ++ qmblk_data_write_unlock(qmblk); ++ vzquota_put_ugid(qmblk, ugid); ++ ++ return 0; ++} ++ ++static int vz_set_dqblk(struct super_block *sb, int type, ++ qid_t id, struct if_dqblk *di) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ err = __vz_set_dqblk(qmblk, type, id, di); ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++static int vz_get_dqinfo(struct super_block *sb, int type, ++ struct if_dqinfo *ii) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = 0; ++ ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire; ++ ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire; ++ ii->dqi_flags = 0; ++ ii->dqi_valid = IIF_ALL; ++ ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++/* must be called under vz_quota_sem */ ++static int __vz_set_dqinfo(struct vz_quota_master *qmblk, ++ int type, struct if_dqinfo *ii) ++{ ++ if (ii->dqi_valid & IIF_FLAGS) ++ if (ii->dqi_flags & DQF_MASK) ++ return -EINVAL; ++ ++ if (ii->dqi_valid & IIF_BGRACE) ++ qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace; ++ if (ii->dqi_valid & IIF_IGRACE) ++ qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace; ++ return 0; ++} ++ ++static int vz_set_dqinfo(struct super_block *sb, int type, ++ struct if_dqinfo *ii) ++{ ++ struct vz_quota_master *qmblk; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ down(&vz_quota_sem); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ err = __vz_set_dqinfo(qmblk, type, ii); ++out: ++ up(&vz_quota_sem); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ return err; ++} ++ ++#ifdef CONFIG_QUOTA_COMPAT ++ ++#define Q_GETQUOTI_SIZE 1024 ++ ++#define UGID2DQBLK(dst, src) \ ++ do { \ ++ (dst)->dqb_ihardlimit = (src)->qugid_stat.ihardlimit; \ ++ (dst)->dqb_isoftlimit = (src)->qugid_stat.isoftlimit; \ ++ (dst)->dqb_curinodes = (src)->qugid_stat.icurrent; \ ++ /* in 1K blocks */ \ ++ (dst)->dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \ ++ /* in 1K blocks */ \ ++ (dst)->dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \ ++ /* in bytes, 64 bit */ \ ++ (dst)->dqb_curspace = 
(src)->qugid_stat.bcurrent; \ ++ (dst)->dqb_btime = (src)->qugid_stat.btime; \ ++ (dst)->dqb_itime = (src)->qugid_stat.itime; \ ++ } while (0) ++ ++static int vz_get_quoti(struct super_block *sb, int type, qid_t idx, ++ struct v2_disk_dqblk *dqblk) ++{ ++ struct vz_quota_master *qmblk; ++ struct v2_disk_dqblk *data, *kbuf; ++ struct vz_quota_ugid *ugid; ++ int count; ++ int err; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ err = -ESRCH; ++ if (qmblk == NULL) ++ goto out; ++ err = -EIO; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out; ++ ++ err = -ENOMEM; ++ kbuf = vmalloc(Q_GETQUOTI_SIZE * sizeof(*kbuf)); ++ if (!kbuf) ++ goto out; ++ ++ down(&vz_quota_sem); ++ down(&qmblk->dq_sem); ++ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0; ++ ugid != NULL && count < Q_GETQUOTI_SIZE; ++ count++) ++ { ++ data = kbuf + count; ++ qmblk_data_read_lock(qmblk); ++ UGID2DQBLK(data, ugid); ++ qmblk_data_read_unlock(qmblk); ++ data->dqb_id = ugid->qugid_id; ++ ++ /* Find next entry */ ++ ugid = vzquota_get_next(qmblk, ugid); ++ BUG_ON(ugid != NULL && ugid->qugid_type != type); ++ } ++ up(&qmblk->dq_sem); ++ up(&vz_quota_sem); ++ ++ err = count; ++ if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf))) ++ err = -EFAULT; ++ ++ vfree(kbuf); ++out: ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qmblk); ++ ++ return err; ++} ++ ++#endif ++ ++struct quotactl_ops vz_quotactl_operations = { ++ quota_on: vz_quota_on, ++ quota_off: vz_quota_off, ++ quota_sync: vz_quota_sync, ++ get_info: vz_get_dqinfo, ++ set_info: vz_set_dqinfo, ++ get_dqblk: vz_get_dqblk, ++ set_dqblk: vz_set_dqblk, ++#ifdef CONFIG_QUOTA_COMPAT ++ get_quoti: vz_get_quoti ++#endif ++}; ++ ++ ++/* ---------------------------------------------------------------------- ++ * Management interface for host system admins. 
++ * --------------------------------------------------------------------- */ ++ ++static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size, ++ struct vz_quota_iface *u_ugid_buf) ++{ ++ struct vz_quota_master *qmblk; ++ int ret; ++ ++ down(&vz_quota_sem); ++ ++ ret = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ ret = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; /* working quota doesn't accept new ugids */ ++ ++ ret = 0; ++ /* start to add ugids */ ++ for (ret = 0; ret < ugid_size; ret++) { ++ struct vz_quota_iface ugid_buf; ++ struct vz_quota_ugid *ugid; ++ ++ if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf))) ++ break; ++ ++ if (ugid_buf.qi_type >= MAXQUOTAS) ++ break; /* bad quota type - this is the only check */ ++ ++ ugid = vzquota_find_ugid(qmblk, ++ ugid_buf.qi_id, ugid_buf.qi_type, 0); ++ if (ugid == VZ_QUOTA_UGBAD) { ++ qmblk->dq_flags |= VZDQUG_FIXED_SET; ++ break; /* limit reached */ ++ } ++ ++ /* update usage/limits ++ * we can copy the data without the lock, because the data ++ * cannot be modified in VZDQ_STARTING state */ ++ ugid->qugid_stat = ugid_buf.qi_stat; ++ ++ vzquota_put_ugid(qmblk, ugid); ++ ++ u_ugid_buf++; /* next user buffer */ ++ } ++out: ++ up(&vz_quota_sem); ++ ++ return ret; ++} ++ ++static int quota_ugid_setgrace(unsigned int quota_id, ++ struct dq_info u_dq_info[]) ++{ ++ struct vz_quota_master *qmblk; ++ struct dq_info dq_info[MAXQUOTAS]; ++ struct dq_info *target; ++ int err, type; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EBUSY; ++ if (qmblk->dq_state != VZDQ_STARTING) ++ goto out; /* working quota doesn't accept changing options */ ++ ++ err = -EFAULT; ++ if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info))) ++ goto out; ++ ++ err = 0; ++ ++ /* update in qmblk */ ++ for (type = 0; type < MAXQUOTAS; type ++) { ++ target = &qmblk->dq_ugid_info[type]; ++ target->bexpire = dq_info[type].bexpire; ++ target->iexpire = dq_info[type].iexpire; ++ } ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size, ++ struct vz_quota_iface *u_ugid_buf) ++{ ++ int type, count; ++ struct vz_quota_ugid *ugid; ++ ++ if (QTREE_LEAFNUM(qmblk->dq_uid_tree) + ++ QTREE_LEAFNUM(qmblk->dq_gid_tree) ++ <= index) ++ return 0; ++ ++ count = 0; ++ ++ type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? 
USRQUOTA : GRPQUOTA; ++ if (type == GRPQUOTA) ++ index -= QTREE_LEAFNUM(qmblk->dq_uid_tree); ++ ++ /* loop through ugid and then qgid quota */ ++repeat: ++ for (ugid = vzquota_get_byindex(qmblk, index, type); ++ ugid != NULL && count < size; ++ ugid = vzquota_get_next(qmblk, ugid), count++) ++ { ++ struct vz_quota_iface ugid_buf; ++ ++ /* form interface buffer and send in to user-level */ ++ qmblk_data_read_lock(qmblk); ++ memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat, ++ sizeof(ugid_buf.qi_stat)); ++ qmblk_data_read_unlock(qmblk); ++ ugid_buf.qi_id = ugid->qugid_id; ++ ugid_buf.qi_type = ugid->qugid_type; ++ ++ memcpy(u_ugid_buf, &ugid_buf, sizeof(ugid_buf)); ++ u_ugid_buf++; /* next portion of user buffer */ ++ } ++ ++ if (type == USRQUOTA && count < size) { ++ type = GRPQUOTA; ++ index = 0; ++ goto repeat; ++ } ++ ++ return count; ++} ++ ++static int quota_ugid_getstat(unsigned int quota_id, ++ int index, int size, struct vz_quota_iface *u_ugid_buf) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_iface *k_ugid_buf; ++ int err; ++ ++ if (index < 0 || size < 0) ++ return -EINVAL; ++ ++ if (size > INT_MAX / sizeof(struct vz_quota_iface)) ++ return -EINVAL; ++ ++ k_ugid_buf = vmalloc(size * sizeof(struct vz_quota_iface)); ++ if (k_ugid_buf == NULL) ++ return -ENOMEM; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ down(&qmblk->dq_sem); ++ err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf); ++ up(&qmblk->dq_sem); ++ if (err < 0) ++ goto out; ++ ++ if (copy_to_user(u_ugid_buf, k_ugid_buf, ++ size * sizeof(struct vz_quota_iface))) ++ err = -EFAULT; ++ ++out: ++ up(&vz_quota_sem); ++ vfree(k_ugid_buf); ++ return err; ++} ++ ++static int quota_ugid_getgrace(unsigned int quota_id, ++ struct dq_info u_dq_info[]) ++{ ++ struct vz_quota_master *qmblk; ++ struct dq_info dq_info[MAXQUOTAS]; ++ struct dq_info *target; ++ int err, type; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = 0; ++ /* update from qmblk */ ++ for (type = 0; type < MAXQUOTAS; type ++) { ++ target = &qmblk->dq_ugid_info[type]; ++ dq_info[type].bexpire = target->bexpire; ++ dq_info[type].iexpire = target->iexpire; ++ dq_info[type].flags = target->flags; ++ } ++ ++ if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info))) ++ err = -EFAULT; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_getconfig(unsigned int quota_id, ++ struct vz_quota_ugid_stat *info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_stat kinfo; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = 0; ++ kinfo.limit = qmblk->dq_ugid_max; ++ kinfo.count = qmblk->dq_ugid_count; ++ kinfo.flags = qmblk->dq_flags; ++ ++ if (copy_to_user(info, &kinfo, sizeof(kinfo))) ++ err = -EFAULT; ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setconfig(unsigned int quota_id, ++ struct vz_quota_ugid_stat *info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_stat kinfo; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ENOENT; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&kinfo, info, sizeof(kinfo))) ++ goto out; ++ ++ err = 0; ++ qmblk->dq_ugid_max = kinfo.limit; ++ if (qmblk->dq_state == VZDQ_STARTING) { ++ qmblk->dq_flags = kinfo.flags; ++ if 
(qmblk->dq_flags & VZDQUG_ON) ++ qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA; ++ } ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setlimit(unsigned int quota_id, ++ struct vz_quota_ugid_setlimit *u_lim) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_setlimit lim; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ESRCH; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&lim, u_lim, sizeof(lim))) ++ goto out; ++ ++ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb); ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++static int quota_ugid_setinfo(unsigned int quota_id, ++ struct vz_quota_ugid_setinfo *u_info) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid_setinfo info; ++ int err; ++ ++ down(&vz_quota_sem); ++ ++ err = -ESRCH; ++ qmblk = vzquota_find_master(quota_id); ++ if (qmblk == NULL) ++ goto out; ++ ++ err = -EFAULT; ++ if (copy_from_user(&info, u_info, sizeof(info))) ++ goto out; ++ ++ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi); ++ ++out: ++ up(&vz_quota_sem); ++ ++ return err; ++} ++ ++/* ++ * This is a system call to maintain UGID quotas ++ * Note this call is allowed to run ONLY from VE0 ++ */ ++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub) ++{ ++ int ret; ++ ++ ret = -EPERM; ++ /* access allowed only from root of VE0 */ ++ if (!capable(CAP_SYS_RESOURCE) || ++ !capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ switch (qub->cmd) { ++ case VZ_DQ_UGID_GETSTAT: ++ ret = quota_ugid_getstat(qub->quota_id, ++ qub->ugid_index, qub->ugid_size, ++ (struct vz_quota_iface *)qub->addr); ++ break; ++ case VZ_DQ_UGID_ADDSTAT: ++ ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size, ++ (struct vz_quota_iface *)qub->addr); ++ break; ++ case VZ_DQ_UGID_GETGRACE: ++ ret = quota_ugid_getgrace(qub->quota_id, ++ (struct dq_info *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETGRACE: ++ ret = quota_ugid_setgrace(qub->quota_id, ++ (struct dq_info *)qub->addr); ++ break; ++ case VZ_DQ_UGID_GETCONFIG: ++ ret = quota_ugid_getconfig(qub->quota_id, ++ (struct vz_quota_ugid_stat *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETCONFIG: ++ ret = quota_ugid_setconfig(qub->quota_id, ++ (struct vz_quota_ugid_stat *)qub->addr); ++ break; ++ case VZ_DQ_UGID_SETLIMIT: ++ ret = quota_ugid_setlimit(qub->quota_id, ++ (struct vz_quota_ugid_setlimit *) ++ qub->addr); ++ break; ++ case VZ_DQ_UGID_SETINFO: ++ ret = quota_ugid_setinfo(qub->quota_id, ++ (struct vz_quota_ugid_setinfo *) ++ qub->addr); ++ break; ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++out: ++ return ret; ++} ++ ++static void ugid_quota_on_sb(struct super_block *sb) ++{ ++ struct super_block *real_sb; ++ struct vz_quota_master *qmblk; ++ ++ if (!sb->s_op->get_quota_root) ++ return; ++ ++ real_sb = sb->s_op->get_quota_root(sb)->i_sb; ++ if (real_sb->dq_op != &vz_quota_operations) ++ return; ++ ++ sb->dq_op = &vz_quota_operations2; ++ sb->s_qcop = &vz_quotactl_operations; ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ ++ qmblk = vzquota_find_qmblk(sb); ++ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD)) ++ return; ++ down(&vz_quota_sem); ++ if (qmblk->dq_flags & VZDQ_USRQUOTA) ++ sb->s_dquot.flags |= DQUOT_USR_ENABLED; ++ if (qmblk->dq_flags & VZDQ_GRPQUOTA) ++ sb->s_dquot.flags |= 
DQUOT_GRP_ENABLED; ++ up(&vz_quota_sem); ++ qmblk_put(qmblk); ++} ++ ++static void ugid_quota_off_sb(struct super_block *sb) ++{ ++ /* can't make quota off on mounted super block */ ++ BUG_ON(sb->s_root != NULL); ++} ++ ++static int ugid_notifier_call(struct vnotifier_block *self, ++ unsigned long n, void *data, int old_ret) ++{ ++ struct virt_info_quota *viq; ++ ++ viq = (struct virt_info_quota *)data; ++ ++ switch (n) { ++ case VIRTINFO_QUOTA_ON: ++ ugid_quota_on_sb(viq->super); ++ break; ++ case VIRTINFO_QUOTA_OFF: ++ ugid_quota_off_sb(viq->super); ++ break; ++ case VIRTINFO_QUOTA_GETSTAT: ++ break; ++ default: ++ return old_ret; ++ } ++ return NOTIFY_OK; ++} ++ ++static struct vnotifier_block ugid_notifier_block = { ++ .notifier_call = ugid_notifier_call, ++}; ++ ++/* ---------------------------------------------------------------------- ++ * Init/exit. ++ * --------------------------------------------------------------------- */ ++ ++struct quota_format_type vz_quota_empty_v2_format = { ++ qf_fmt_id: QFMT_VFS_V0, ++ qf_ops: NULL, ++ qf_owner: THIS_MODULE ++}; ++ ++int vzquota_ugid_init() ++{ ++ int err; ++ ++ vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid", ++ sizeof(struct vz_quota_ugid), ++ 0, SLAB_HWCACHE_ALIGN, ++ NULL, NULL); ++ if (vz_quota_ugid_cachep == NULL) ++ goto err_slab; ++ ++ err = register_quota_format(&vz_quota_empty_v2_format); ++ if (err) ++ goto err_reg; ++ ++ virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block); ++ return 0; ++ ++err_reg: ++ kmem_cache_destroy(vz_quota_ugid_cachep); ++ return err; ++ ++err_slab: ++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n"); ++ return -ENOMEM; ++} ++ ++void vzquota_ugid_release() ++{ ++ virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block); ++ unregister_quota_format(&vz_quota_empty_v2_format); ++ ++ if (kmem_cache_destroy(vz_quota_ugid_cachep)) ++ printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n"); ++} +diff -uprN linux-2.6.8.1.orig/fs/vzdquot.c linux-2.6.8.1-ve022stab078/fs/vzdquot.c +--- linux-2.6.8.1.orig/fs/vzdquot.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/fs/vzdquot.c 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,1706 @@ ++/* ++ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains the core of Virtuozzo disk quota implementation: ++ * maintenance of VZDQ information in inodes, ++ * external interfaces, ++ * module entry. ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/string.h> ++#include <linux/list.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <asm/semaphore.h> ++#include <linux/slab.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/quota.h> ++#include <linux/rcupdate.h> ++#include <linux/module.h> ++#include <asm/uaccess.h> ++#include <linux/vzctl.h> ++#include <linux/vzctl_quota.h> ++#include <linux/vzquota.h> ++#include <linux/virtinfo.h> ++#include <linux/vzdq_tree.h> ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Locking ++ * ++ * ---------------------------------------------------------------------- */ ++ ++/* ++ * Serializes on/off and all other do_vzquotactl operations. ++ * Protects qmblk hash. 
++ */ ++struct semaphore vz_quota_sem; ++ ++/* ++ * Data access locks ++ * inode_qmblk ++ * protects qmblk pointers in all inodes and qlnk content in general ++ * (but not qmblk content); ++ * also protects related qmblk invalidation procedures; ++ * can't be per-inode because of vzquota_dtree_qmblk complications ++ * and problems with serialization with quota_on, ++ * but can be per-superblock; ++ * qmblk_data ++ * protects qmblk fields (such as current usage) ++ * quota_data ++ * protects charge/uncharge operations, thus, implies ++ * qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock ++ * (to protect ugid pointers). ++ * ++ * Lock order: ++ * inode_qmblk_lock -> dcache_lock ++ * inode_qmblk_lock -> qmblk_data ++ */ ++static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED; ++ ++inline void inode_qmblk_lock(struct super_block *sb) ++{ ++ spin_lock(&vzdq_qmblk_lock); ++} ++ ++inline void inode_qmblk_unlock(struct super_block *sb) ++{ ++ spin_unlock(&vzdq_qmblk_lock); ++} ++ ++inline void qmblk_data_read_lock(struct vz_quota_master *qmblk) ++{ ++ spin_lock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk) ++{ ++ spin_unlock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_write_lock(struct vz_quota_master *qmblk) ++{ ++ spin_lock(&qmblk->dq_data_lock); ++} ++ ++inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk) ++{ ++ spin_unlock(&qmblk->dq_data_lock); ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Master hash table handling. ++ * ++ * SMP not safe, serialied by vz_quota_sem within quota syscalls ++ * ++ * --------------------------------------------------------------------- */ ++ ++static kmem_cache_t *vzquota_cachep; ++ ++/* ++ * Hash function. 
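++ *
++ * A simple "fold the high bits onto the low bits" hash of the quota id; for
++ * example, with QHASH_BITS == 6, quota_id 0x1234 hashes to
++ * ((0x1234 >> 6) ^ 0x1234) & 0x3f == 0x3c.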
++ */ ++#define QHASH_BITS 6 ++#define VZ_QUOTA_HASH_SIZE (1 << QHASH_BITS) ++#define QHASH_MASK (VZ_QUOTA_HASH_SIZE - 1) ++ ++struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE]; ++int vzquota_hash_size = VZ_QUOTA_HASH_SIZE; ++ ++static inline int vzquota_hash_func(unsigned int qid) ++{ ++ return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK); ++} ++ ++/** ++ * vzquota_alloc_master - alloc and instantiate master quota record ++ * ++ * Returns: ++ * pointer to newly created record if SUCCESS ++ * -ENOMEM if out of memory ++ * -EEXIST if record with given quota_id already exist ++ */ ++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id, ++ struct vz_quota_stat *qstat) ++{ ++ int err; ++ struct vz_quota_master *qmblk; ++ ++ err = -EEXIST; ++ if (vzquota_find_master(quota_id) != NULL) ++ goto out; ++ ++ err = -ENOMEM; ++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL); ++ if (qmblk == NULL) ++ goto out; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ qmblk->dq_uid_tree = quotatree_alloc(); ++ if (!qmblk->dq_uid_tree) ++ goto out_free; ++ ++ qmblk->dq_gid_tree = quotatree_alloc(); ++ if (!qmblk->dq_gid_tree) ++ goto out_free_tree; ++#endif ++ ++ qmblk->dq_state = VZDQ_STARTING; ++ init_MUTEX(&qmblk->dq_sem); ++ spin_lock_init(&qmblk->dq_data_lock); ++ ++ qmblk->dq_id = quota_id; ++ qmblk->dq_stat = qstat->dq_stat; ++ qmblk->dq_info = qstat->dq_info; ++ qmblk->dq_root_dentry = NULL; ++ qmblk->dq_root_mnt = NULL; ++ qmblk->dq_sb = NULL; ++ qmblk->dq_ugid_count = 0; ++ qmblk->dq_ugid_max = 0; ++ qmblk->dq_flags = 0; ++ memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info)); ++ INIT_LIST_HEAD(&qmblk->dq_ilink_list); ++ ++ atomic_set(&qmblk->dq_count, 1); ++ ++ /* insert in hash chain */ ++ list_add(&qmblk->dq_hash, ++ &vzquota_hash_table[vzquota_hash_func(quota_id)]); ++ ++ /* success */ ++ return qmblk; ++ ++out_free_tree: ++ quotatree_free(qmblk->dq_uid_tree, NULL); ++out_free: ++ kmem_cache_free(vzquota_cachep, qmblk); ++out: ++ return ERR_PTR(err); ++} ++ ++static struct vz_quota_master *vzquota_alloc_fake(void) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL); ++ if (qmblk == NULL) ++ return NULL; ++ memset(qmblk, 0, sizeof(*qmblk)); ++ qmblk->dq_state = VZDQ_STOPING; ++ qmblk->dq_flags = VZDQ_NOQUOT; ++ spin_lock_init(&qmblk->dq_data_lock); ++ INIT_LIST_HEAD(&qmblk->dq_ilink_list); ++ atomic_set(&qmblk->dq_count, 1); ++ return qmblk; ++} ++ ++/** ++ * vzquota_find_master - find master record with given id ++ * ++ * Returns qmblk without touching its refcounter. ++ * Called under vz_quota_sem. ++ */ ++struct vz_quota_master *vzquota_find_master(unsigned int quota_id) ++{ ++ int i; ++ struct vz_quota_master *qp; ++ ++ i = vzquota_hash_func(quota_id); ++ list_for_each_entry(qp, &vzquota_hash_table[i], dq_hash) { ++ if (qp->dq_id == quota_id) ++ return qp; ++ } ++ return NULL; ++} ++ ++/** ++ * vzquota_free_master - release resources taken by qmblk, freeing memory ++ * ++ * qmblk is assumed to be already taken out from the hash. ++ * Should be called outside vz_quota_sem. 
++ */ ++void vzquota_free_master(struct vz_quota_master *qmblk) ++{ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ vzquota_kill_ugid(qmblk); ++#endif ++ BUG_ON(!list_empty(&qmblk->dq_ilink_list)); ++ kmem_cache_free(vzquota_cachep, qmblk); ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Passing quota information through current ++ * ++ * Used in inode -> qmblk lookup at inode creation stage (since at that ++ * time there are no links between the inode being created and its parent ++ * directory). ++ * ++ * --------------------------------------------------------------------- */ ++ ++#define VZDQ_CUR_MAGIC 0x57d0fee2 ++ ++static inline int vzquota_cur_qmblk_check(void) ++{ ++ return current->magic == VZDQ_CUR_MAGIC; ++} ++ ++static inline struct inode *vzquota_cur_qmblk_fetch(void) ++{ ++ return current->ino; ++} ++ ++static inline void vzquota_cur_qmblk_set(struct inode *data) ++{ ++ struct task_struct *tsk; ++ ++ tsk = current; ++ tsk->magic = VZDQ_CUR_MAGIC; ++ tsk->ino = data; ++} ++ ++#if 0 ++static inline void vzquota_cur_qmblk_reset(void) ++{ ++ current->magic = 0; ++} ++#endif ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Superblock quota operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/* ++ * Kernel structure abuse. ++ * We use files[0] pointer as an int variable: ++ * reference counter of how many quota blocks uses this superblock. ++ * files[1] is used for generations structure which helps us to track ++ * when traversing of dentries is really required. ++ */ ++#define __VZ_QUOTA_NOQUOTA(sb) (*(struct vz_quota_master **)\ ++ &sb->s_dquot.files[1]) ++#define __VZ_QUOTA_TSTAMP(sb) ((struct timeval *)\ ++ &sb->s_dquot.dqio_sem) ++ ++#if defined(VZ_QUOTA_UNLOAD) ++ ++#define __VZ_QUOTA_SBREF(sb) (*(int *)&sb->s_dquot.files[0]) ++ ++struct dquot_operations *orig_dq_op; ++struct quotactl_ops *orig_dq_cop; ++ ++/** ++ * quota_get_super - account for new a quoted tree under the superblock ++ * ++ * One superblock can have multiple directory subtrees with different VZ ++ * quotas. We keep a counter of such subtrees and set VZ quota operations or ++ * reset the default ones. ++ * ++ * Called under vz_quota_sem (from quota_on). ++ */ ++int vzquota_get_super(struct super_block *sb) ++{ ++ if (sb->dq_op != &vz_quota_operations) { ++ down(&sb->s_dquot.dqonoff_sem); ++ if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) { ++ up(&sb->s_dquot.dqonoff_sem); ++ return -EEXIST; ++ } ++ if (orig_dq_op == NULL && sb->dq_op != NULL) ++ orig_dq_op = sb->dq_op; ++ sb->dq_op = &vz_quota_operations; ++ if (orig_dq_cop == NULL && sb->s_qcop != NULL) ++ orig_dq_cop = sb->s_qcop; ++ /* XXX this may race with sys_quotactl */ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ sb->s_qcop = &vz_quotactl_operations; ++#else ++ sb->s_qcop = NULL; ++#endif ++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb)); ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format; ++ /* ++ * To get quotaops.h call us we need to mark superblock ++ * as having quota. These flags mark the moment when ++ * our dq_op start to be called. 
++ * ++ * The ordering of dq_op and s_dquot.flags assignment ++ * needs to be enforced, but other CPUs do not do rmb() ++ * between s_dquot.flags and dq_op accesses. ++ */ ++ wmb(); synchronize_kernel(); ++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED; ++ __module_get(THIS_MODULE); ++ up(&sb->s_dquot.dqonoff_sem); ++ } ++ /* protected by vz_quota_sem */ ++ __VZ_QUOTA_SBREF(sb)++; ++ return 0; ++} ++ ++/** ++ * quota_put_super - release superblock when one quota tree goes away ++ * ++ * Called under vz_quota_sem. ++ */ ++void vzquota_put_super(struct super_block *sb) ++{ ++ int count; ++ ++ count = --__VZ_QUOTA_SBREF(sb); ++ if (count == 0) { ++ down(&sb->s_dquot.dqonoff_sem); ++ sb->s_dquot.flags = 0; ++ wmb(); synchronize_kernel(); ++ sema_init(&sb->s_dquot.dqio_sem, 1); ++ sb->s_qcop = orig_dq_cop; ++ sb->dq_op = orig_dq_op; ++ inode_qmblk_lock(sb); ++ quota_gen_put(SB_QGEN(sb)); ++ SB_QGEN(sb) = NULL; ++ /* release qlnk's without qmblk */ ++ remove_inode_quota_links_list(&non_vzquota_inodes_lh, ++ sb, NULL); ++ /* ++ * Races with quota initialization: ++ * after this inode_qmblk_unlock all inode's generations are ++ * invalidated, quota_inode_qmblk checks superblock operations. ++ */ ++ inode_qmblk_unlock(sb); ++ /* ++ * Module refcounting: in theory, this is the best place ++ * to call module_put(THIS_MODULE). ++ * In reality, it can't be done because we can't be sure that ++ * other CPUs do not enter our code segment through dq_op ++ * cached long time ago. Quotaops interface isn't supposed to ++ * go into modules currently (that is, into unloadable ++ * modules). By omitting module_put, our module isn't ++ * unloadable. ++ */ ++ up(&sb->s_dquot.dqonoff_sem); ++ } ++} ++ ++#else ++ ++struct vzquota_new_sop { ++ struct super_operations new_op; ++ struct super_operations *old_op; ++}; ++ ++/** ++ * vzquota_shutdown_super - callback on umount ++ */ ++void vzquota_shutdown_super(struct super_block *sb) ++{ ++ struct vz_quota_master *qmblk; ++ struct vzquota_new_sop *sop; ++ ++ qmblk = __VZ_QUOTA_NOQUOTA(sb); ++ __VZ_QUOTA_NOQUOTA(sb) = NULL; ++ if (qmblk != NULL) ++ qmblk_put(qmblk); ++ sop = container_of(sb->s_op, struct vzquota_new_sop, new_op); ++ sb->s_op = sop->old_op; ++ kfree(sop); ++ (*sb->s_op->put_super)(sb); ++} ++ ++/** ++ * vzquota_get_super - account for new a quoted tree under the superblock ++ * ++ * One superblock can have multiple directory subtrees with different VZ ++ * quotas. ++ * ++ * Called under vz_quota_sem (from vzquota_on). ++ */ ++int vzquota_get_super(struct super_block *sb) ++{ ++ struct vz_quota_master *qnew; ++ struct vzquota_new_sop *sop; ++ int err; ++ ++ down(&sb->s_dquot.dqonoff_sem); ++ err = -EEXIST; ++ if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) && ++ sb->dq_op != &vz_quota_operations) ++ goto out_up; ++ ++ /* ++ * This allocation code should be under sb->dq_op check below, but ++ * it doesn't really matter... 
++ */ ++ if (__VZ_QUOTA_NOQUOTA(sb) == NULL) { ++ qnew = vzquota_alloc_fake(); ++ if (qnew == NULL) ++ goto out_up; ++ __VZ_QUOTA_NOQUOTA(sb) = qnew; ++ } ++ ++ if (sb->dq_op != &vz_quota_operations) { ++ sop = kmalloc(sizeof(*sop), GFP_KERNEL); ++ if (sop == NULL) { ++ vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb)); ++ __VZ_QUOTA_NOQUOTA(sb) = NULL; ++ goto out_up; ++ } ++ memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op)); ++ sop->new_op.put_super = &vzquota_shutdown_super; ++ sop->old_op = sb->s_op; ++ sb->s_op = &sop->new_op; ++ ++ sb->dq_op = &vz_quota_operations; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ sb->s_qcop = &vz_quotactl_operations; ++#else ++ sb->s_qcop = NULL; ++#endif ++ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb)); ++ ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ /* these 2 list heads are checked in sync_dquots() */ ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ sb->s_dquot.info[USRQUOTA].dqi_format = ++ &vz_quota_empty_v2_format; ++ sb->s_dquot.info[GRPQUOTA].dqi_format = ++ &vz_quota_empty_v2_format; ++ ++ /* ++ * To get quotaops.h to call us we need to mark superblock ++ * as having quota. These flags mark the moment when ++ * our dq_op start to be called. ++ * ++ * The ordering of dq_op and s_dquot.flags assignment ++ * needs to be enforced, but other CPUs do not do rmb() ++ * between s_dquot.flags and dq_op accesses. ++ */ ++ wmb(); synchronize_kernel(); ++ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED; ++ } ++ err = 0; ++ ++out_up: ++ up(&sb->s_dquot.dqonoff_sem); ++ return err; ++} ++ ++/** ++ * vzquota_put_super - one quota tree less on this superblock ++ * ++ * Called under vz_quota_sem. ++ */ ++void vzquota_put_super(struct super_block *sb) ++{ ++ /* ++ * Even if this put is the last one, ++ * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop ++ * won't be called and the remaining qmblk references won't be put. 
++ */ ++} ++ ++#endif ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Helpers for inode -> qmblk link maintenance ++ * ++ * --------------------------------------------------------------------- */ ++ ++#define __VZ_QUOTA_EMPTY ((void *)0xbdbdbdbd) ++#define VZ_QUOTA_IS_NOQUOTA(qm, sb) ((qm)->dq_flags & VZDQ_NOQUOT) ++#define VZ_QUOTA_EMPTY_IOPS (&vfs_empty_iops) ++extern struct inode_operations vfs_empty_iops; ++ ++static int VZ_QUOTA_IS_ACTUAL(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk == VZ_QUOTA_BAD) ++ return 1; ++ if (qmblk == __VZ_QUOTA_EMPTY) ++ return 0; ++ if (qmblk->dq_flags & VZDQ_NOACT) ++ /* not actual (invalidated) qmblk */ ++ return 0; ++ return 1; ++} ++ ++static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk) ++{ ++ return qlnk->qmblk == __VZ_QUOTA_EMPTY; ++} ++ ++static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk) ++{ ++ qlnk->qmblk = __VZ_QUOTA_EMPTY; ++ qlnk->origin = VZ_QUOTAO_SETE; ++} ++ ++void vzquota_qlnk_init(struct vz_quota_ilink *qlnk) ++{ ++ memset(qlnk, 0, sizeof(*qlnk)); ++ INIT_LIST_HEAD(&qlnk->list); ++ vzquota_qlnk_set_empty(qlnk); ++ qlnk->origin = VZ_QUOTAO_INIT; ++} ++ ++void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk) ++{ ++ might_sleep(); ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return; ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) { ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *quid, *qgid; ++ qmblk = qlnk->qmblk; ++ quid = qlnk->qugid[USRQUOTA]; ++ qgid = qlnk->qugid[GRPQUOTA]; ++ if (quid != NULL || qgid != NULL) { ++ down(&qmblk->dq_sem); ++ if (qgid != NULL) ++ vzquota_put_ugid(qmblk, qgid); ++ if (quid != NULL) ++ vzquota_put_ugid(qmblk, quid); ++ up(&qmblk->dq_sem); ++ } ++ } ++#endif ++ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) ++ qmblk_put(qlnk->qmblk); ++ qlnk->origin = VZ_QUOTAO_DESTR; ++} ++ ++/** ++ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents ++ * @qlt: temporary ++ * @qli: inode's ++ * ++ * Locking is provided by the caller (depending on the context). ++ * After swap, @qli is inserted into the corresponding dq_ilink_list, ++ * @qlt list is reinitialized. ++ */ ++static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt, ++ struct vz_quota_ilink *qli) ++{ ++ struct vz_quota_master *qb; ++ struct vz_quota_ugid *qu; ++ int i; ++ ++ qb = qlt->qmblk; ++ qlt->qmblk = qli->qmblk; ++ qli->qmblk = qb; ++ list_del_init(&qli->list); ++ if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD) ++ list_add(&qli->list, &qb->dq_ilink_list); ++ INIT_LIST_HEAD(&qlt->list); ++ qli->origin = VZ_QUOTAO_SWAP; ++ ++ for (i = 0; i < MAXQUOTAS; i++) { ++ qu = qlt->qugid[i]; ++ qlt->qugid[i] = qli->qugid[i]; ++ qli->qugid[i] = qu; ++ } ++} ++ ++/** ++ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks ++ * ++ * Called under dcache_lock and inode_qmblk locks. ++ * Returns 1 if locks were dropped inside, 0 if atomic. 
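++ *
++ * A return of 1 tells the caller that its earlier checks may be stale; the
++ * callers propagate it upwards and the outermost user simply retries, as in
++ * the do { ... } while (vzquota_inode_qmblk_set(...)) loop in
++ * vzquota_dbranch_actualize() below.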
++ */ ++static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk, ++ struct inode *inode) ++{ ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return 0; ++ if (qlnk->qmblk == VZ_QUOTA_BAD) { ++ vzquota_qlnk_set_empty(qlnk); ++ return 0; ++ } ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(qlnk); ++ vzquota_qlnk_init(qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ spin_lock(&dcache_lock); ++ return 1; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content ++ * ++ * Similar to vzquota_qlnk_reinit_locked, called under different locks. ++ */ ++static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ if (vzquota_qlnk_is_empty(qlnk)) ++ return 0; ++ /* may be optimized if qlnk->qugid all NULLs */ ++ qmblk_data_write_unlock(qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(qlnk); ++ vzquota_qlnk_init(qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ qmblk_data_write_lock(qmblk); ++ return 1; ++} ++#endif ++ ++/** ++ * vzquota_qlnk_fill - fill vz_quota_ilink content ++ * @qlnk: vz_quota_ilink to fill ++ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid) ++ * @qmblk: qmblk to which this @qlnk will belong ++ * ++ * Called under dcache_lock and inode_qmblk locks. ++ * Returns 1 if locks were dropped inside, 0 if atomic. ++ * @qlnk is expected to be empty. ++ */ ++static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ if (qmblk != VZ_QUOTA_BAD) ++ qmblk_get(qmblk); ++ qlnk->qmblk = qmblk; ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++ if (qmblk != VZ_QUOTA_BAD && ++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) && ++ (qmblk->dq_flags & VZDQUG_ON)) { ++ struct vz_quota_ugid *quid, *qgid; ++ ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(inode->i_sb); ++ ++ down(&qmblk->dq_sem); ++ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0); ++ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0); ++ up(&qmblk->dq_sem); ++ ++ inode_qmblk_lock(inode->i_sb); ++ spin_lock(&dcache_lock); ++ qlnk->qugid[USRQUOTA] = quid; ++ qlnk->qugid[GRPQUOTA] = qgid; ++ return 1; ++ } ++#endif ++ ++ return 0; ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid ++ * ++ * This function is a helper for vzquota_transfer, and differs from ++ * vzquota_qlnk_fill only by locking. 
++ */ ++static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk, ++ struct inode *inode, ++ struct iattr *iattr, ++ int mask, ++ struct vz_quota_master *qmblk) ++{ ++ qmblk_get(qmblk); ++ qlnk->qmblk = qmblk; ++ ++ if (mask) { ++ struct vz_quota_ugid *quid, *qgid; ++ ++ quid = qgid = NULL; /* to make gcc happy */ ++ if (!(mask & (1 << USRQUOTA))) ++ quid = vzquota_get_ugid(INODE_QLNK(inode)-> ++ qugid[USRQUOTA]); ++ if (!(mask & (1 << GRPQUOTA))) ++ qgid = vzquota_get_ugid(INODE_QLNK(inode)-> ++ qugid[GRPQUOTA]); ++ ++ qmblk_data_write_unlock(qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ ++ down(&qmblk->dq_sem); ++ if (mask & (1 << USRQUOTA)) ++ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid, ++ USRQUOTA, 0); ++ if (mask & (1 << GRPQUOTA)) ++ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid, ++ GRPQUOTA, 0); ++ up(&qmblk->dq_sem); ++ ++ inode_qmblk_lock(inode->i_sb); ++ qmblk_data_write_lock(qmblk); ++ qlnk->qugid[USRQUOTA] = quid; ++ qlnk->qugid[GRPQUOTA] = qgid; ++ return 1; ++ } ++ ++ return 0; ++} ++#endif ++ ++/** ++ * __vzquota_inode_init - make sure inode's qlnk is initialized ++ * ++ * May be called if qlnk is already initialized, detects this situation itself. ++ * Called under inode_qmblk_lock. ++ */ ++static void __vzquota_inode_init(struct inode *inode, unsigned char origin) ++{ ++ if (inode->i_dquot[USRQUOTA] == NODQUOT) { ++ vzquota_qlnk_init(INODE_QLNK(inode)); ++ inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT; ++ } ++ INODE_QLNK(inode)->origin = origin; ++} ++ ++/** ++ * vzquota_inode_drop - destroy VZ quota information in the inode ++ * ++ * Inode must not be externally accessible or dirty. ++ */ ++static void vzquota_inode_drop(struct inode *inode) ++{ ++ struct vz_quota_ilink qlnk; ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL; ++ inode->i_dquot[USRQUOTA] = NODQUOT; ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++} ++ ++/** ++ * vzquota_inode_qmblk_set - initialize inode's qlnk ++ * @inode: inode to be initialized ++ * @qmblk: quota master block to which this inode should belong (may be BAD) ++ * @qlnk: placeholder to store data to resolve locking issues ++ * ++ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise. ++ * Called under dcache_lock and inode_qmblk locks. ++ * @qlnk will be destroyed in the caller chain. ++ * ++ * It is not mandatory to restart parent checks since quota on/off currently ++ * shrinks dentry tree and checks that there are not outside references. ++ * But if at some time that shink is removed, restarts will be required. ++ * Additionally, the restarts prevent inconsistencies if the dentry tree ++ * changes (inode is moved). This is not a big deal, but anyway... 
++ */ ++static int vzquota_inode_qmblk_set(struct inode *inode, ++ struct vz_quota_master *qmblk, ++ struct vz_quota_ilink *qlnk) ++{ ++ if (qmblk == NULL) { ++ printk(KERN_ERR "VZDQ: NULL in set, " ++ "orig %u, dev %s, inode %lu, fs %s\n", ++ INODE_QLNK(inode)->origin, ++ inode->i_sb->s_id, inode->i_ino, ++ inode->i_sb->s_type->name); ++ printk(KERN_ERR "current %d (%s), VE %d\n", ++ current->pid, current->comm, ++ VEID(get_exec_env())); ++ dump_stack(); ++ qmblk = VZ_QUOTA_BAD; ++ } ++ while (1) { ++ if (vzquota_qlnk_is_empty(qlnk) && ++ vzquota_qlnk_fill(qlnk, inode, qmblk)) ++ return 1; ++ if (qlnk->qmblk == qmblk) ++ break; ++ if (vzquota_qlnk_reinit_locked(qlnk, inode)) ++ return 1; ++ } ++ vzquota_qlnk_swap(qlnk, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET; ++ return 0; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * vzquota_inode_qmblk (inode -> qmblk lookup) parts ++ * ++ * --------------------------------------------------------------------- */ ++ ++static int vzquota_dparents_check_attach(struct inode *inode) ++{ ++ if (!list_empty(&inode->i_dentry)) ++ return 0; ++ printk(KERN_ERR "VZDQ: no parent for " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ return -1; ++} ++ ++static struct inode *vzquota_dparents_check_actual(struct inode *inode) ++{ ++ struct dentry *de; ++ ++ list_for_each_entry(de, &inode->i_dentry, d_alias) { ++ if (de->d_parent == de) /* detached dentry, perhaps */ ++ continue; ++ /* first access to parent, make sure its qlnk initialized */ ++ __vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT); ++ if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode)) ++ return de->d_parent->d_inode; ++ } ++ return NULL; ++} ++ ++static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode) ++{ ++ struct dentry *de; ++ struct vz_quota_master *qmblk; ++ ++ qmblk = NULL; ++ list_for_each_entry(de, &inode->i_dentry, d_alias) { ++ if (de->d_parent == de) /* detached dentry, perhaps */ ++ continue; ++ if (qmblk == NULL) { ++ qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk; ++ continue; ++ } ++ if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) { ++ printk(KERN_WARNING "VZDQ: multiple quotas for " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ qmblk = VZ_QUOTA_BAD; ++ break; ++ } ++ } ++ if (qmblk == NULL) { ++ printk(KERN_WARNING "VZDQ: not attached to tree, " ++ "dev %s, inode %lu, fs %s\n", ++ inode->i_sb->s_id, ++ inode->i_ino, ++ inode->i_sb->s_type->name); ++ qmblk = VZ_QUOTA_BAD; ++ } ++ return qmblk; ++} ++ ++static void vzquota_dbranch_actualize(struct inode *inode, ++ struct inode *refinode) ++{ ++ struct inode *pinode; ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk; ++ ++ vzquota_qlnk_init(&qlnk); ++ ++start: ++ if (inode == inode->i_sb->s_root->d_inode) { ++ /* filesystem root */ ++ atomic_inc(&inode->i_count); ++ do { ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ } while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk)); ++ goto out; ++ } ++ ++ if (!vzquota_dparents_check_attach(inode)) { ++ pinode = vzquota_dparents_check_actual(inode); ++ if (pinode != NULL) { ++ inode = pinode; ++ goto start; ++ } ++ } ++ ++ atomic_inc(&inode->i_count); ++ while (1) { ++ if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */ ++ break; ++ /* ++ * Need to check parents again if we have slept inside ++ * vzquota_inode_qmblk_set() in the loop. 
++ * If the state of parents is different, just return and repeat ++ * the actualizing process again from the inode passed to ++ * vzquota_inode_qmblk_recalc(). ++ */ ++ if (!vzquota_dparents_check_attach(inode)) { ++ if (vzquota_dparents_check_actual(inode) != NULL) ++ break; ++ qmblk = vzquota_dparents_check_same(inode); ++ } else ++ qmblk = VZ_QUOTA_BAD; ++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */ ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT; ++ break; ++ } ++ } ++ ++out: ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(refinode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++ iput(inode); ++ inode_qmblk_lock(refinode->i_sb); ++ spin_lock(&dcache_lock); ++} ++ ++static void vzquota_dtree_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct inode *pinode; ++ struct vz_quota_master *qmblk; ++ ++ if (inode == inode->i_sb->s_root->d_inode) { ++ /* filesystem root */ ++ do { ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ } while (vzquota_inode_qmblk_set(inode, qmblk, qlnk)); ++ return; ++ } ++ ++start: ++ if (VZ_QUOTA_IS_ACTUAL(inode)) ++ return; ++ /* ++ * Here qmblk is (re-)initialized for all ancestors. ++ * This is not a very efficient procedure, but it guarantees that ++ * the quota tree is consistent (that is, the inode doesn't have two ++ * ancestors with different qmblk). ++ */ ++ if (!vzquota_dparents_check_attach(inode)) { ++ pinode = vzquota_dparents_check_actual(inode); ++ if (pinode != NULL) { ++ vzquota_dbranch_actualize(pinode, inode); ++ goto start; ++ } ++ qmblk = vzquota_dparents_check_same(inode); ++ } else ++ qmblk = VZ_QUOTA_BAD; ++ ++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk)) ++ goto start; ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE; ++} ++ ++static void vzquota_det_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ struct inode *parent; ++ struct vz_quota_master *qmblk; ++ char *msg; ++ int cnt; ++ time_t timeout; ++ ++ cnt = 0; ++ parent = NULL; ++start: ++ /* ++ * qmblk of detached inodes shouldn't be considered as not actual. ++ * They are not in any dentry tree, so quota on/off shouldn't affect ++ * them. 
++ */ ++ if (!vzquota_qlnk_is_empty(INODE_QLNK(inode))) ++ return; ++ ++ timeout = 3; ++ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb); ++ msg = "detached inode not in creation"; ++ if (inode->i_op != VZ_QUOTA_EMPTY_IOPS) ++ goto fail; ++ qmblk = VZ_QUOTA_BAD; ++ msg = "unexpected creation context"; ++ if (!vzquota_cur_qmblk_check()) ++ goto fail; ++ timeout = 0; ++ parent = vzquota_cur_qmblk_fetch(); ++ msg = "uninitialized parent"; ++ if (vzquota_qlnk_is_empty(INODE_QLNK(parent))) ++ goto fail; ++ msg = "parent not in tree"; ++ if (list_empty(&parent->i_dentry)) ++ goto fail; ++ msg = "parent has 0 refcount"; ++ if (!atomic_read(&parent->i_count)) ++ goto fail; ++ msg = "parent has different sb"; ++ if (parent->i_sb != inode->i_sb) ++ goto fail; ++ if (!VZ_QUOTA_IS_ACTUAL(parent)) { ++ vzquota_dbranch_actualize(parent, inode); ++ goto start; ++ } ++ ++ qmblk = INODE_QLNK(parent)->qmblk; ++set: ++ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk)) ++ goto start; ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_DET; ++ return; ++ ++fail: ++ { ++ struct timeval tv, tvo; ++ do_gettimeofday(&tv); ++ memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo)); ++ tv.tv_sec -= tvo.tv_sec; ++ if (tv.tv_usec < tvo.tv_usec) { ++ tv.tv_sec--; ++ tv.tv_usec += USEC_PER_SEC - tvo.tv_usec; ++ } else ++ tv.tv_usec -= tvo.tv_usec; ++ if (tv.tv_sec < timeout) ++ goto set; ++ printk(KERN_ERR "VZDQ: %s, orig %u," ++ " dev %s, inode %lu, fs %s\n", ++ msg, INODE_QLNK(inode)->origin, ++ inode->i_sb->s_id, inode->i_ino, ++ inode->i_sb->s_type->name); ++ if (!cnt++) { ++ printk(KERN_ERR "current %d (%s), VE %d," ++ " time %ld.%06ld\n", ++ current->pid, current->comm, ++ VEID(get_exec_env()), ++ tv.tv_sec, tv.tv_usec); ++ dump_stack(); ++ } ++ if (parent != NULL) ++ printk(KERN_ERR "VZDQ: parent of %lu is %lu\n", ++ inode->i_ino, parent->i_ino); ++ } ++ goto set; ++} ++ ++static void vzquota_inode_qmblk_recalc(struct inode *inode, ++ struct vz_quota_ilink *qlnk) ++{ ++ spin_lock(&dcache_lock); ++ if (!list_empty(&inode->i_dentry)) ++ vzquota_dtree_qmblk_recalc(inode, qlnk); ++ else ++ vzquota_det_qmblk_recalc(inode, qlnk); ++ spin_unlock(&dcache_lock); ++} ++ ++/** ++ * vzquota_inode_qmblk - obtain inode's qmblk ++ * ++ * Returns qmblk with refcounter taken, %NULL if not under ++ * VZ quota or %VZ_QUOTA_BAD. ++ * ++ * FIXME: This function should be removed when vzquota_find_qmblk / ++ * get_quota_root / vzquota_dstat code is cleaned up. 
++ */ ++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk; ++ ++ might_sleep(); ++ ++ if (inode->i_sb->dq_op != &vz_quota_operations) ++ return NULL; ++#if defined(VZ_QUOTA_UNLOAD) ++#error Make sure qmblk does not disappear ++#endif ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) ++ vzquota_inode_qmblk_recalc(inode, &qlnk); ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != VZ_QUOTA_BAD) { ++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) ++ qmblk_get(qmblk); ++ else ++ qmblk = NULL; ++ } ++ ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk); ++ return qmblk; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Calls from quota operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_inode_init_call - call from DQUOT_INIT ++ */ ++void vzquota_inode_init_call(struct inode *inode) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ ++ /* initializes inode's quota inside */ ++ qmblk = vzquota_inode_data(inode, &data); ++ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD) ++ vzquota_data_unlock(inode, &data); ++ ++ /* ++ * The check is needed for repeated new_inode() calls from a single ++ * ext3 call like create or mkdir in case of -ENOSPC. ++ */ ++ spin_lock(&dcache_lock); ++ if (!list_empty(&inode->i_dentry)) ++ vzquota_cur_qmblk_set(inode); ++ spin_unlock(&dcache_lock); ++} ++ ++/** ++ * vzquota_inode_drop_call - call from DQUOT_DROP ++ */ ++void vzquota_inode_drop_call(struct inode *inode) ++{ ++ vzquota_inode_drop(inode); ++} ++ ++/** ++ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs ++ * @inode: the inode ++ * @data: storage space ++ * ++ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk. ++ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD: ++ * qmblk in inode's qlnk is the same as returned, ++ * ugid pointers inside inode's qlnk are valid, ++ * some locks are taken (and should be released by vzquota_data_unlock). ++ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken. ++ */ ++struct vz_quota_master *vzquota_inode_data(struct inode *inode, ++ struct vz_quota_datast *data) ++{ ++ struct vz_quota_master *qmblk; ++ ++ might_sleep(); ++ ++ vzquota_qlnk_init(&data->qlnk); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) ++ vzquota_inode_qmblk_recalc(inode, &data->qlnk); ++ ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != VZ_QUOTA_BAD) { ++ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) { ++ /* ++ * Note that in the current implementation, ++ * inode_qmblk_lock can theoretically be dropped here. ++ * This place is serialized with quota_off because ++ * quota_off fails when there are extra dentry ++ * references and syncs inodes before removing quota ++ * information from them. ++ * However, quota usage information should stop being ++ * updated immediately after vzquota_off. 
++ */ ++ qmblk_data_write_lock(qmblk); ++ } else { ++ inode_qmblk_unlock(inode->i_sb); ++ qmblk = NULL; ++ } ++ } else { ++ inode_qmblk_unlock(inode->i_sb); ++ } ++ return qmblk; ++} ++ ++void vzquota_data_unlock(struct inode *inode, ++ struct vz_quota_datast *data) ++{ ++ qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk); ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&data->qlnk); ++} ++ ++#if defined(CONFIG_VZ_QUOTA_UGID) ++/** ++ * vzquota_inode_transfer_call - call from vzquota_transfer ++ */ ++int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_datast data; ++ struct vz_quota_ilink qlnew; ++ int mask; ++ int ret; ++ ++ might_sleep(); ++ vzquota_qlnk_init(&qlnew); ++start: ++ qmblk = vzquota_inode_data(inode, &data); ++ ret = NO_QUOTA; ++ if (qmblk == VZ_QUOTA_BAD) ++ goto out_destr; ++ ret = QUOTA_OK; ++ if (qmblk == NULL) ++ goto out_destr; ++ qmblk_get(qmblk); ++ ++ ret = QUOTA_OK; ++ if (!(qmblk->dq_flags & VZDQUG_ON)) ++ /* no ugid quotas */ ++ goto out_unlock; ++ ++ mask = 0; ++ if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid) ++ mask |= 1 << USRQUOTA; ++ if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid) ++ mask |= 1 << GRPQUOTA; ++ while (1) { ++ if (vzquota_qlnk_is_empty(&qlnew) && ++ vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk)) ++ break; ++ if (qlnew.qmblk == INODE_QLNK(inode)->qmblk && ++ qlnew.qmblk == qmblk) ++ goto finish; ++ if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk)) ++ break; ++ } ++ ++ /* prepare for restart */ ++ vzquota_data_unlock(inode, &data); ++ qmblk_put(qmblk); ++ goto start; ++ ++finish: ++ /* all references obtained successfully */ ++ ret = vzquota_transfer_usage(inode, mask, &qlnew); ++ if (!ret) { ++ vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode)); ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS; ++ } ++out_unlock: ++ vzquota_data_unlock(inode, &data); ++ qmblk_put(qmblk); ++out_destr: ++ vzquota_qlnk_destroy(&qlnew); ++ return ret; ++} ++#endif ++ ++int vzquota_rename_check(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ilink qlnk1, qlnk2; ++ int c, ret; ++ ++ if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb) ++ return -1; ++ ++ might_sleep(); ++ ++ vzquota_qlnk_init(&qlnk1); ++ vzquota_qlnk_init(&qlnk2); ++ inode_qmblk_lock(inode->i_sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ __vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL); ++ __vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL); ++ ++ do { ++ c = 0; ++ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) || ++ !VZ_QUOTA_IS_ACTUAL(inode)) { ++ vzquota_inode_qmblk_recalc(inode, &qlnk1); ++ c++; ++ } ++ if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) || ++ !VZ_QUOTA_IS_ACTUAL(new_dir)) { ++ vzquota_inode_qmblk_recalc(new_dir, &qlnk2); ++ c++; ++ } ++ } while (c); ++ ++ ret = 0; ++ qmblk = INODE_QLNK(inode)->qmblk; ++ if (qmblk != INODE_QLNK(new_dir)->qmblk) { ++ ret = -1; ++ if (qmblk != VZ_QUOTA_BAD && ++ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) && ++ qmblk->dq_root_dentry->d_inode == inode && ++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk, ++ inode->i_sb) && ++ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk, ++ inode->i_sb)) ++ /* quota root rename is allowed */ ++ ret = 0; ++ } ++ ++ inode_qmblk_unlock(inode->i_sb); ++ vzquota_qlnk_destroy(&qlnk2); ++ vzquota_qlnk_destroy(&qlnk1); ++ return ret; ++} ++ ++ ++/* 
---------------------------------------------------------------------- ++ * ++ * qmblk-related parts of on/off operations ++ * ++ * --------------------------------------------------------------------- */ ++ ++/** ++ * vzquota_check_dtree - check dentry tree if quota on/off is allowed ++ * ++ * This function doesn't allow quota to be turned on/off if some dentries in ++ * the tree have external references. ++ * In addition to technical reasons, it enforces user-space correctness: ++ * current usage (taken from or reported to the user space) can be meaningful ++ * and accurate only if the tree is not being modified. ++ * Side effect: additional vfsmount structures referencing the tree (bind ++ * mounts of tree nodes to some other places) are not allowed at on/off time. ++ */ ++int vzquota_check_dtree(struct vz_quota_master *qmblk, int off) ++{ ++ struct dentry *dentry; ++ int err, count; ++ ++ err = -EBUSY; ++ dentry = qmblk->dq_root_dentry; ++ ++ if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root) ++ goto unhashed; ++ ++ /* attempt to shrink */ ++ if (!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&dcache_lock); ++ inode_qmblk_unlock(dentry->d_sb); ++ shrink_dcache_parent(dentry); ++ inode_qmblk_lock(dentry->d_sb); ++ spin_lock(&dcache_lock); ++ if (!list_empty(&dentry->d_subdirs)) ++ goto out; ++ ++ count = 1; ++ if (dentry == dentry->d_sb->s_root) ++ count += 2; /* sb and mnt refs */ ++ if (atomic_read(&dentry->d_count) < count) { ++ printk(KERN_ERR "%s: too small count %d vs %d.\n", ++ __FUNCTION__, ++ atomic_read(&dentry->d_count), count); ++ goto out; ++ } ++ if (atomic_read(&dentry->d_count) > count) ++ goto out; ++ } ++ ++ err = 0; ++out: ++ return err; ++ ++unhashed: ++ /* ++ * Quota root is removed. ++ * Allow to turn quota off, but not on. 
++ */ ++ if (off) ++ err = 0; ++ goto out; ++} ++ ++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode, ++ struct vz_quota_master *qmblk) ++{ ++ struct vz_quota_ilink qlnk; ++ struct vz_quota_master *qold, *qnew; ++ int err; ++ ++ might_sleep(); ++ ++ qold = NULL; ++ qnew = vzquota_alloc_fake(); ++ if (qnew == NULL) ++ return -ENOMEM; ++ ++ vzquota_qlnk_init(&qlnk); ++ inode_qmblk_lock(sb); ++ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL); ++ ++ spin_lock(&dcache_lock); ++ while (1) { ++ err = vzquota_check_dtree(qmblk, 0); ++ if (err) ++ break; ++ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)) ++ break; ++ } ++ INODE_QLNK(inode)->origin = VZ_QUOTAO_ON; ++ spin_unlock(&dcache_lock); ++ ++ if (!err) { ++ qold = __VZ_QUOTA_NOQUOTA(sb); ++ qold->dq_flags |= VZDQ_NOACT; ++ __VZ_QUOTA_NOQUOTA(sb) = qnew; ++ } ++ ++ inode_qmblk_unlock(sb); ++ vzquota_qlnk_destroy(&qlnk); ++ if (qold != NULL) ++ qmblk_put(qold); ++ ++ return err; ++} ++ ++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk) ++{ ++ int ret; ++ ++ ret = 0; ++ inode_qmblk_lock(sb); ++ ++ spin_lock(&dcache_lock); ++ if (vzquota_check_dtree(qmblk, 1)) ++ ret = -EBUSY; ++ spin_unlock(&dcache_lock); ++ ++ if (!ret) ++ qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT; ++ inode_qmblk_unlock(sb); ++ return ret; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * External interfaces ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int vzquota_ioctl(struct inode *ino, struct file *file, ++ unsigned int cmd, unsigned long arg) ++{ ++ int err; ++ struct vzctl_quotactl qb; ++ struct vzctl_quotaugidctl qub; ++ ++ switch (cmd) { ++ case VZCTL_QUOTA_CTL: ++ err = -ENOTTY; ++ break; ++ case VZCTL_QUOTA_NEW_CTL: ++ err = -EFAULT; ++ if (copy_from_user(&qb, (void *)arg, sizeof(qb))) ++ break; ++ err = do_vzquotactl(qb.cmd, qb.quota_id, ++ qb.qstat, qb.ve_root); ++ break; ++#ifdef CONFIG_VZ_QUOTA_UGID ++ case VZCTL_QUOTA_UGID_CTL: ++ err = -EFAULT; ++ if (copy_from_user(&qub, (void *)arg, sizeof(qub))) ++ break; ++ err = do_vzquotaugidctl(&qub); ++ break; ++#endif ++ default: ++ err = -ENOTTY; ++ } ++ might_sleep(); /* debug */ ++ return err; ++} ++ ++static struct vzioctlinfo vzdqcalls = { ++ .type = VZDQCTLTYPE, ++ .func = vzquota_ioctl, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * vzquota_dstat - get quota usage info for virtual superblock ++ */ ++static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat) ++{ ++ struct vz_quota_master *qmblk; ++ ++ qmblk = vzquota_find_qmblk(super); ++ if (qmblk == NULL) ++ return -ENOENT; ++ if (qmblk == VZ_QUOTA_BAD) { ++ memset(qstat, 0, sizeof(*qstat)); ++ return 0; ++ } ++ ++ qmblk_data_read_lock(qmblk); ++ memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat)); ++ qmblk_data_read_unlock(qmblk); ++ qmblk_put(qmblk); ++ return 0; ++} ++ ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Init/exit helpers ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int vzquota_cache_init(void) ++{ ++ int i; ++ ++ vzquota_cachep = kmem_cache_create("vz_quota_master", ++ sizeof(struct vz_quota_master), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (vzquota_cachep == NULL) { ++ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n"); ++ goto nomem2; ++ } ++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++) ++ INIT_LIST_HEAD(&vzquota_hash_table[i]); ++ ++ return 0; ++ ++nomem2: ++ return -ENOMEM; ++} ++ ++static void 
vzquota_cache_release(void) ++{ ++ int i; ++ ++ /* sanity check */ ++ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++) ++ if (!list_empty(&vzquota_hash_table[i])) ++ BUG(); ++ ++ /* release caches */ ++ if (kmem_cache_destroy(vzquota_cachep)) ++ printk(KERN_ERR ++ "VZQUOTA: vz_quota_master kmem_cache_destroy failed\n"); ++ vzquota_cachep = NULL; ++} ++ ++static int quota_notifier_call(struct vnotifier_block *self, ++ unsigned long n, void *data, int err) ++{ ++ struct virt_info_quota *viq; ++ struct super_block *sb; ++ ++ viq = (struct virt_info_quota *)data; ++ switch (n) { ++ case VIRTINFO_QUOTA_ON: ++ err = NOTIFY_BAD; ++ if (!try_module_get(THIS_MODULE)) ++ break; ++ sb = viq->super; ++ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info)); ++ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list); ++ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list); ++ err = NOTIFY_OK; ++ break; ++ case VIRTINFO_QUOTA_OFF: ++ module_put(THIS_MODULE); ++ err = NOTIFY_OK; ++ break; ++ case VIRTINFO_QUOTA_GETSTAT: ++ err = NOTIFY_BAD; ++ if (vzquota_dstat(viq->super, viq->qstat)) ++ break; ++ err = NOTIFY_OK; ++ break; ++ } ++ return err; ++} ++ ++struct vnotifier_block quota_notifier_block = { ++ .notifier_call = quota_notifier_call, ++ .priority = INT_MAX, ++}; ++ ++/* ---------------------------------------------------------------------- ++ * ++ * Init/exit procedures ++ * ++ * ---------------------------------------------------------------------*/ ++ ++static int __init vzquota_init(void) ++{ ++ int err; ++ ++ if ((err = vzquota_cache_init()) != 0) ++ goto out_cache; ++ ++ if ((err = vzquota_proc_init()) != 0) ++ goto out_proc; ++ ++#ifdef CONFIG_VZ_QUOTA_UGID ++ if ((err = vzquota_ugid_init()) != 0) ++ goto out_ugid; ++#endif ++ ++ init_MUTEX(&vz_quota_sem); ++ vzioctl_register(&vzdqcalls); ++ virtinfo_notifier_register(VITYPE_QUOTA, "a_notifier_block); ++#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS) ++ vzaquota_init(); ++#endif ++ ++ return 0; ++ ++#ifdef CONFIG_VZ_QUOTA_UGID ++out_ugid: ++ vzquota_proc_release(); ++#endif ++out_proc: ++ vzquota_cache_release(); ++out_cache: ++ return err; ++} ++ ++#if defined(VZ_QUOTA_UNLOAD) ++static void __exit vzquota_release(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_QUOTA, "a_notifier_block); ++ vzioctl_unregister(&vzdqcalls); ++#ifdef CONFIG_VZ_QUOTA_UGID ++#ifdef CONFIG_PROC_FS ++ vzaquota_fini(); ++#endif ++ vzquota_ugid_release(); ++#endif ++ vzquota_proc_release(); ++ vzquota_cache_release(); ++} ++#endif ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Disk Quota"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(vzquota_init) ++#if defined(VZ_QUOTA_UNLOAD) ++module_exit(vzquota_release) ++#endif +diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_buf.c linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_buf.c +--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_buf.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_buf.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1628,8 +1628,8 @@ pagebuf_daemon( + INIT_LIST_HEAD(&tmp); + do { + /* swsusp */ +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100); +diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_iops.c +--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_iops.c 2004-08-14 14:55:10.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_iops.c 2006-05-11 13:05:35.000000000 +0400 +@@ -468,7 +468,8 @@ STATIC int + linvfs_permission( + struct inode *inode, + int mode, +- struct nameidata *nd) ++ struct nameidata *nd, ++ struct exec_perm *exec_perm) + { + vnode_t *vp = LINVFS_GET_VP(inode); + int error; +diff -uprN linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_super.c linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_super.c +--- linux-2.6.8.1.orig/fs/xfs/linux-2.6/xfs_super.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/fs/xfs/linux-2.6/xfs_super.c 2006-05-11 13:05:35.000000000 +0400 +@@ -356,7 +356,7 @@ destroy_inodecache( void ) + * at the point when it is unpinned after a log write, + * since this is when the inode itself becomes flushable. + */ +-STATIC void ++STATIC int + linvfs_write_inode( + struct inode *inode, + int sync) +@@ -364,12 +364,14 @@ linvfs_write_inode( + vnode_t *vp = LINVFS_GET_VP(inode); + int error, flags = FLUSH_INODE; + ++ error = 0; + if (vp) { + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if (sync) + flags |= FLUSH_SYNC; + VOP_IFLUSH(vp, flags, error); + } ++ return error; + } + + STATIC void +@@ -408,8 +410,8 @@ xfssyncd( + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout((xfs_syncd_centisecs * HZ) / 100); + /* swsusp */ +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + if (vfsp->vfs_flag & VFS_UMOUNT) + break; + if (vfsp->vfs_flag & VFS_RDONLY) +diff -uprN linux-2.6.8.1.orig/include/asm-generic/pgtable.h linux-2.6.8.1-ve022stab078/include/asm-generic/pgtable.h +--- linux-2.6.8.1.orig/include/asm-generic/pgtable.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-generic/pgtable.h 2006-05-11 13:05:30.000000000 +0400 +@@ -126,4 +126,8 @@ static inline void ptep_mkdirty(pte_t *p + #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) + #endif + ++#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE ++#define lazy_mmu_prot_update(pte) do { } while (0) ++#endif ++ + #endif /* _ASM_GENERIC_PGTABLE_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-generic/tlb.h linux-2.6.8.1-ve022stab078/include/asm-generic/tlb.h +--- linux-2.6.8.1.orig/include/asm-generic/tlb.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-generic/tlb.h 2006-05-11 13:05:39.000000000 +0400 +@@ -110,6 +110,9 @@ tlb_is_full_mm(struct mmu_gather *tlb) + * handling the additional races in SMP caused by other CPUs caching valid + * mappings in their TLBs. 
+ */ ++#include <ub/ub_mem.h> ++#include <ub/ub_vmpages.h> ++ + static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) + { + tlb->need_flush = 1; +diff -uprN linux-2.6.8.1.orig/include/asm-i386/apic.h linux-2.6.8.1-ve022stab078/include/asm-i386/apic.h +--- linux-2.6.8.1.orig/include/asm-i386/apic.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/apic.h 2006-05-11 13:05:32.000000000 +0400 +@@ -79,7 +79,7 @@ extern void sync_Arb_IDs (void); + extern void init_bsp_APIC (void); + extern void setup_local_APIC (void); + extern void init_apic_mappings (void); +-extern void smp_local_timer_interrupt (struct pt_regs * regs); ++extern asmlinkage void smp_local_timer_interrupt (struct pt_regs * regs); + extern void setup_boot_APIC_clock (void); + extern void setup_secondary_APIC_clock (void); + extern void setup_apic_nmi_watchdog (void); +diff -uprN linux-2.6.8.1.orig/include/asm-i386/atomic_kmap.h linux-2.6.8.1-ve022stab078/include/asm-i386/atomic_kmap.h +--- linux-2.6.8.1.orig/include/asm-i386/atomic_kmap.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/atomic_kmap.h 2006-05-11 13:05:38.000000000 +0400 +@@ -0,0 +1,96 @@ ++/* ++ * atomic_kmap.h: temporary virtual kernel memory mappings ++ * ++ * Copyright (C) 2003 Ingo Molnar <mingo@redhat.com> ++ */ ++ ++#ifndef _ASM_ATOMIC_KMAP_H ++#define _ASM_ATOMIC_KMAP_H ++ ++#ifdef __KERNEL__ ++ ++#include <linux/config.h> ++#include <asm/tlbflush.h> ++ ++#ifdef CONFIG_DEBUG_HIGHMEM ++#define HIGHMEM_DEBUG 1 ++#else ++#define HIGHMEM_DEBUG 0 ++#endif ++ ++extern pte_t *kmap_pte; ++#define kmap_prot PAGE_KERNEL ++#define kmap_prot_nocache PAGE_KERNEL_NOCACHE ++ ++#define PKMAP_BASE (0xff000000UL) ++#define NR_SHARED_PMDS ((0xffffffff-PKMAP_BASE+1)/PMD_SIZE) ++ ++static inline unsigned long __kmap_atomic_vaddr(enum km_type type) ++{ ++ enum fixed_addresses idx; ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ return __fix_to_virt(FIX_KMAP_BEGIN + idx); ++} ++ ++static inline void *__kmap_atomic_noflush(struct page *page, enum km_type type) ++{ ++ enum fixed_addresses idx; ++ unsigned long vaddr; ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ /* ++ * NOTE: entries that rely on some secondary TLB-flush ++ * effect must not be global: ++ */ ++ set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL)); ++ ++ return (void*) vaddr; ++} ++ ++static inline void *__kmap_atomic(struct page *page, enum km_type type) ++{ ++ enum fixed_addresses idx; ++ unsigned long vaddr; ++ ++ idx = type + KM_TYPE_NR*smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++#if HIGHMEM_DEBUG ++ BUG_ON(!pte_none(*(kmap_pte-idx))); ++#else ++ /* ++ * Performance optimization - do not flush if the new ++ * pte is the same as the old one: ++ */ ++ if (pte_val(*(kmap_pte-idx)) == pte_val(mk_pte(page, kmap_prot))) ++ return (void *) vaddr; ++#endif ++ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); ++ __flush_tlb_one(vaddr); ++ ++ return (void*) vaddr; ++} ++ ++static inline void __kunmap_atomic(void *kvaddr, enum km_type type) ++{ ++#if HIGHMEM_DEBUG ++ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; ++ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); ++ ++ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)); ++ /* ++ * force other mappings to Oops if they'll try to access ++ * this pte without first remap it ++ */ ++ pte_clear(kmap_pte-idx); ++ __flush_tlb_one(vaddr); ++#endif ++} ++ ++#define 
__kunmap_atomic_type(type) \ ++ __kunmap_atomic((void *)__kmap_atomic_vaddr(type), (type)) ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _ASM_ATOMIC_KMAP_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/bug.h linux-2.6.8.1-ve022stab078/include/asm-i386/bug.h +--- linux-2.6.8.1.orig/include/asm-i386/bug.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/bug.h 2006-05-11 13:05:24.000000000 +0400 +@@ -12,7 +12,10 @@ + #if 1 /* Set to zero for a slightly smaller kernel */ + #define BUG() \ + __asm__ __volatile__( "ud2\n" \ ++ "\t.byte 0x66\n"\ ++ "\t.byte 0xb8\n" /* mov $xxx, %ax */\ + "\t.word %c0\n" \ ++ "\t.byte 0xb8\n" /* mov $xxx, %eax */\ + "\t.long %c1\n" \ + : : "i" (__LINE__), "i" (__FILE__)) + #else +diff -uprN linux-2.6.8.1.orig/include/asm-i386/checksum.h linux-2.6.8.1-ve022stab078/include/asm-i386/checksum.h +--- linux-2.6.8.1.orig/include/asm-i386/checksum.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/checksum.h 2006-05-11 13:05:38.000000000 +0400 +@@ -25,7 +25,7 @@ asmlinkage unsigned int csum_partial(con + * better 64-bit) boundary + */ + +-asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum, ++asmlinkage unsigned int direct_csum_partial_copy_generic( const char *src, char *dst, int len, int sum, + int *src_err_ptr, int *dst_err_ptr); + + /* +@@ -39,14 +39,19 @@ static __inline__ + unsigned int csum_partial_copy_nocheck ( const char *src, char *dst, + int len, int sum) + { +- return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL); ++ /* ++ * The direct function is OK for kernel-space => kernel-space copies: ++ */ ++ return direct_csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL); + } + + static __inline__ + unsigned int csum_partial_copy_from_user ( const char __user *src, char *dst, + int len, int sum, int *err_ptr) + { +- return csum_partial_copy_generic ( (__force char *)src, dst, len, sum, err_ptr, NULL); ++ if (copy_from_user(dst, src, len)) ++ *err_ptr = -EFAULT; ++ return csum_partial(dst, len, sum); + } + + /* +@@ -172,13 +177,28 @@ static __inline__ unsigned short int csu + * Copy and checksum to user + */ + #define HAVE_CSUM_COPY_USER +-static __inline__ unsigned int csum_and_copy_to_user(const char *src, ++static __inline__ unsigned int direct_csum_and_copy_to_user(const char *src, + char __user *dst, + int len, int sum, + int *err_ptr) + { + if (access_ok(VERIFY_WRITE, dst, len)) +- return csum_partial_copy_generic(src, (__force char *)dst, len, sum, NULL, err_ptr); ++ return direct_csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr); ++ ++ if (len) ++ *err_ptr = -EFAULT; ++ ++ return -1; /* invalid checksum */ ++} ++ ++static __inline__ unsigned int csum_and_copy_to_user(const char *src, char __user *dst, ++ int len, int sum, int *err_ptr) ++{ ++ if (access_ok(VERIFY_WRITE, dst, len)) { ++ if (copy_to_user(dst, src, len)) ++ *err_ptr = -EFAULT; ++ return csum_partial(src, len, sum); ++ } + + if (len) + *err_ptr = -EFAULT; +diff -uprN linux-2.6.8.1.orig/include/asm-i386/desc.h linux-2.6.8.1-ve022stab078/include/asm-i386/desc.h +--- linux-2.6.8.1.orig/include/asm-i386/desc.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/desc.h 2006-05-11 13:05:38.000000000 +0400 +@@ -21,6 +21,13 @@ struct Xgt_desc_struct { + + extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; + ++extern void trap_init_virtual_IDT(void); ++extern void trap_init_virtual_GDT(void); ++ ++asmlinkage 
int system_call(void); ++asmlinkage void lcall7(void); ++asmlinkage void lcall27(void); ++ + #define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8)) + #define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8)) + +@@ -30,6 +37,7 @@ extern struct Xgt_desc_struct idt_descr, + */ + extern struct desc_struct default_ldt[]; + extern void set_intr_gate(unsigned int irq, void * addr); ++extern void set_trap_gate(unsigned int n, void *addr); + + #define _set_tssldt_desc(n,addr,limit,type) \ + __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ +@@ -91,31 +99,8 @@ static inline void load_TLS(struct threa + #undef C + } + +-static inline void clear_LDT(void) +-{ +- int cpu = get_cpu(); +- +- set_ldt_desc(cpu, &default_ldt[0], 5); +- load_LDT_desc(); +- put_cpu(); +-} +- +-/* +- * load one particular LDT into the current CPU +- */ +-static inline void load_LDT_nolock(mm_context_t *pc, int cpu) +-{ +- void *segments = pc->ldt; +- int count = pc->size; +- +- if (likely(!count)) { +- segments = &default_ldt[0]; +- count = 5; +- } +- +- set_ldt_desc(cpu, segments, count); +- load_LDT_desc(); +-} ++extern struct page *default_ldt_page; ++extern void load_LDT_nolock(mm_context_t *pc, int cpu); + + static inline void load_LDT(mm_context_t *pc) + { +@@ -124,6 +109,6 @@ static inline void load_LDT(mm_context_t + put_cpu(); + } + +-#endif /* !__ASSEMBLY__ */ + ++#endif /* !__ASSEMBLY__ */ + #endif +diff -uprN linux-2.6.8.1.orig/include/asm-i386/elf.h linux-2.6.8.1-ve022stab078/include/asm-i386/elf.h +--- linux-2.6.8.1.orig/include/asm-i386/elf.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/elf.h 2006-05-11 13:05:45.000000000 +0400 +@@ -107,7 +107,7 @@ typedef struct user_fxsr_struct elf_fpxr + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +-#define ELF_PLATFORM (system_utsname.machine) ++#define ELF_PLATFORM (ve_utsname.machine) + + /* + * Architecture-neutral AT_ values in 0-17, leave some room +@@ -140,8 +140,10 @@ extern void __kernel_vsyscall; + + #define ARCH_DLINFO \ + do { \ ++ if (sysctl_at_vsyscall) { \ + NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ ++ } \ + } while (0) + + /* +diff -uprN linux-2.6.8.1.orig/include/asm-i386/fixmap.h linux-2.6.8.1-ve022stab078/include/asm-i386/fixmap.h +--- linux-2.6.8.1.orig/include/asm-i386/fixmap.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/fixmap.h 2006-05-11 13:05:38.000000000 +0400 +@@ -18,17 +18,17 @@ + #include <asm/acpi.h> + #include <asm/apicdef.h> + #include <asm/page.h> +-#ifdef CONFIG_HIGHMEM + #include <linux/threads.h> + #include <asm/kmap_types.h> +-#endif ++ ++#define __FIXADDR_TOP (0xfffff000UL) + + /* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only +- * in the boot process. We allocate these special addresses +- * from the end of virtual memory (0xfffff000) backwards. ++ * in the boot process. We allocate these special addresses ++ * from the end of virtual memory (0xffffe000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. +@@ -41,11 +41,24 @@ + * TLB entries of such buffers will not be flushed across + * task switches. 
+ */ ++ ++/* ++ * on UP currently we will have no trace of the fixmap mechanizm, ++ * no page table allocations, etc. This might change in the ++ * future, say framebuffers for the console driver(s) could be ++ * fix-mapped? ++ */ ++ ++#define TSS_SIZE sizeof(struct tss_struct) ++#define FIX_TSS_COUNT ((TSS_SIZE * NR_CPUS + PAGE_SIZE - 1)/ PAGE_SIZE) ++ + enum fixed_addresses { + FIX_HOLE, + FIX_VSYSCALL, + #ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ ++#else ++ FIX_VSTACK_HOLE_1, + #endif + #ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, +@@ -57,16 +70,22 @@ enum fixed_addresses { + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ + #endif +-#ifdef CONFIG_X86_F00F_BUG +- FIX_F00F_IDT, /* Virtual mapping for IDT */ +-#endif ++ FIX_IDT, ++ FIX_GDT_1, ++ FIX_GDT_0, ++ FIX_TSS_LAST, ++ FIX_TSS_0 = FIX_TSS_LAST + FIX_TSS_COUNT - 1, ++ FIX_ENTRY_TRAMPOLINE_1, ++ FIX_ENTRY_TRAMPOLINE_0, + #ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ ++ FIX_VSTACK_HOLE_2, + #endif +-#ifdef CONFIG_HIGHMEM +- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ ++ /* reserved pte's for temporary kernel mappings */ ++ __FIX_KMAP_BEGIN, ++ FIX_KMAP_BEGIN = __FIX_KMAP_BEGIN + (__FIX_KMAP_BEGIN & 1) + ++ ((__FIXADDR_TOP >> PAGE_SHIFT) & 1), + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +-#endif + #ifdef CONFIG_ACPI_BOOT + FIX_ACPI_BEGIN, + FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +@@ -98,12 +117,15 @@ extern void __set_fixmap (enum fixed_add + __set_fixmap(idx, 0, __pgprot(0)) + + /* +- * used by vmalloc.c. ++ * used by vmalloc.c and various other places. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. ++ * ++ * IMPORTANT: we have to align FIXADDR_TOP so that the virtual stack ++ * is THREAD_SIZE aligned. + */ +-#define FIXADDR_TOP (0xfffff000UL) ++#define FIXADDR_TOP __FIXADDR_TOP + #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) + #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) + +diff -uprN linux-2.6.8.1.orig/include/asm-i386/highmem.h linux-2.6.8.1-ve022stab078/include/asm-i386/highmem.h +--- linux-2.6.8.1.orig/include/asm-i386/highmem.h 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/highmem.h 2006-05-11 13:05:38.000000000 +0400 +@@ -25,26 +25,19 @@ + #include <linux/threads.h> + #include <asm/kmap_types.h> + #include <asm/tlbflush.h> ++#include <asm/atomic_kmap.h> + + /* declarations for highmem.c */ + extern unsigned long highstart_pfn, highend_pfn; + +-extern pte_t *kmap_pte; +-extern pgprot_t kmap_prot; + extern pte_t *pkmap_page_table; +- +-extern void kmap_init(void); ++extern void kmap_init(void) __init; + + /* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. 
+ */ +-#if NR_CPUS <= 32 +-#define PKMAP_BASE (0xff800000UL) +-#else +-#define PKMAP_BASE (0xff600000UL) +-#endif + #ifdef CONFIG_X86_PAE + #define LAST_PKMAP 512 + #else +@@ -60,6 +53,7 @@ extern void FASTCALL(kunmap_high(struct + void *kmap(struct page *page); + void kunmap(struct page *page); + void *kmap_atomic(struct page *page, enum km_type type); ++void *kmap_atomic_pte(pte_t *pte, enum km_type type); + void kunmap_atomic(void *kvaddr, enum km_type type); + struct page *kmap_atomic_to_page(void *ptr); + +diff -uprN linux-2.6.8.1.orig/include/asm-i386/hpet.h linux-2.6.8.1-ve022stab078/include/asm-i386/hpet.h +--- linux-2.6.8.1.orig/include/asm-i386/hpet.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/hpet.h 2006-05-11 13:05:29.000000000 +0400 +@@ -93,6 +93,7 @@ + extern unsigned long hpet_period; /* fsecs / HPET clock */ + extern unsigned long hpet_tick; /* hpet clks count per tick */ + extern unsigned long hpet_address; /* hpet memory map physical address */ ++extern int hpet_use_timer; + + extern int hpet_rtc_timer_init(void); + extern int hpet_enable(void); +diff -uprN linux-2.6.8.1.orig/include/asm-i386/irq.h linux-2.6.8.1-ve022stab078/include/asm-i386/irq.h +--- linux-2.6.8.1.orig/include/asm-i386/irq.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/irq.h 2006-05-11 13:05:28.000000000 +0400 +@@ -55,4 +55,10 @@ struct pt_regs; + asmlinkage int handle_IRQ_event(unsigned int, struct pt_regs *, + struct irqaction *); + ++#ifdef CONFIG_IRQBALANCE ++extern int irqbalance_disable(char *str); ++#endif ++extern int no_irq_affinity; ++extern int noirqdebug_setup(char *str); ++ + #endif /* _ASM_IRQ_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/kmap_types.h linux-2.6.8.1-ve022stab078/include/asm-i386/kmap_types.h +--- linux-2.6.8.1.orig/include/asm-i386/kmap_types.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/kmap_types.h 2006-05-11 13:05:38.000000000 +0400 +@@ -2,30 +2,36 @@ + #define _ASM_KMAP_TYPES_H + + #include <linux/config.h> +- +-#ifdef CONFIG_DEBUG_HIGHMEM +-# define D(n) __KM_FENCE_##n , +-#else +-# define D(n) +-#endif ++#include <linux/thread_info.h> + + enum km_type { +-D(0) KM_BOUNCE_READ, +-D(1) KM_SKB_SUNRPC_DATA, +-D(2) KM_SKB_DATA_SOFTIRQ, +-D(3) KM_USER0, +-D(4) KM_USER1, +-D(5) KM_BIO_SRC_IRQ, +-D(6) KM_BIO_DST_IRQ, +-D(7) KM_PTE0, +-D(8) KM_PTE1, +-D(9) KM_IRQ0, +-D(10) KM_IRQ1, +-D(11) KM_SOFTIRQ0, +-D(12) KM_SOFTIRQ1, +-D(13) KM_TYPE_NR +-}; ++ /* ++ * IMPORTANT: don't move these 3 entries, be wary when adding entries, ++ * the 4G/4G virtual stack must be THREAD_SIZE aligned on each cpu. 
++ */ ++ KM_BOUNCE_READ, ++ KM_VSTACK_BASE, ++ __KM_VSTACK_TOP = KM_VSTACK_BASE + STACK_PAGE_COUNT-1, ++ KM_VSTACK_TOP = __KM_VSTACK_TOP + (__KM_VSTACK_TOP % 2), + +-#undef D ++ KM_LDT_PAGE15, ++ KM_LDT_PAGE0 = KM_LDT_PAGE15 + 16-1, ++ KM_USER_COPY, ++ KM_VSTACK_HOLE, ++ KM_SKB_SUNRPC_DATA, ++ KM_SKB_DATA_SOFTIRQ, ++ KM_USER0, ++ KM_USER1, ++ KM_BIO_SRC_IRQ, ++ KM_BIO_DST_IRQ, ++ KM_PTE0, ++ KM_PTE1, ++ KM_IRQ0, ++ KM_IRQ1, ++ KM_SOFTIRQ0, ++ KM_SOFTIRQ1, ++ __KM_TYPE_NR, ++ KM_TYPE_NR=__KM_TYPE_NR + (__KM_TYPE_NR % 2) ++}; + + #endif +diff -uprN linux-2.6.8.1.orig/include/asm-i386/mach-default/mach_ipi.h linux-2.6.8.1-ve022stab078/include/asm-i386/mach-default/mach_ipi.h +--- linux-2.6.8.1.orig/include/asm-i386/mach-default/mach_ipi.h 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/mach-default/mach_ipi.h 2006-05-11 13:05:32.000000000 +0400 +@@ -1,8 +1,8 @@ + #ifndef __ASM_MACH_IPI_H + #define __ASM_MACH_IPI_H + +-inline void send_IPI_mask_bitmask(cpumask_t mask, int vector); +-inline void __send_IPI_shortcut(unsigned int shortcut, int vector); ++void send_IPI_mask_bitmask(cpumask_t mask, int vector); ++void __send_IPI_shortcut(unsigned int shortcut, int vector); + + static inline void send_IPI_mask(cpumask_t mask, int vector) + { +diff -uprN linux-2.6.8.1.orig/include/asm-i386/mman.h linux-2.6.8.1-ve022stab078/include/asm-i386/mman.h +--- linux-2.6.8.1.orig/include/asm-i386/mman.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/mman.h 2006-05-11 13:05:39.000000000 +0400 +@@ -22,6 +22,7 @@ + #define MAP_NORESERVE 0x4000 /* don't check for reservations */ + #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do not block on IO */ ++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */ + + #define MS_ASYNC 1 /* sync memory asynchronously */ + #define MS_INVALIDATE 2 /* invalidate the caches */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/mmu.h linux-2.6.8.1-ve022stab078/include/asm-i386/mmu.h +--- linux-2.6.8.1.orig/include/asm-i386/mmu.h 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/mmu.h 2006-05-11 13:05:38.000000000 +0400 +@@ -8,10 +8,13 @@ + * + * cpu_vm_mask is used to optimize ldt flushing. 
+ */ ++ ++#define MAX_LDT_PAGES 16 ++ + typedef struct { + int size; + struct semaphore sem; +- void *ldt; ++ struct page *ldt_pages[MAX_LDT_PAGES]; + } mm_context_t; + + #endif +diff -uprN linux-2.6.8.1.orig/include/asm-i386/mmu_context.h linux-2.6.8.1-ve022stab078/include/asm-i386/mmu_context.h +--- linux-2.6.8.1.orig/include/asm-i386/mmu_context.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/mmu_context.h 2006-05-11 13:05:38.000000000 +0400 +@@ -29,6 +29,10 @@ static inline void switch_mm(struct mm_s + { + int cpu = smp_processor_id(); + ++#ifdef CONFIG_X86_SWITCH_PAGETABLES ++ if (tsk->mm) ++ tsk->thread_info->user_pgd = (void *)__pa(tsk->mm->pgd); ++#endif + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +@@ -39,12 +43,14 @@ static inline void switch_mm(struct mm_s + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ ++#if !defined(CONFIG_X86_SWITCH_PAGETABLES) + load_cr3(next->pgd); ++#endif + + /* + * load the LDT, if the LDT is different: + */ +- if (unlikely(prev->context.ldt != next->context.ldt)) ++ if (unlikely(prev->context.size + next->context.size)) + load_LDT_nolock(&next->context, cpu); + } + #ifdef CONFIG_SMP +@@ -56,7 +62,9 @@ static inline void switch_mm(struct mm_s + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload %cr3. + */ ++#if !defined(CONFIG_X86_SWITCH_PAGETABLES) + load_cr3(next->pgd); ++#endif + load_LDT_nolock(&next->context, cpu); + } + } +@@ -67,6 +75,6 @@ static inline void switch_mm(struct mm_s + asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) + + #define activate_mm(prev, next) \ +- switch_mm((prev),(next),NULL) ++ switch_mm((prev),(next),current) + + #endif +diff -uprN linux-2.6.8.1.orig/include/asm-i386/mtrr.h linux-2.6.8.1-ve022stab078/include/asm-i386/mtrr.h +--- linux-2.6.8.1.orig/include/asm-i386/mtrr.h 2004-08-14 14:55:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/mtrr.h 2006-05-11 13:05:32.000000000 +0400 +@@ -67,8 +67,6 @@ struct mtrr_gentry + + #ifdef __KERNEL__ + +-extern char *mtrr_strings[]; +- + /* The following functions are for use by other drivers */ + # ifdef CONFIG_MTRR + extern int mtrr_add (unsigned long base, unsigned long size, +diff -uprN linux-2.6.8.1.orig/include/asm-i386/nmi.h linux-2.6.8.1-ve022stab078/include/asm-i386/nmi.h +--- linux-2.6.8.1.orig/include/asm-i386/nmi.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/nmi.h 2006-05-11 13:05:24.000000000 +0400 +@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_ + * set. Return 1 if the NMI was handled. + */ + void set_nmi_callback(nmi_callback_t callback); ++void set_nmi_ipi_callback(nmi_callback_t callback); + + /** + * unset_nmi_callback +@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal + * Remove the handler previously set. 
+ */ + void unset_nmi_callback(void); ++void unset_nmi_ipi_callback(void); + + #endif /* ASM_NMI_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/page.h linux-2.6.8.1-ve022stab078/include/asm-i386/page.h +--- linux-2.6.8.1.orig/include/asm-i386/page.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/page.h 2006-05-11 13:05:38.000000000 +0400 +@@ -1,6 +1,8 @@ + #ifndef _I386_PAGE_H + #define _I386_PAGE_H + ++#include <linux/config.h> ++ + /* PAGE_SHIFT determines the page size */ + #define PAGE_SHIFT 12 + #define PAGE_SIZE (1UL << PAGE_SHIFT) +@@ -9,11 +11,10 @@ + #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) + #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) + +-#ifdef __KERNEL__ +-#ifndef __ASSEMBLY__ +- + #include <linux/config.h> + ++#ifdef __KERNEL__ ++#ifndef __ASSEMBLY__ + #ifdef CONFIG_X86_USE_3DNOW + + #include <asm/mmx.h> +@@ -92,13 +93,28 @@ typedef struct { unsigned long pgprot; } + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. ++ * ++ * Note: on PAE the kernel must never go below 32 MB, we use the ++ * first 8 entries of the 2-level boot pgd for PAE magic. + */ + ++#ifdef CONFIG_X86_4G_VM_LAYOUT ++#define __PAGE_OFFSET (0x02000000) ++#define TASK_SIZE (0xc0000000) ++#else ++#define __PAGE_OFFSET (0xc0000000) ++#define TASK_SIZE (0xc0000000) ++#endif ++ + /* + * This much address space is reserved for vmalloc() and iomap() + * as well as fixmap mappings. + */ +-#define __VMALLOC_RESERVE (128 << 20) ++#ifdef CONFIG_X86_4G ++#define __VMALLOC_RESERVE (320 << 20) ++#else ++#define __VMALLOC_RESERVE (192 << 20) ++#endif + + #ifndef __ASSEMBLY__ + +@@ -118,16 +134,10 @@ static __inline__ int get_order(unsigned + + #endif /* __ASSEMBLY__ */ + +-#ifdef __ASSEMBLY__ +-#define __PAGE_OFFSET (0xC0000000) +-#else +-#define __PAGE_OFFSET (0xC0000000UL) +-#endif +- +- + #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) +-#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) ++#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) ++#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) + #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +diff -uprN linux-2.6.8.1.orig/include/asm-i386/pgtable.h linux-2.6.8.1-ve022stab078/include/asm-i386/pgtable.h +--- linux-2.6.8.1.orig/include/asm-i386/pgtable.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/pgtable.h 2006-05-11 13:05:38.000000000 +0400 +@@ -16,38 +16,41 @@ + #include <asm/processor.h> + #include <asm/fixmap.h> + #include <linux/threads.h> ++#include <linux/slab.h> + + #ifndef _I386_BITOPS_H + #include <asm/bitops.h> + #endif + +-#include <linux/slab.h> +-#include <linux/list.h> +-#include <linux/spinlock.h> +- +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. 
+- */ +-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +-extern unsigned long empty_zero_page[1024]; + extern pgd_t swapper_pg_dir[1024]; +-extern kmem_cache_t *pgd_cache; +-extern kmem_cache_t *pmd_cache; ++extern kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; + extern spinlock_t pgd_lock; + extern struct page *pgd_list; +- + void pmd_ctor(void *, kmem_cache_t *, unsigned long); ++void kpmd_ctor(void *, kmem_cache_t *, unsigned long); + void pgd_ctor(void *, kmem_cache_t *, unsigned long); + void pgd_dtor(void *, kmem_cache_t *, unsigned long); + void pgtable_cache_init(void); +-void paging_init(void); ++extern void paging_init(void); ++void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end); ++ ++/* ++ * ZERO_PAGE is a global shared page that is always zero: used ++ * for zero-mapped memory areas etc.. ++ */ ++extern unsigned long empty_zero_page[1024]; ++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + + /* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ ++ ++extern void set_system_gate(unsigned int n, void *addr); ++extern void init_entry_mappings(void); ++extern void entry_trampoline_setup(void); ++ + #ifdef CONFIG_X86_PAE + # include <asm/pgtable-3level-defs.h> + #else +@@ -59,7 +62,12 @@ void paging_init(void); + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE-1)) + +-#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) ++#if defined(CONFIG_X86_PAE) && defined(CONFIG_X86_4G_VM_LAYOUT) ++# define USER_PTRS_PER_PGD 4 ++#else ++# define USER_PTRS_PER_PGD ((TASK_SIZE/PGDIR_SIZE) + ((TASK_SIZE % PGDIR_SIZE) + PGDIR_SIZE-1)/PGDIR_SIZE) ++#endif ++ + #define FIRST_USER_PGD_NR 0 + + #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +@@ -274,6 +282,7 @@ static inline void ptep_mkdirty(pte_t *p + + #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + #define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE) ++#define mk_pte_phys(physpage, pgprot) pfn_pte((physpage) >> PAGE_SHIFT, pgprot) + + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + { +@@ -421,4 +430,11 @@ extern pte_t *lookup_address(unsigned lo + #define __HAVE_ARCH_PTE_SAME + #include <asm-generic/pgtable.h> + ++/* ++ * The size of the low 1:1 mappings we use during bootup, ++ * SMP-boot and ACPI-sleep: ++ */ ++#define LOW_MAPPINGS_SIZE (16*1024*1024) ++ ++ + #endif /* _I386_PGTABLE_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/processor.h linux-2.6.8.1-ve022stab078/include/asm-i386/processor.h +--- linux-2.6.8.1.orig/include/asm-i386/processor.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/processor.h 2006-05-11 13:05:38.000000000 +0400 +@@ -84,8 +84,6 @@ struct cpuinfo_x86 { + + extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 new_cpu_data; +-extern struct tss_struct init_tss[NR_CPUS]; +-extern struct tss_struct doublefault_tss; + + #ifdef CONFIG_SMP + extern struct cpuinfo_x86 cpu_data[]; +@@ -286,11 +284,6 @@ extern unsigned int machine_submodel_id; + extern unsigned int BIOS_revision; + extern unsigned int mca_pentium_flag; + +-/* +- * User space process size: 3GB (default). +- */ +-#define TASK_SIZE (PAGE_OFFSET) +- + /* This decides where the kernel will search for a free chunk of vm + * space during mmap's. 
+ */ +@@ -302,7 +295,6 @@ extern unsigned int mca_pentium_flag; + #define IO_BITMAP_BITS 65536 + #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) + #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) + #define INVALID_IO_BITMAP_OFFSET 0x8000 + + struct i387_fsave_struct { +@@ -400,6 +392,11 @@ struct tss_struct { + + #define ARCH_MIN_TASKALIGN 16 + ++#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) ++ ++extern struct tss_struct init_tss[NR_CPUS]; ++extern struct tss_struct doublefault_tss; ++ + struct thread_struct { + /* cached TLS descriptors. */ + struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; +@@ -446,7 +443,8 @@ struct thread_struct { + .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ + } + +-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) ++static inline void ++load_esp0(struct tss_struct *tss, struct thread_struct *thread) + { + tss->esp0 = thread->esp0; + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ +@@ -482,6 +480,23 @@ extern void prepare_to_copy(struct task_ + */ + extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + ++#ifdef CONFIG_X86_HIGH_ENTRY ++#define virtual_esp0(tsk) \ ++ ((unsigned long)(tsk)->thread_info->virtual_stack + ((tsk)->thread.esp0 - (unsigned long)(tsk)->thread_info->real_stack)) ++#else ++# define virtual_esp0(tsk) ((tsk)->thread.esp0) ++#endif ++ ++#define load_virtual_esp0(tss, task) \ ++ do { \ ++ tss->esp0 = virtual_esp0(task); \ ++ if (likely(cpu_has_sep) && unlikely(tss->ss1 != task->thread.sysenter_cs)) { \ ++ tss->ss1 = task->thread.sysenter_cs; \ ++ wrmsr(MSR_IA32_SYSENTER_CS, \ ++ task->thread.sysenter_cs, 0); \ ++ } \ ++ } while (0) ++ + extern unsigned long thread_saved_pc(struct task_struct *tsk); + void show_trace(struct task_struct *task, unsigned long *stack); + +diff -uprN linux-2.6.8.1.orig/include/asm-i386/setup.h linux-2.6.8.1-ve022stab078/include/asm-i386/setup.h +--- linux-2.6.8.1.orig/include/asm-i386/setup.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/setup.h 2006-05-11 13:05:29.000000000 +0400 +@@ -55,7 +55,7 @@ extern unsigned char boot_params[PARAM_S + #define KERNEL_START (*(unsigned long *) (PARAM+0x214)) + #define INITRD_START (*(unsigned long *) (PARAM+0x218)) + #define INITRD_SIZE (*(unsigned long *) (PARAM+0x21c)) +-#define EDID_INFO (*(struct edid_info *) (PARAM+0x440)) ++#define EDID_INFO (*(struct edid_info *) (PARAM+0x140)) + #define EDD_NR (*(unsigned char *) (PARAM+EDDNR)) + #define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF)) + #define EDD_MBR_SIGNATURE ((unsigned int *) (PARAM+EDD_MBR_SIG_BUF)) +diff -uprN linux-2.6.8.1.orig/include/asm-i386/string.h linux-2.6.8.1-ve022stab078/include/asm-i386/string.h +--- linux-2.6.8.1.orig/include/asm-i386/string.h 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/string.h 2006-05-11 13:05:38.000000000 +0400 +@@ -60,6 +60,29 @@ __asm__ __volatile__( + return dest; + } + ++/* ++ * This is a more generic variant of strncpy_count() suitable for ++ * implementing string-access routines with all sorts of return ++ * code semantics. It's used by mm/usercopy.c. 
++ */ ++static inline size_t strncpy_count(char * dest,const char *src,size_t count) ++{ ++ __asm__ __volatile__( ++ ++ "1:\tdecl %0\n\t" ++ "js 2f\n\t" ++ "lodsb\n\t" ++ "stosb\n\t" ++ "testb %%al,%%al\n\t" ++ "jne 1b\n\t" ++ "2:" ++ "incl %0" ++ : "=c" (count) ++ :"S" (src),"D" (dest),"0" (count) : "memory"); ++ ++ return count; ++} ++ + #define __HAVE_ARCH_STRCAT + static inline char * strcat(char * dest,const char * src) + { +@@ -117,7 +140,8 @@ __asm__ __volatile__( + "orb $1,%%al\n" + "3:" + :"=a" (__res), "=&S" (d0), "=&D" (d1) +- :"1" (cs),"2" (ct)); ++ :"1" (cs),"2" (ct) ++ :"memory"); + return __res; + } + +@@ -139,8 +163,9 @@ __asm__ __volatile__( + "3:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "4:" +- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) +- :"1" (cs),"2" (ct),"3" (count)); ++ :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) ++ :"1" (cs),"2" (ct),"3" (count) ++ :"memory"); + return __res; + } + +@@ -159,7 +184,9 @@ __asm__ __volatile__( + "movl $1,%1\n" + "2:\tmovl %1,%0\n\t" + "decl %0" +- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); ++ :"=a" (__res), "=&S" (d0) ++ :"1" (s),"0" (c) ++ :"memory"); + return __res; + } + +@@ -176,7 +203,9 @@ __asm__ __volatile__( + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" +- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); ++ :"=g" (__res), "=&S" (d0), "=&a" (d1) ++ :"0" (0),"1" (s),"2" (c) ++ :"memory"); + return __res; + } + +@@ -192,7 +221,9 @@ __asm__ __volatile__( + "scasb\n\t" + "notl %0\n\t" + "decl %0" +- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffffu)); ++ :"=c" (__res), "=&D" (d0) ++ :"1" (s),"a" (0), "0" (0xffffffffu) ++ :"memory"); + return __res; + } + +@@ -303,7 +334,9 @@ __asm__ __volatile__( + "je 1f\n\t" + "movl $1,%0\n" + "1:\tdecl %0" +- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count)); ++ :"=D" (__res), "=&c" (d0) ++ :"a" (c),"0" (cs),"1" (count) ++ :"memory"); + return __res; + } + +@@ -339,7 +372,7 @@ __asm__ __volatile__( + "je 2f\n\t" + "stosb\n" + "2:" +- : "=&c" (d0), "=&D" (d1) ++ :"=&c" (d0), "=&D" (d1) + :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) + :"memory"); + return (s); +@@ -362,7 +395,8 @@ __asm__ __volatile__( + "jne 1b\n" + "3:\tsubl %2,%0" + :"=a" (__res), "=&d" (d0) +- :"c" (s),"1" (count)); ++ :"c" (s),"1" (count) ++ :"memory"); + return __res; + } + /* end of additional stuff */ +@@ -443,7 +477,8 @@ static inline void * memscan(void * addr + "dec %%edi\n" + "1:" + : "=D" (addr), "=c" (size) +- : "0" (addr), "1" (size), "a" (c)); ++ : "0" (addr), "1" (size), "a" (c) ++ : "memory"); + return addr; + } + +diff -uprN linux-2.6.8.1.orig/include/asm-i386/thread_info.h linux-2.6.8.1-ve022stab078/include/asm-i386/thread_info.h +--- linux-2.6.8.1.orig/include/asm-i386/thread_info.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/thread_info.h 2006-05-11 13:05:39.000000000 +0400 +@@ -16,6 +16,15 @@ + #include <asm/processor.h> + #endif + ++#define PREEMPT_ACTIVE 0x4000000 ++#ifdef CONFIG_4KSTACKS ++#define THREAD_SIZE (4096) ++#else ++#define THREAD_SIZE (8192) ++#endif ++#define STACK_PAGE_COUNT (THREAD_SIZE/PAGE_SIZE) ++#define STACK_WARN (THREAD_SIZE/8) ++ + /* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line +@@ -37,6 +46,8 @@ struct thread_info { + 0-0xBFFFFFFF for user-thead + 0-0xFFFFFFFF for kernel-thread + */ ++ void *real_stack, *virtual_stack, *user_pgd; ++ void *stack_page[STACK_PAGE_COUNT]; + struct 
restart_block restart_block; + + unsigned long previous_esp; /* ESP of the previous stack in case +@@ -51,14 +62,6 @@ struct thread_info { + + #endif + +-#define PREEMPT_ACTIVE 0x4000000 +-#ifdef CONFIG_4KSTACKS +-#define THREAD_SIZE (4096) +-#else +-#define THREAD_SIZE (8192) +-#endif +- +-#define STACK_WARN (THREAD_SIZE/8) + /* + * macros/functions for gaining access to the thread information structure + * +@@ -66,7 +69,7 @@ struct thread_info { + */ + #ifndef __ASSEMBLY__ + +-#define INIT_THREAD_INFO(tsk) \ ++#define INIT_THREAD_INFO(tsk, thread_info) \ + { \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ +@@ -77,6 +80,7 @@ struct thread_info { + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ ++ .real_stack = &thread_info, \ + } + + #define init_thread_info (init_thread_union.thread_info) +@@ -105,13 +109,13 @@ static inline unsigned long current_stac + ({ \ + struct thread_info *ret; \ + \ +- ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ ++ ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC); \ + if (ret) \ + memset(ret, 0, THREAD_SIZE); \ + ret; \ + }) + #else +-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) ++#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC) + #endif + + #define free_thread_info(info) kfree(info) +@@ -143,8 +147,10 @@ static inline unsigned long current_stac + #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ + #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ + #define TIF_IRET 5 /* return with iret */ ++#define TIF_DB7 6 /* has debug registers */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ ++#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */ + + #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +@@ -153,6 +159,7 @@ static inline unsigned long current_stac + #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) + #define _TIF_IRET (1<<TIF_IRET) + #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) ++#define _TIF_DB7 (1<<TIF_DB7) + #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) + + /* work to do on interrupt/exception return */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/timex.h linux-2.6.8.1-ve022stab078/include/asm-i386/timex.h +--- linux-2.6.8.1.orig/include/asm-i386/timex.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/timex.h 2006-05-11 13:05:40.000000000 +0400 +@@ -41,7 +41,7 @@ extern cycles_t cacheflush_time; + static inline cycles_t get_cycles (void) + { + #ifndef CONFIG_X86_TSC +- return 0; ++#error "CONFIG_X86_TCS is not set!" 
+ #else + unsigned long long ret; + +diff -uprN linux-2.6.8.1.orig/include/asm-i386/tlbflush.h linux-2.6.8.1-ve022stab078/include/asm-i386/tlbflush.h +--- linux-2.6.8.1.orig/include/asm-i386/tlbflush.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/tlbflush.h 2006-05-11 13:05:38.000000000 +0400 +@@ -85,22 +85,28 @@ extern unsigned long pgkern_mask; + + static inline void flush_tlb_mm(struct mm_struct *mm) + { ++#ifndef CONFIG_X86_SWITCH_PAGETABLES + if (mm == current->active_mm) + __flush_tlb(); ++#endif + } + + static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) + { ++#ifndef CONFIG_X86_SWITCH_PAGETABLES + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); ++#endif + } + + static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) + { ++#ifndef CONFIG_X86_SWITCH_PAGETABLES + if (vma->vm_mm == current->active_mm) + __flush_tlb(); ++#endif + } + + #else +@@ -111,11 +117,10 @@ static inline void flush_tlb_range(struc + __flush_tlb() + + extern void flush_tlb_all(void); +-extern void flush_tlb_current_task(void); + extern void flush_tlb_mm(struct mm_struct *); + extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +-#define flush_tlb() flush_tlb_current_task() ++#define flush_tlb() flush_tlb_all() + + static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) + { +diff -uprN linux-2.6.8.1.orig/include/asm-i386/uaccess.h linux-2.6.8.1-ve022stab078/include/asm-i386/uaccess.h +--- linux-2.6.8.1.orig/include/asm-i386/uaccess.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/uaccess.h 2006-05-11 13:05:38.000000000 +0400 +@@ -26,7 +26,7 @@ + + + #define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL) +-#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) ++#define USER_DS MAKE_MM_SEG(TASK_SIZE) + + #define get_ds() (KERNEL_DS) + #define get_fs() (current_thread_info()->addr_limit) +@@ -150,6 +150,55 @@ extern void __get_user_4(void); + :"=a" (ret),"=d" (x) \ + :"0" (ptr)) + ++extern int get_user_size(unsigned int size, void *val, const void *ptr); ++extern int put_user_size(unsigned int size, const void *val, void *ptr); ++extern int zero_user_size(unsigned int size, void *ptr); ++extern int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr); ++extern int strlen_fromuser_size(unsigned int size, const void *ptr); ++ ++/* ++ * GCC 2.96 has stupid bug which forces us to use volatile or barrier below. ++ * without volatile or barrier compiler generates ABSOLUTELY wrong code which ++ * igonores XXX_size function return code, but generates EFAULT :))) ++ * the bug was found in sys_utime() ++ */ ++# define indirect_get_user(x,ptr) \ ++({ int __ret_gu,__val_gu; \ ++ __typeof__(ptr) __ptr_gu = (ptr); \ ++ __ret_gu = get_user_size(sizeof(*__ptr_gu), &__val_gu,__ptr_gu) ? -EFAULT : 0;\ ++ barrier(); \ ++ (x) = (__typeof__(*__ptr_gu))__val_gu; \ ++ __ret_gu; \ ++}) ++#define indirect_put_user(x,ptr) \ ++({ \ ++ int __ret_pu; \ ++ __typeof__(*(ptr)) *__ptr_pu = (ptr), __x_pu = (x); \ ++ __ret_pu = put_user_size(sizeof(*__ptr_pu), \ ++ &__x_pu, __ptr_pu) ? 
-EFAULT : 0; \ ++ barrier(); \ ++ __ret_pu; \ ++}) ++#define __indirect_put_user indirect_put_user ++#define __indirect_get_user indirect_get_user ++ ++#define indirect_copy_from_user(to,from,n) get_user_size(n,to,from) ++#define indirect_copy_to_user(to,from,n) put_user_size(n,from,to) ++ ++#define __indirect_copy_from_user indirect_copy_from_user ++#define __indirect_copy_to_user indirect_copy_to_user ++ ++#define indirect_strncpy_from_user(dst, src, count) \ ++ copy_str_fromuser_size(count, dst, src) ++ ++extern int strlen_fromuser_size(unsigned int size, const void *ptr); ++#define indirect_strnlen_user(str, n) strlen_fromuser_size(n, str) ++#define indirect_strlen_user(str) indirect_strnlen_user(str, ~0UL >> 1) ++ ++extern int zero_user_size(unsigned int size, void *ptr); ++ ++#define indirect_clear_user(mem, len) zero_user_size(len, mem) ++#define __indirect_clear_user clear_user + + /* Careful: we have to cast the result to the type of the pointer for sign reasons */ + /** +@@ -169,7 +218,7 @@ extern void __get_user_4(void); + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +-#define get_user(x,ptr) \ ++#define direct_get_user(x,ptr) \ + ({ int __ret_gu,__val_gu; \ + __chk_user_ptr(ptr); \ + switch(sizeof (*(ptr))) { \ +@@ -200,7 +249,7 @@ extern void __put_user_bad(void); + * + * Returns zero on success, or -EFAULT on error. + */ +-#define put_user(x,ptr) \ ++#define direct_put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + + +@@ -224,7 +273,7 @@ extern void __put_user_bad(void); + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +-#define __get_user(x,ptr) \ ++#define __direct_get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + + +@@ -247,7 +296,7 @@ extern void __put_user_bad(void); + * + * Returns zero on success, or -EFAULT on error. + */ +-#define __put_user(x,ptr) \ ++#define __direct_put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + + #define __put_user_nocheck(x,ptr,size) \ +@@ -400,7 +449,7 @@ unsigned long __copy_from_user_ll(void * + * On success, this will be zero. + */ + static inline unsigned long +-__copy_to_user(void __user *to, const void *from, unsigned long n) ++__direct_copy_to_user(void __user *to, const void *from, unsigned long n) + { + if (__builtin_constant_p(n)) { + unsigned long ret; +@@ -438,7 +487,7 @@ __copy_to_user(void __user *to, const vo + * data to the requested size using zero bytes. + */ + static inline unsigned long +-__copy_from_user(void *to, const void __user *from, unsigned long n) ++__direct_copy_from_user(void *to, const void __user *from, unsigned long n) + { + if (__builtin_constant_p(n)) { + unsigned long ret; +@@ -458,9 +507,55 @@ __copy_from_user(void *to, const void __ + return __copy_from_user_ll(to, from, n); + } + +-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n); +-unsigned long copy_from_user(void *to, +- const void __user *from, unsigned long n); ++/** ++ * copy_to_user: - Copy a block of data into user space. ++ * @to: Destination address, in user space. ++ * @from: Source address, in kernel space. ++ * @n: Number of bytes to copy. ++ * ++ * Context: User context only. This function may sleep. ++ * ++ * Copy data from kernel space to user space. ++ * ++ * Returns number of bytes that could not be copied. ++ * On success, this will be zero. 
++ */ ++static inline unsigned long ++direct_copy_to_user(void __user *to, const void *from, unsigned long n) ++{ ++ might_sleep(); ++ if (access_ok(VERIFY_WRITE, to, n)) ++ n = __direct_copy_to_user(to, from, n); ++ return n; ++} ++ ++/** ++ * copy_from_user: - Copy a block of data from user space. ++ * @to: Destination address, in kernel space. ++ * @from: Source address, in user space. ++ * @n: Number of bytes to copy. ++ * ++ * Context: User context only. This function may sleep. ++ * ++ * Copy data from user space to kernel space. ++ * ++ * Returns number of bytes that could not be copied. ++ * On success, this will be zero. ++ * ++ * If some data could not be copied, this function will pad the copied ++ * data to the requested size using zero bytes. ++ */ ++static inline unsigned long ++direct_copy_from_user(void *to, const void __user *from, unsigned long n) ++{ ++ might_sleep(); ++ if (access_ok(VERIFY_READ, from, n)) ++ n = __direct_copy_from_user(to, from, n); ++ else ++ memset(to, 0, n); ++ return n; ++} ++ + long strncpy_from_user(char *dst, const char __user *src, long count); + long __strncpy_from_user(char *dst, const char __user *src, long count); + +@@ -478,10 +573,68 @@ long __strncpy_from_user(char *dst, cons + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +-#define strlen_user(str) strnlen_user(str, ~0UL >> 1) + +-long strnlen_user(const char __user *str, long n); +-unsigned long clear_user(void __user *mem, unsigned long len); +-unsigned long __clear_user(void __user *mem, unsigned long len); ++long direct_strncpy_from_user(char *dst, const char *src, long count); ++long __direct_strncpy_from_user(char *dst, const char *src, long count); ++#define direct_strlen_user(str) direct_strnlen_user(str, ~0UL >> 1) ++long direct_strnlen_user(const char *str, long n); ++unsigned long direct_clear_user(void *mem, unsigned long len); ++unsigned long __direct_clear_user(void *mem, unsigned long len); ++ ++extern int indirect_uaccess; ++ ++#ifdef CONFIG_X86_UACCESS_INDIRECT ++ ++/* ++ * Return code and zeroing semantics: ++ ++ __clear_user 0 <-> bytes not done ++ clear_user 0 <-> bytes not done ++ __copy_to_user 0 <-> bytes not done ++ copy_to_user 0 <-> bytes not done ++ __copy_from_user 0 <-> bytes not done, zero rest ++ copy_from_user 0 <-> bytes not done, zero rest ++ __get_user 0 <-> -EFAULT ++ get_user 0 <-> -EFAULT ++ __put_user 0 <-> -EFAULT ++ put_user 0 <-> -EFAULT ++ strlen_user strlen + 1 <-> 0 ++ strnlen_user strlen + 1 (or n+1) <-> 0 ++ strncpy_from_user strlen (or n) <-> -EFAULT ++ ++ */ ++ ++#define __clear_user(mem,len) __indirect_clear_user(mem,len) ++#define clear_user(mem,len) indirect_clear_user(mem,len) ++#define __copy_to_user(to,from,n) __indirect_copy_to_user(to,from,n) ++#define copy_to_user(to,from,n) indirect_copy_to_user(to,from,n) ++#define __copy_from_user(to,from,n) __indirect_copy_from_user(to,from,n) ++#define copy_from_user(to,from,n) indirect_copy_from_user(to,from,n) ++#define __get_user(val,ptr) __indirect_get_user(val,ptr) ++#define get_user(val,ptr) indirect_get_user(val,ptr) ++#define __put_user(val,ptr) __indirect_put_user(val,ptr) ++#define put_user(val,ptr) indirect_put_user(val,ptr) ++#define strlen_user(str) indirect_strlen_user(str) ++#define strnlen_user(src,count) indirect_strnlen_user(src,count) ++#define strncpy_from_user(dst,src,count) \ ++ indirect_strncpy_from_user(dst,src,count) ++ ++#else ++ ++#define __clear_user __direct_clear_user ++#define clear_user 
direct_clear_user ++#define __copy_to_user __direct_copy_to_user ++#define copy_to_user direct_copy_to_user ++#define __copy_from_user __direct_copy_from_user ++#define copy_from_user direct_copy_from_user ++#define __get_user __direct_get_user ++#define get_user direct_get_user ++#define __put_user __direct_put_user ++#define put_user direct_put_user ++#define strlen_user direct_strlen_user ++#define strnlen_user direct_strnlen_user ++#define strncpy_from_user direct_strncpy_from_user ++ ++#endif /* CONFIG_X86_UACCESS_INDIRECT */ + + #endif /* __i386_UACCESS_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-i386/unistd.h linux-2.6.8.1-ve022stab078/include/asm-i386/unistd.h +--- linux-2.6.8.1.orig/include/asm-i386/unistd.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-i386/unistd.h 2006-05-11 13:05:43.000000000 +0400 +@@ -289,8 +289,18 @@ + #define __NR_mq_notify (__NR_mq_open+4) + #define __NR_mq_getsetattr (__NR_mq_open+5) + #define __NR_sys_kexec_load 283 +- +-#define NR_syscalls 284 ++#define __NR_fairsched_mknod 500 /* FairScheduler syscalls */ ++#define __NR_fairsched_rmnod 501 ++#define __NR_fairsched_chwt 502 ++#define __NR_fairsched_mvpr 503 ++#define __NR_fairsched_rate 504 ++#define __NR_getluid 510 ++#define __NR_setluid 511 ++#define __NR_setublimit 512 ++#define __NR_ubstat 513 ++#define __NR_lchmod 516 ++#define __NR_lutime 517 ++#define NR_syscalls 517 + + /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ + +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/machvec_init.h linux-2.6.8.1-ve022stab078/include/asm-ia64/machvec_init.h +--- linux-2.6.8.1.orig/include/asm-ia64/machvec_init.h 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/machvec_init.h 2006-05-11 13:05:37.000000000 +0400 +@@ -1,4 +1,5 @@ + #include <asm/machvec.h> ++#include <asm/io.h> + + extern ia64_mv_send_ipi_t ia64_send_ipi; + extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/mman.h linux-2.6.8.1-ve022stab078/include/asm-ia64/mman.h +--- linux-2.6.8.1.orig/include/asm-ia64/mman.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/mman.h 2006-05-11 13:05:39.000000000 +0400 +@@ -30,6 +30,7 @@ + #define MAP_NORESERVE 0x04000 /* don't check for reservations */ + #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do not block on IO */ ++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */ + + #define MS_ASYNC 1 /* sync memory asynchronously */ + #define MS_INVALIDATE 2 /* invalidate the caches */ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/pgtable.h linux-2.6.8.1-ve022stab078/include/asm-ia64/pgtable.h +--- linux-2.6.8.1.orig/include/asm-ia64/pgtable.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/pgtable.h 2006-05-11 13:05:30.000000000 +0400 +@@ -8,7 +8,7 @@ + * This hopefully works with any (fixed) IA-64 page-size, as defined + * in <asm/page.h> (currently 8192). + * +- * Copyright (C) 1998-2004 Hewlett-Packard Co ++ * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + */ + +@@ -420,6 +420,8 @@ pte_same (pte_t a, pte_t b) + return pte_val(a) == pte_val(b); + } + ++#define update_mmu_cache(vma, address, pte) do { } while (0) ++ + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + extern void paging_init (void); + +@@ -479,7 +481,7 @@ extern void hugetlb_free_pgtables(struct + * information. 
However, we use this routine to take care of any (delayed) i-cache + * flushing that may be necessary. + */ +-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte); ++extern void lazy_mmu_prot_update (pte_t pte); + + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS + /* +@@ -549,7 +551,11 @@ do { \ + + /* These tell get_user_pages() that the first gate page is accessible from user-level. */ + #define FIXADDR_USER_START GATE_ADDR +-#define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) ++#ifdef HAVE_BUGGY_SEGREL ++# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE) ++#else ++# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) ++#endif + + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +@@ -558,6 +564,7 @@ do { \ + #define __HAVE_ARCH_PTEP_MKDIRTY + #define __HAVE_ARCH_PTE_SAME + #define __HAVE_ARCH_PGD_OFFSET_GATE ++#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE + #include <asm-generic/pgtable.h> + + #endif /* _ASM_IA64_PGTABLE_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/processor.h linux-2.6.8.1-ve022stab078/include/asm-ia64/processor.h +--- linux-2.6.8.1.orig/include/asm-ia64/processor.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/processor.h 2006-05-11 13:05:40.000000000 +0400 +@@ -310,7 +310,7 @@ struct thread_struct { + regs->loadrs = 0; \ + regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \ + regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ +- if (unlikely(!current->mm->dumpable)) { \ ++ if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) { \ + /* \ + * Zap scratch regs to avoid leaking bits between processes with different \ + * uid/privileges. \ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/ptrace.h linux-2.6.8.1-ve022stab078/include/asm-ia64/ptrace.h +--- linux-2.6.8.1.orig/include/asm-ia64/ptrace.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/ptrace.h 2006-05-11 13:05:30.000000000 +0400 +@@ -2,7 +2,7 @@ + #define _ASM_IA64_PTRACE_H + + /* +- * Copyright (C) 1998-2003 Hewlett-Packard Co ++ * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2003 Intel Co +@@ -110,7 +110,11 @@ struct pt_regs { + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ +- unsigned long cr_ifs; /* interrupted task's function state */ ++ /* ++ * interrupted task's function state; if bit 63 is cleared, it ++ * contains syscall's ar.pfs.pfm: ++ */ ++ unsigned long cr_ifs; + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/system.h linux-2.6.8.1-ve022stab078/include/asm-ia64/system.h +--- linux-2.6.8.1.orig/include/asm-ia64/system.h 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/system.h 2006-05-11 13:05:39.000000000 +0400 +@@ -279,7 +279,7 @@ do { \ + spin_lock(&(next)->switch_lock); \ + spin_unlock(&(rq)->lock); \ + } while (0) +-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) ++#define finish_arch_switch(rq, prev) spin_unlock(&(prev)->switch_lock) + #define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) + + #define ia64_platform_is(x) (strcmp(x, platform_name) == 0) +diff -uprN 
linux-2.6.8.1.orig/include/asm-ia64/thread_info.h linux-2.6.8.1-ve022stab078/include/asm-ia64/thread_info.h +--- linux-2.6.8.1.orig/include/asm-ia64/thread_info.h 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/thread_info.h 2006-05-11 13:05:25.000000000 +0400 +@@ -75,6 +75,7 @@ struct thread_info { + #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ + #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ + #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ ++#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */ + + #define TIF_WORK_MASK 0x7 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE */ + #define TIF_ALLWORK_MASK 0x1f /* bits 0..4 are "work to do on user-return" bits */ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/timex.h linux-2.6.8.1-ve022stab078/include/asm-ia64/timex.h +--- linux-2.6.8.1.orig/include/asm-ia64/timex.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/timex.h 2006-05-11 13:05:40.000000000 +0400 +@@ -10,11 +10,14 @@ + * Also removed cacheflush_time as it's entirely unused. + */ + +-#include <asm/intrinsics.h> +-#include <asm/processor.h> ++extern unsigned int cpu_khz; + + typedef unsigned long cycles_t; + ++#ifdef __KERNEL__ ++#include <asm/intrinsics.h> ++#include <asm/processor.h> ++ + /* + * For performance reasons, we don't want to define CLOCK_TICK_TRATE as + * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George +@@ -37,4 +40,5 @@ get_cycles (void) + return ret; + } + ++#endif /* __KERNEL__ */ + #endif /* _ASM_IA64_TIMEX_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-ia64/unistd.h linux-2.6.8.1-ve022stab078/include/asm-ia64/unistd.h +--- linux-2.6.8.1.orig/include/asm-ia64/unistd.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-ia64/unistd.h 2006-05-11 13:05:43.000000000 +0400 +@@ -259,12 +259,23 @@ + #define __NR_mq_getsetattr 1267 + #define __NR_kexec_load 1268 + #define __NR_vserver 1269 ++#define __NR_fairsched_mknod 1500 ++#define __NR_fairsched_rmnod 1501 ++#define __NR_fairsched_chwt 1502 ++#define __NR_fairsched_mvpr 1503 ++#define __NR_fairsched_rate 1504 ++#define __NR_getluid 1505 ++#define __NR_setluid 1506 ++#define __NR_setublimit 1507 ++#define __NR_ubstat 1508 ++#define __NR_lchmod 1509 ++#define __NR_lutime 1510 + + #ifdef __KERNEL__ + + #include <linux/config.h> + +-#define NR_syscalls 256 /* length of syscall table */ ++#define NR_syscalls (__NR_lutime - __NR_ni_syscall + 1) /* length of syscall table */ + + #define __ARCH_WANT_SYS_RT_SIGACTION + +@@ -369,7 +380,7 @@ asmlinkage unsigned long sys_mmap2( + int fd, long pgoff); + struct pt_regs; + struct sigaction; +-asmlinkage long sys_execve(char *filename, char **argv, char **envp, ++long sys_execve(char *filename, char **argv, char **envp, + struct pt_regs *regs); + asmlinkage long sys_pipe(long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack); +diff -uprN linux-2.6.8.1.orig/include/asm-mips/system.h linux-2.6.8.1-ve022stab078/include/asm-mips/system.h +--- linux-2.6.8.1.orig/include/asm-mips/system.h 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-mips/system.h 2006-05-11 13:05:39.000000000 +0400 +@@ -496,7 +496,7 @@ do { \ + spin_lock(&(next)->switch_lock); \ + spin_unlock(&(rq)->lock); \ + } while (0) +-#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) ++#define finish_arch_switch(rq, 
prev) spin_unlock(&(prev)->switch_lock) + #define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) + + #endif /* _ASM_SYSTEM_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-s390/system.h linux-2.6.8.1-ve022stab078/include/asm-s390/system.h +--- linux-2.6.8.1.orig/include/asm-s390/system.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-s390/system.h 2006-05-11 13:05:39.000000000 +0400 +@@ -107,7 +107,7 @@ static inline void restore_access_regs(u + #define task_running(rq, p) ((rq)->curr == (p)) + #define finish_arch_switch(rq, prev) do { \ + set_fs(current->thread.mm_segment); \ +- spin_unlock_irq(&(rq)->lock); \ ++ spin_unlock(&(rq)->lock); \ + } while (0) + + #define nop() __asm__ __volatile__ ("nop") +diff -uprN linux-2.6.8.1.orig/include/asm-sparc/system.h linux-2.6.8.1-ve022stab078/include/asm-sparc/system.h +--- linux-2.6.8.1.orig/include/asm-sparc/system.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-sparc/system.h 2006-05-11 13:05:39.000000000 +0400 +@@ -109,7 +109,7 @@ extern void fpsave(unsigned long *fpregs + "save %sp, -0x40, %sp\n\t" \ + "restore; restore; restore; restore; restore; restore; restore"); \ + } while(0) +-#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) ++#define finish_arch_switch(rq, next) spin_unlock(&(rq)->lock) + #define task_running(rq, p) ((rq)->curr == (p)) + + /* Much care has gone into this code, do not touch it. +diff -uprN linux-2.6.8.1.orig/include/asm-sparc64/system.h linux-2.6.8.1-ve022stab078/include/asm-sparc64/system.h +--- linux-2.6.8.1.orig/include/asm-sparc64/system.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-sparc64/system.h 2006-05-11 13:05:39.000000000 +0400 +@@ -146,7 +146,7 @@ do { spin_lock(&(next)->switch_lock); \ + } while (0) + + #define finish_arch_switch(rq, prev) \ +-do { spin_unlock_irq(&(prev)->switch_lock); \ ++do { spin_unlock(&(prev)->switch_lock); \ + } while (0) + + #define task_running(rq, p) \ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/a.out.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/a.out.h +--- linux-2.6.8.1.orig/include/asm-x86_64/a.out.h 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/a.out.h 2006-05-11 13:05:29.000000000 +0400 +@@ -21,7 +21,7 @@ struct exec + + #ifdef __KERNEL__ + #include <linux/thread_info.h> +-#define STACK_TOP (test_thread_flag(TIF_IA32) ? 
IA32_PAGE_OFFSET : TASK_SIZE) ++#define STACK_TOP TASK_SIZE + #endif + + #endif /* __A_OUT_GNU_H__ */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/cacheflush.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/cacheflush.h +--- linux-2.6.8.1.orig/include/asm-x86_64/cacheflush.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/cacheflush.h 2006-05-11 13:05:30.000000000 +0400 +@@ -25,5 +25,6 @@ + + void global_flush_tlb(void); + int change_page_attr(struct page *page, int numpages, pgprot_t prot); ++int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot); + + #endif /* _X8664_CACHEFLUSH_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/calling.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/calling.h +--- linux-2.6.8.1.orig/include/asm-x86_64/calling.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/calling.h 2006-05-11 13:05:33.000000000 +0400 +@@ -143,22 +143,6 @@ + RESTORE_ARGS 0,\addskip + .endm + +- /* push in order ss, rsp, eflags, cs, rip */ +- .macro FAKE_STACK_FRAME child_rip +- xorl %eax,%eax +- subq $6*8,%rsp +- movq %rax,5*8(%rsp) /* ss */ +- movq %rax,4*8(%rsp) /* rsp */ +- movq $(1<<9),3*8(%rsp) /* eflags */ +- movq $__KERNEL_CS,2*8(%rsp) /* cs */ +- movq \child_rip,1*8(%rsp) /* rip */ +- movq %rax,(%rsp) /* orig_rax */ +- .endm +- +- .macro UNFAKE_STACK_FRAME +- addq $8*6, %rsp +- .endm +- + .macro icebp + .byte 0xf1 + .endm +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/desc.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/desc.h +--- linux-2.6.8.1.orig/include/asm-x86_64/desc.h 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/desc.h 2006-05-11 13:05:29.000000000 +0400 +@@ -128,13 +128,13 @@ static inline void set_tss_desc(unsigned + { + set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr, + DESC_TSS, +- sizeof(struct tss_struct)); ++ sizeof(struct tss_struct) - 1); + } + + static inline void set_ldt_desc(unsigned cpu, void *addr, int size) + { + set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr, +- DESC_LDT, size * 8); ++ DESC_LDT, size * 8 - 1); + } + + static inline void set_seg_base(unsigned cpu, int entry, void *base) +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/hw_irq.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/hw_irq.h +--- linux-2.6.8.1.orig/include/asm-x86_64/hw_irq.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/hw_irq.h 2006-05-11 13:05:29.000000000 +0400 +@@ -163,7 +163,7 @@ static inline void x86_do_profile (struc + atomic_inc((atomic_t *)&prof_buffer[rip]); + } + +-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) ++#if defined(CONFIG_X86_IO_APIC) + static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { + if (IO_APIC_IRQ(i)) + send_IPI_self(IO_APIC_VECTOR(i)); +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/ia32.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/ia32.h +--- linux-2.6.8.1.orig/include/asm-x86_64/ia32.h 2004-08-14 14:56:13.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/ia32.h 2006-05-11 13:05:27.000000000 +0400 +@@ -84,7 +84,7 @@ typedef union sigval32 { + unsigned int sival_ptr; + } sigval_t32; + +-typedef struct siginfo32 { ++typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; +@@ -134,7 +134,7 @@ typedef struct siginfo32 { + int _fd; + } _sigpoll; + } _sifields; +-} siginfo_t32; ++} compat_siginfo_t; + + struct sigframe32 + { 
+@@ -151,7 +151,7 @@ struct rt_sigframe32 + int sig; + u32 pinfo; + u32 puc; +- struct siginfo32 info; ++ struct compat_siginfo info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + }; +@@ -171,8 +171,6 @@ struct siginfo_t; + int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info); + int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info); + int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs); +-int ia32_copy_siginfo_from_user(siginfo_t *to, siginfo_t32 __user *from); +-int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from); + #endif + + #endif /* !CONFIG_IA32_SUPPORT */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/irq.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/irq.h +--- linux-2.6.8.1.orig/include/asm-x86_64/irq.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/irq.h 2006-05-11 13:05:28.000000000 +0400 +@@ -57,4 +57,6 @@ struct irqaction; + struct pt_regs; + int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); + ++extern int no_irq_affinity; ++ + #endif /* _ASM_IRQ_H */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/mman.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/mman.h +--- linux-2.6.8.1.orig/include/asm-x86_64/mman.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/mman.h 2006-05-11 13:05:39.000000000 +0400 +@@ -23,6 +23,7 @@ + #define MAP_NORESERVE 0x4000 /* don't check for reservations */ + #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ + #define MAP_NONBLOCK 0x10000 /* do not block on IO */ ++#define MAP_EXECPRIO 0x80000 /* map from exec - try not to fail */ + + #define MS_ASYNC 1 /* sync memory asynchronously */ + #define MS_INVALIDATE 2 /* invalidate the caches */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/msr.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/msr.h +--- linux-2.6.8.1.orig/include/asm-x86_64/msr.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/msr.h 2006-05-11 13:05:28.000000000 +0400 +@@ -208,6 +208,7 @@ extern inline unsigned int cpuid_edx(uns + #define MSR_K8_TOP_MEM1 0xC001001A + #define MSR_K8_TOP_MEM2 0xC001001D + #define MSR_K8_SYSCFG 0xC0000010 ++#define MSR_K8_HWCR 0xC0010015 + + /* K6 MSRs */ + #define MSR_K6_EFER 0xC0000080 +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/mtrr.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/mtrr.h +--- linux-2.6.8.1.orig/include/asm-x86_64/mtrr.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/mtrr.h 2006-05-11 13:05:32.000000000 +0400 +@@ -71,8 +71,6 @@ struct mtrr_gentry + + #ifdef __KERNEL__ + +-extern char *mtrr_strings[MTRR_NUM_TYPES]; +- + /* The following functions are for use by other drivers */ + # ifdef CONFIG_MTRR + extern int mtrr_add (unsigned long base, unsigned long size, +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/pgalloc.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/pgalloc.h +--- linux-2.6.8.1.orig/include/asm-x86_64/pgalloc.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/pgalloc.h 2006-05-11 13:05:39.000000000 +0400 +@@ -30,12 +30,12 @@ extern __inline__ void pmd_free(pmd_t *p + + static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) + { +- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT); + } + + static inline pgd_t *pgd_alloc (struct mm_struct *mm) + { 
+- return (pgd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ return (pgd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT); + } + + static inline void pgd_free (pgd_t *pgd) +@@ -51,7 +51,7 @@ static inline pte_t *pte_alloc_one_kerne + + static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) + { +- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT); + if (!p) + return NULL; + return virt_to_page(p); +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/pgtable.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/pgtable.h +--- linux-2.6.8.1.orig/include/asm-x86_64/pgtable.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/pgtable.h 2006-05-11 13:05:29.000000000 +0400 +@@ -384,7 +384,7 @@ extern inline pte_t pte_modify(pte_t pte + } + + #define pte_index(address) \ +- ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) ++ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ + pte_index(address)) + +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/processor.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/processor.h +--- linux-2.6.8.1.orig/include/asm-x86_64/processor.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/processor.h 2006-05-11 13:05:45.000000000 +0400 +@@ -76,7 +76,6 @@ struct cpuinfo_x86 { + #define X86_VENDOR_UNKNOWN 0xff + + extern struct cpuinfo_x86 boot_cpu_data; +-extern struct tss_struct init_tss[NR_CPUS]; + + #ifdef CONFIG_SMP + extern struct cpuinfo_x86 cpu_data[]; +@@ -166,16 +165,16 @@ static inline void clear_in_cr4 (unsigne + /* + * User space process size: 512GB - 1GB (default). + */ +-#define TASK_SIZE (0x0000007fc0000000UL) ++#define TASK_SIZE64 (0x0000007fc0000000UL) + + /* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000) +-#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3) +-#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3) +-#define TASK_UNMAPPED_BASE \ +- (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) ++#define IA32_PAGE_OFFSET 0xc0000000 ++#define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64) ++#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)) ++ ++#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE/3) + + /* + * Size of io_bitmap. 
+@@ -183,7 +182,6 @@ static inline void clear_in_cr4 (unsigne + #define IO_BITMAP_BITS 65536 + #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) + #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +-#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) + #define INVALID_IO_BITMAP_OFFSET 0x8000 + + struct i387_fxsave_struct { +@@ -229,6 +227,10 @@ struct tss_struct { + + #define ARCH_MIN_TASKALIGN 16 + ++#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) ++ ++extern struct tss_struct init_tss[NR_CPUS]; ++ + struct thread_struct { + unsigned long rsp0; + unsigned long rsp; +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/segment.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/segment.h +--- linux-2.6.8.1.orig/include/asm-x86_64/segment.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/segment.h 2006-05-11 13:05:45.000000000 +0400 +@@ -3,32 +3,31 @@ + + #include <asm/cache.h> + +-#define __KERNEL_CS 0x10 +-#define __KERNEL_DS 0x18 +- +-#define __KERNEL32_CS 0x38 +- ++#define __KERNEL_COMPAT32_CS 0x8 ++#define GDT_ENTRY_BOOT_CS 2 ++#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) ++#define GDT_ENTRY_BOOT_DS 3 ++#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) ++#define GDT_ENTRY_TSS 4 /* needs two entries */ + /* + * we cannot use the same code segment descriptor for user and kernel + * -- not even in the long flat mode, because of different DPL /kkeil + * The segment offset needs to contain a RPL. Grr. -AK + * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) + */ +- +-#define __USER32_CS 0x23 /* 4*8+3 */ +-#define __USER_DS 0x2b /* 5*8+3 */ +-#define __USER_CS 0x33 /* 6*8+3 */ +-#define __USER32_DS __USER_DS ++#define GDT_ENTRY_TLS_MIN 6 ++#define GDT_ENTRY_TLS_MAX 8 ++#define GDT_ENTRY_KERNELCS16 9 + #define __KERNEL16_CS (GDT_ENTRY_KERNELCS16 * 8) +-#define __KERNEL_COMPAT32_CS 0x8 + +-#define GDT_ENTRY_TLS 1 +-#define GDT_ENTRY_TSS 8 /* needs two entries */ + #define GDT_ENTRY_LDT 10 +-#define GDT_ENTRY_TLS_MIN 11 +-#define GDT_ENTRY_TLS_MAX 13 +-/* 14 free */ +-#define GDT_ENTRY_KERNELCS16 15 ++#define __KERNEL32_CS 0x58 /* 11*8 */ ++#define __KERNEL_CS 0x60 /* 12*8 */ ++#define __KERNEL_DS 0x68 /* 13*8 */ ++#define __USER32_CS 0x73 /* 14*8+3 */ ++#define __USER_DS 0x7b /* 15*8+3 */ ++#define __USER32_DS __USER_DS ++#define __USER_CS 0x83 /* 16*8+3 */ + + #define GDT_ENTRY_TLS_ENTRIES 3 + +@@ -40,7 +39,7 @@ + #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) + + #define IDT_ENTRIES 256 +-#define GDT_ENTRIES 16 ++#define GDT_ENTRIES 32 + #define GDT_SIZE (GDT_ENTRIES * 8) + #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) + +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/system.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/system.h +--- linux-2.6.8.1.orig/include/asm-x86_64/system.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/system.h 2006-05-11 13:05:30.000000000 +0400 +@@ -35,7 +35,7 @@ + "thread_return:\n\t" \ + "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ +- "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ ++ LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ + RESTORE_CONTEXT \ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/thread_info.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/thread_info.h +--- linux-2.6.8.1.orig/include/asm-x86_64/thread_info.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/thread_info.h 2006-05-11 13:05:25.000000000 +0400 
+@@ -106,6 +106,7 @@ static inline struct thread_info *stack_ + #define TIF_IA32 17 /* 32bit process */ + #define TIF_FORK 18 /* ret_from_fork */ + #define TIF_ABI_PENDING 19 ++#define TIF_FREEZE 20 /* Freeze request, atomic version of PF_FREEZE */ + + #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/unistd.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/unistd.h +--- linux-2.6.8.1.orig/include/asm-x86_64/unistd.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/unistd.h 2006-05-11 13:05:43.000000000 +0400 +@@ -554,8 +554,30 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify) + __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) + #define __NR_kexec_load 246 + __SYSCALL(__NR_kexec_load, sys_ni_syscall) ++#define __NR_getluid 500 ++__SYSCALL(__NR_getluid, sys_getluid) ++#define __NR_setluid 501 ++__SYSCALL(__NR_setluid, sys_setluid) ++#define __NR_setublimit 502 ++__SYSCALL(__NR_setublimit, sys_setublimit) ++#define __NR_ubstat 503 ++__SYSCALL(__NR_ubstat, sys_ubstat) ++#define __NR_fairsched_mknod 504 /* FairScheduler syscalls */ ++__SYSCALL(__NR_fairsched_mknod, sys_fairsched_mknod) ++#define __NR_fairsched_rmnod 505 ++__SYSCALL(__NR_fairsched_rmnod, sys_fairsched_rmnod) ++#define __NR_fairsched_chwt 506 ++__SYSCALL(__NR_fairsched_chwt, sys_fairsched_chwt) ++#define __NR_fairsched_mvpr 507 ++__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr) ++#define __NR_fairsched_rate 508 ++__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate) ++#define __NR_lchmod 509 ++__SYSCALL(__NR_lchmod, sys_lchmod) ++#define __NR_lutime 510 ++__SYSCALL(__NR_lutime, sys_lutime) + +-#define __NR_syscall_max __NR_kexec_load ++#define __NR_syscall_max __NR_lutime + #ifndef __NO_STUBS + + /* user-visible error numbers are in the range -1 - -4095 */ +diff -uprN linux-2.6.8.1.orig/include/asm-x86_64/vsyscall.h linux-2.6.8.1-ve022stab078/include/asm-x86_64/vsyscall.h +--- linux-2.6.8.1.orig/include/asm-x86_64/vsyscall.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/asm-x86_64/vsyscall.h 2006-05-11 13:05:37.000000000 +0400 +@@ -1,8 +1,6 @@ + #ifndef _ASM_X86_64_VSYSCALL_H_ + #define _ASM_X86_64_VSYSCALL_H_ + +-#include <linux/seqlock.h> +- + enum vsyscall_num { + __NR_vgettimeofday, + __NR_vtime, +@@ -15,13 +13,15 @@ enum vsyscall_num { + + #ifdef __KERNEL__ + ++#include <linux/seqlock.h> ++ + #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) + #define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) + #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) + #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) + #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16))) + #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16))) +-#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES))) ++#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16))) + + #define VXTIME_TSC 1 + #define VXTIME_HPET 2 +diff -uprN linux-2.6.8.1.orig/include/linux/affs_fs.h linux-2.6.8.1-ve022stab078/include/linux/affs_fs.h +--- linux-2.6.8.1.orig/include/linux/affs_fs.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/affs_fs.h 2006-05-11 
13:05:35.000000000 +0400 +@@ -63,7 +63,7 @@ extern void affs_put_inode(struct ino + extern void affs_delete_inode(struct inode *inode); + extern void affs_clear_inode(struct inode *inode); + extern void affs_read_inode(struct inode *inode); +-extern void affs_write_inode(struct inode *inode, int); ++extern int affs_write_inode(struct inode *inode, int); + extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); + + /* super.c */ +diff -uprN linux-2.6.8.1.orig/include/linux/binfmts.h linux-2.6.8.1-ve022stab078/include/linux/binfmts.h +--- linux-2.6.8.1.orig/include/linux/binfmts.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/binfmts.h 2006-05-11 13:05:35.000000000 +0400 +@@ -2,6 +2,7 @@ + #define _LINUX_BINFMTS_H + + #include <linux/capability.h> ++#include <linux/fs.h> + + struct pt_regs; + +@@ -28,6 +29,7 @@ struct linux_binprm{ + int sh_bang; + struct file * file; + int e_uid, e_gid; ++ struct exec_perm perm; + kernel_cap_t cap_inheritable, cap_permitted, cap_effective; + void *security; + int argc, envc; +diff -uprN linux-2.6.8.1.orig/include/linux/bio.h linux-2.6.8.1-ve022stab078/include/linux/bio.h +--- linux-2.6.8.1.orig/include/linux/bio.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/bio.h 2006-05-11 13:05:31.000000000 +0400 +@@ -121,6 +121,7 @@ struct bio { + #define BIO_CLONED 4 /* doesn't own data */ + #define BIO_BOUNCED 5 /* bio is a bounce bio */ + #define BIO_USER_MAPPED 6 /* contains user pages */ ++#define BIO_EOPNOTSUPP 7 /* not supported */ + #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) + + /* +@@ -160,6 +161,8 @@ struct bio { + #define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) + #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) + #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) ++#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) ++#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) + + /* + * will die +diff -uprN linux-2.6.8.1.orig/include/linux/blkdev.h linux-2.6.8.1-ve022stab078/include/linux/blkdev.h +--- linux-2.6.8.1.orig/include/linux/blkdev.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/blkdev.h 2006-05-11 13:05:31.000000000 +0400 +@@ -195,6 +195,8 @@ enum rq_flag_bits { + __REQ_PM_SUSPEND, /* suspend request */ + __REQ_PM_RESUME, /* resume request */ + __REQ_PM_SHUTDOWN, /* shutdown request */ ++ __REQ_BAR_PREFLUSH, /* barrier pre-flush done */ ++ __REQ_BAR_POSTFLUSH, /* barrier post-flush */ + __REQ_NR_BITS, /* stops here */ + }; + +@@ -220,6 +222,8 @@ enum rq_flag_bits { + #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) + #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) + #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) ++#define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH) ++#define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH) + + /* + * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME +@@ -248,6 +252,7 @@ typedef void (unplug_fn) (request_queue_ + struct bio_vec; + typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); + typedef void (activity_fn) (void *data, int rw); ++typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); + + enum blk_queue_state { + Queue_down, +@@ -290,6 +295,7 @@ struct request_queue + unplug_fn *unplug_fn; + merge_bvec_fn *merge_bvec_fn; + activity_fn *activity_fn; ++ issue_flush_fn *issue_flush_fn; + + /* + * Auto-unplugging state +@@ 
-373,6 +379,7 @@ struct request_queue + #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ + #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ + #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ ++#define QUEUE_FLAG_ORDERED 8 /* supports ordered writes */ + + #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) +@@ -390,6 +397,10 @@ struct request_queue + #define blk_pm_request(rq) \ + ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) + ++#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) ++#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH) ++#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) ++ + #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) + + #define rq_data_dir(rq) ((rq)->flags & 1) +@@ -560,6 +571,14 @@ extern void end_that_request_last(struct + extern int process_that_request_first(struct request *, unsigned int); + extern void end_request(struct request *req, int uptodate); + ++/* ++ * end_that_request_first/chunk() takes an uptodate argument. we account ++ * any value <= as an io error. 0 means -EIO for compatability reasons, ++ * any other < 0 value is the direct error type. An uptodate value of ++ * 1 indicates successful io completion ++ */ ++#define end_io_error(uptodate) (unlikely((uptodate) <= 0)) ++ + static inline void blkdev_dequeue_request(struct request *req) + { + BUG_ON(list_empty(&req->queuelist)); +@@ -588,6 +607,9 @@ extern void blk_queue_prep_rq(request_qu + extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); + extern void blk_queue_dma_alignment(request_queue_t *, int); + extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); ++extern void blk_queue_ordered(request_queue_t *, int); ++extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); ++extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *); + + extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); + extern void blk_dump_rq_flags(struct request *, char *); +@@ -616,6 +638,7 @@ extern long blk_congestion_wait(int rw, + + extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *); + extern void blk_rq_prep_restart(struct request *); ++extern int blkdev_issue_flush(struct block_device *, sector_t *); + + #define MAX_PHYS_SEGMENTS 128 + #define MAX_HW_SEGMENTS 128 +diff -uprN linux-2.6.8.1.orig/include/linux/buffer_head.h linux-2.6.8.1-ve022stab078/include/linux/buffer_head.h +--- linux-2.6.8.1.orig/include/linux/buffer_head.h 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/buffer_head.h 2006-05-11 13:05:31.000000000 +0400 +@@ -26,6 +26,7 @@ enum bh_state_bits { + BH_Delay, /* Buffer is not yet allocated on disk */ + BH_Boundary, /* Block is followed by a discontiguity */ + BH_Write_EIO, /* I/O error on write */ ++ BH_Ordered, /* ordered write */ + + BH_PrivateStart,/* not a state bit, but the first bit available + * for private allocation by other entities +@@ -110,7 +111,8 @@ BUFFER_FNS(Async_Read, async_read) + BUFFER_FNS(Async_Write, async_write) + BUFFER_FNS(Delay, delay) + BUFFER_FNS(Boundary, boundary) +-BUFFER_FNS(Write_EIO,write_io_error) ++BUFFER_FNS(Write_EIO, write_io_error) ++BUFFER_FNS(Ordered, ordered) + + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) + #define touch_buffer(bh) mark_page_accessed(bh->b_page) +@@ -173,7 
+175,7 @@ void FASTCALL(unlock_buffer(struct buffe + void FASTCALL(__lock_buffer(struct buffer_head *bh)); + void ll_rw_block(int, int, struct buffer_head * bh[]); + void sync_dirty_buffer(struct buffer_head *bh); +-void submit_bh(int, struct buffer_head *); ++int submit_bh(int, struct buffer_head *); + void write_boundary_block(struct block_device *bdev, + sector_t bblock, unsigned blocksize); + +diff -uprN linux-2.6.8.1.orig/include/linux/byteorder/big_endian.h linux-2.6.8.1-ve022stab078/include/linux/byteorder/big_endian.h +--- linux-2.6.8.1.orig/include/linux/byteorder/big_endian.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/byteorder/big_endian.h 2006-05-11 13:05:31.000000000 +0400 +@@ -8,48 +8,86 @@ + #define __BIG_ENDIAN_BITFIELD + #endif + ++#include <linux/types.h> + #include <linux/byteorder/swab.h> + + #define __constant_htonl(x) ((__u32)(x)) + #define __constant_ntohl(x) ((__u32)(x)) + #define __constant_htons(x) ((__u16)(x)) + #define __constant_ntohs(x) ((__u16)(x)) +-#define __constant_cpu_to_le64(x) ___constant_swab64((x)) +-#define __constant_le64_to_cpu(x) ___constant_swab64((x)) +-#define __constant_cpu_to_le32(x) ___constant_swab32((x)) +-#define __constant_le32_to_cpu(x) ___constant_swab32((x)) +-#define __constant_cpu_to_le16(x) ___constant_swab16((x)) +-#define __constant_le16_to_cpu(x) ___constant_swab16((x)) +-#define __constant_cpu_to_be64(x) ((__u64)(x)) +-#define __constant_be64_to_cpu(x) ((__u64)(x)) +-#define __constant_cpu_to_be32(x) ((__u32)(x)) +-#define __constant_be32_to_cpu(x) ((__u32)(x)) +-#define __constant_cpu_to_be16(x) ((__u16)(x)) +-#define __constant_be16_to_cpu(x) ((__u16)(x)) +-#define __cpu_to_le64(x) __swab64((x)) +-#define __le64_to_cpu(x) __swab64((x)) +-#define __cpu_to_le32(x) __swab32((x)) +-#define __le32_to_cpu(x) __swab32((x)) +-#define __cpu_to_le16(x) __swab16((x)) +-#define __le16_to_cpu(x) __swab16((x)) +-#define __cpu_to_be64(x) ((__u64)(x)) +-#define __be64_to_cpu(x) ((__u64)(x)) +-#define __cpu_to_be32(x) ((__u32)(x)) +-#define __be32_to_cpu(x) ((__u32)(x)) +-#define __cpu_to_be16(x) ((__u16)(x)) +-#define __be16_to_cpu(x) ((__u16)(x)) +-#define __cpu_to_le64p(x) __swab64p((x)) +-#define __le64_to_cpup(x) __swab64p((x)) +-#define __cpu_to_le32p(x) __swab32p((x)) +-#define __le32_to_cpup(x) __swab32p((x)) +-#define __cpu_to_le16p(x) __swab16p((x)) +-#define __le16_to_cpup(x) __swab16p((x)) +-#define __cpu_to_be64p(x) (*(__u64*)(x)) +-#define __be64_to_cpup(x) (*(__u64*)(x)) +-#define __cpu_to_be32p(x) (*(__u32*)(x)) +-#define __be32_to_cpup(x) (*(__u32*)(x)) +-#define __cpu_to_be16p(x) (*(__u16*)(x)) +-#define __be16_to_cpup(x) (*(__u16*)(x)) ++#define __constant_cpu_to_le64(x) ((__force __le64)___constant_swab64((x))) ++#define __constant_le64_to_cpu(x) ___constant_swab64((__force __u64)(__le64)(x)) ++#define __constant_cpu_to_le32(x) ((__force __le32)___constant_swab32((x))) ++#define __constant_le32_to_cpu(x) ___constant_swab32((__force __u32)(__le32)(x)) ++#define __constant_cpu_to_le16(x) ((__force __le16)___constant_swab16((x))) ++#define __constant_le16_to_cpu(x) ___constant_swab16((__force __u16)(__le16)(x)) ++#define __constant_cpu_to_be64(x) ((__force __be64)(__u64)(x)) ++#define __constant_be64_to_cpu(x) ((__force __u64)(__be64)(x)) ++#define __constant_cpu_to_be32(x) ((__force __be32)(__u32)(x)) ++#define __constant_be32_to_cpu(x) ((__force __u32)(__be32)(x)) ++#define __constant_cpu_to_be16(x) ((__force __be16)(__u16)(x)) ++#define __constant_be16_to_cpu(x) ((__force 
__u16)(__be16)(x)) ++#define __cpu_to_le64(x) ((__force __le64)___swab64((x))) ++#define __le64_to_cpu(x) ___swab64((__force __u64)(__le64)(x)) ++#define __cpu_to_le32(x) ((__force __le32)___swab32((x))) ++#define __le32_to_cpu(x) ___swab32((__force __u32)(__le32)(x)) ++#define __cpu_to_le16(x) ((__force __le16)___swab16((x))) ++#define __le16_to_cpu(x) ___swab16((__force __u16)(__le16)(x)) ++#define __cpu_to_be64(x) ((__force __be64)(__u64)(x)) ++#define __be64_to_cpu(x) ((__force __u64)(__be64)(x)) ++#define __cpu_to_be32(x) ((__force __be32)(__u32)(x)) ++#define __be32_to_cpu(x) ((__force __u32)(__be32)(x)) ++#define __cpu_to_be16(x) ((__force __be16)(__u16)(x)) ++#define __be16_to_cpu(x) ((__force __u16)(__be16)(x)) ++ ++static inline __le64 __cpu_to_le64p(const __u64 *p) ++{ ++ return (__force __le64)__swab64p(p); ++} ++static inline __u64 __le64_to_cpup(const __le64 *p) ++{ ++ return __swab64p((__u64 *)p); ++} ++static inline __le32 __cpu_to_le32p(const __u32 *p) ++{ ++ return (__force __le32)__swab32p(p); ++} ++static inline __u32 __le32_to_cpup(const __le32 *p) ++{ ++ return __swab32p((__u32 *)p); ++} ++static inline __le16 __cpu_to_le16p(const __u16 *p) ++{ ++ return (__force __le16)__swab16p(p); ++} ++static inline __u16 __le16_to_cpup(const __le16 *p) ++{ ++ return __swab16p((__u16 *)p); ++} ++static inline __be64 __cpu_to_be64p(const __u64 *p) ++{ ++ return (__force __be64)*p; ++} ++static inline __u64 __be64_to_cpup(const __be64 *p) ++{ ++ return (__force __u64)*p; ++} ++static inline __be32 __cpu_to_be32p(const __u32 *p) ++{ ++ return (__force __be32)*p; ++} ++static inline __u32 __be32_to_cpup(const __be32 *p) ++{ ++ return (__force __u32)*p; ++} ++static inline __be16 __cpu_to_be16p(const __u16 *p) ++{ ++ return (__force __be16)*p; ++} ++static inline __u16 __be16_to_cpup(const __be16 *p) ++{ ++ return (__force __u16)*p; ++} + #define __cpu_to_le64s(x) __swab64s((x)) + #define __le64_to_cpus(x) __swab64s((x)) + #define __cpu_to_le32s(x) __swab32s((x)) +diff -uprN linux-2.6.8.1.orig/include/linux/byteorder/little_endian.h linux-2.6.8.1-ve022stab078/include/linux/byteorder/little_endian.h +--- linux-2.6.8.1.orig/include/linux/byteorder/little_endian.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/byteorder/little_endian.h 2006-05-11 13:05:31.000000000 +0400 +@@ -8,48 +8,86 @@ + #define __LITTLE_ENDIAN_BITFIELD + #endif + ++#include <linux/types.h> + #include <linux/byteorder/swab.h> + + #define __constant_htonl(x) ___constant_swab32((x)) + #define __constant_ntohl(x) ___constant_swab32((x)) + #define __constant_htons(x) ___constant_swab16((x)) + #define __constant_ntohs(x) ___constant_swab16((x)) +-#define __constant_cpu_to_le64(x) ((__u64)(x)) +-#define __constant_le64_to_cpu(x) ((__u64)(x)) +-#define __constant_cpu_to_le32(x) ((__u32)(x)) +-#define __constant_le32_to_cpu(x) ((__u32)(x)) +-#define __constant_cpu_to_le16(x) ((__u16)(x)) +-#define __constant_le16_to_cpu(x) ((__u16)(x)) +-#define __constant_cpu_to_be64(x) ___constant_swab64((x)) +-#define __constant_be64_to_cpu(x) ___constant_swab64((x)) +-#define __constant_cpu_to_be32(x) ___constant_swab32((x)) +-#define __constant_be32_to_cpu(x) ___constant_swab32((x)) +-#define __constant_cpu_to_be16(x) ___constant_swab16((x)) +-#define __constant_be16_to_cpu(x) ___constant_swab16((x)) +-#define __cpu_to_le64(x) ((__u64)(x)) +-#define __le64_to_cpu(x) ((__u64)(x)) +-#define __cpu_to_le32(x) ((__u32)(x)) +-#define __le32_to_cpu(x) ((__u32)(x)) +-#define __cpu_to_le16(x) ((__u16)(x)) 
+-#define __le16_to_cpu(x) ((__u16)(x)) +-#define __cpu_to_be64(x) __swab64((x)) +-#define __be64_to_cpu(x) __swab64((x)) +-#define __cpu_to_be32(x) __swab32((x)) +-#define __be32_to_cpu(x) __swab32((x)) +-#define __cpu_to_be16(x) __swab16((x)) +-#define __be16_to_cpu(x) __swab16((x)) +-#define __cpu_to_le64p(x) (*(__u64*)(x)) +-#define __le64_to_cpup(x) (*(__u64*)(x)) +-#define __cpu_to_le32p(x) (*(__u32*)(x)) +-#define __le32_to_cpup(x) (*(__u32*)(x)) +-#define __cpu_to_le16p(x) (*(__u16*)(x)) +-#define __le16_to_cpup(x) (*(__u16*)(x)) +-#define __cpu_to_be64p(x) __swab64p((x)) +-#define __be64_to_cpup(x) __swab64p((x)) +-#define __cpu_to_be32p(x) __swab32p((x)) +-#define __be32_to_cpup(x) __swab32p((x)) +-#define __cpu_to_be16p(x) __swab16p((x)) +-#define __be16_to_cpup(x) __swab16p((x)) ++#define __constant_cpu_to_le64(x) ((__force __le64)(__u64)(x)) ++#define __constant_le64_to_cpu(x) ((__force __u64)(__le64)(x)) ++#define __constant_cpu_to_le32(x) ((__force __le32)(__u32)(x)) ++#define __constant_le32_to_cpu(x) ((__force __u32)(__le32)(x)) ++#define __constant_cpu_to_le16(x) ((__force __le16)(__u16)(x)) ++#define __constant_le16_to_cpu(x) ((__force __u16)(__le16)(x)) ++#define __constant_cpu_to_be64(x) ((__force __be64)___constant_swab64((x))) ++#define __constant_be64_to_cpu(x) ___constant_swab64((__force __u64)(__be64)(x)) ++#define __constant_cpu_to_be32(x) ((__force __be32)___constant_swab32((x))) ++#define __constant_be32_to_cpu(x) ___constant_swab32((__force __u32)(__be32)(x)) ++#define __constant_cpu_to_be16(x) ((__force __be16)___constant_swab16((x))) ++#define __constant_be16_to_cpu(x) ___constant_swab16((__force __u16)(__be16)(x)) ++#define __cpu_to_le64(x) ((__force __le64)(__u64)(x)) ++#define __le64_to_cpu(x) ((__force __u64)(__le64)(x)) ++#define __cpu_to_le32(x) ((__force __le32)(__u32)(x)) ++#define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) ++#define __cpu_to_le16(x) ((__force __le16)(__u16)(x)) ++#define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) ++#define __cpu_to_be64(x) ((__force __be64)___swab64((x))) ++#define __be64_to_cpu(x) ___swab64((__force __u64)(__be64)(x)) ++#define __cpu_to_be32(x) ((__force __be32)___swab32((x))) ++#define __be32_to_cpu(x) ___swab32((__force __u32)(__be32)(x)) ++#define __cpu_to_be16(x) ((__force __be16)___swab16((x))) ++#define __be16_to_cpu(x) ___swab16((__force __u16)(__be16)(x)) ++ ++static inline __le64 __cpu_to_le64p(const __u64 *p) ++{ ++ return (__force __le64)*p; ++} ++static inline __u64 __le64_to_cpup(const __le64 *p) ++{ ++ return (__force __u64)*p; ++} ++static inline __le32 __cpu_to_le32p(const __u32 *p) ++{ ++ return (__force __le32)*p; ++} ++static inline __u32 __le32_to_cpup(const __le32 *p) ++{ ++ return (__force __u32)*p; ++} ++static inline __le16 __cpu_to_le16p(const __u16 *p) ++{ ++ return (__force __le16)*p; ++} ++static inline __u16 __le16_to_cpup(const __le16 *p) ++{ ++ return (__force __u16)*p; ++} ++static inline __be64 __cpu_to_be64p(const __u64 *p) ++{ ++ return (__force __be64)__swab64p(p); ++} ++static inline __u64 __be64_to_cpup(const __be64 *p) ++{ ++ return __swab64p((__u64 *)p); ++} ++static inline __be32 __cpu_to_be32p(const __u32 *p) ++{ ++ return (__force __be32)__swab32p(p); ++} ++static inline __u32 __be32_to_cpup(const __be32 *p) ++{ ++ return __swab32p((__u32 *)p); ++} ++static inline __be16 __cpu_to_be16p(const __u16 *p) ++{ ++ return (__force __be16)__swab16p(p); ++} ++static inline __u16 __be16_to_cpup(const __be16 *p) ++{ ++ return __swab16p((__u16 *)p); ++} + #define 
__cpu_to_le64s(x) do {} while (0) + #define __le64_to_cpus(x) do {} while (0) + #define __cpu_to_le32s(x) do {} while (0) +diff -uprN linux-2.6.8.1.orig/include/linux/capability.h linux-2.6.8.1-ve022stab078/include/linux/capability.h +--- linux-2.6.8.1.orig/include/linux/capability.h 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/capability.h 2006-05-11 13:05:40.000000000 +0400 +@@ -147,12 +147,9 @@ typedef __u32 kernel_cap_t; + + #define CAP_NET_BROADCAST 11 + +-/* Allow interface configuration */ + /* Allow administration of IP firewall, masquerading and accounting */ + /* Allow setting debug option on sockets */ + /* Allow modification of routing tables */ +-/* Allow setting arbitrary process / process group ownership on +- sockets */ + /* Allow binding to any address for transparent proxying */ + /* Allow setting TOS (type of service) */ + /* Allow setting promiscuous mode */ +@@ -183,6 +180,7 @@ typedef __u32 kernel_cap_t; + #define CAP_SYS_MODULE 16 + + /* Allow ioperm/iopl access */ ++/* Allow O_DIRECT access */ + /* Allow sending USB messages to any device via /proc/bus/usb */ + + #define CAP_SYS_RAWIO 17 +@@ -201,24 +199,19 @@ typedef __u32 kernel_cap_t; + + /* Allow configuration of the secure attention key */ + /* Allow administration of the random device */ +-/* Allow examination and configuration of disk quotas */ + /* Allow configuring the kernel's syslog (printk behaviour) */ + /* Allow setting the domainname */ + /* Allow setting the hostname */ + /* Allow calling bdflush() */ +-/* Allow mount() and umount(), setting up new smb connection */ ++/* Allow setting up new smb connection */ + /* Allow some autofs root ioctls */ + /* Allow nfsservctl */ + /* Allow VM86_REQUEST_IRQ */ + /* Allow to read/write pci config on alpha */ + /* Allow irix_prctl on mips (setstacksize) */ + /* Allow flushing all cache on m68k (sys_cacheflush) */ +-/* Allow removing semaphores */ +-/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores +- and shared memory */ + /* Allow locking/unlocking of shared memory segment */ + /* Allow turning swap on/off */ +-/* Allow forged pids on socket credentials passing */ + /* Allow setting readahead and flushing buffers on block devices */ + /* Allow setting geometry in floppy driver */ + /* Allow turning DMA on/off in xd driver */ +@@ -235,6 +228,8 @@ typedef __u32 kernel_cap_t; + /* Allow enabling/disabling tagged queuing on SCSI controllers and sending + arbitrary SCSI commands */ + /* Allow setting encryption key on loopback filesystem */ ++/* Modify data journaling mode on ext3 filesystem (uses journaling ++ resources) */ + + #define CAP_SYS_ADMIN 21 + +@@ -254,8 +249,6 @@ typedef __u32 kernel_cap_t; + /* Override resource limits. Set resource limits. */ + /* Override quota limits. */ + /* Override reserved space on ext2 filesystem */ +-/* Modify data journaling mode on ext3 filesystem (uses journaling +- resources) */ + /* NOTE: ext2 honors fsuid when checking for resource overrides, so + you can override using fsuid too */ + /* Override size restrictions on IPC message queues */ +@@ -284,6 +277,36 @@ typedef __u32 kernel_cap_t; + + #define CAP_LEASE 28 + ++/* Allow access to all information. In the other case some structures will be ++ hiding to ensure different Virtual Environment non-interaction on the same ++ node */ ++#define CAP_SETVEID 29 ++ ++#define CAP_VE_ADMIN 30 ++ ++/* Replacement for CAP_NET_ADMIN: ++ delegated rights to the Virtual environment of its network administration. 
++ For now the following rights have been delegated: ++ ++ Allow setting arbitrary process / process group ownership on sockets ++ Allow interface configuration ++*/ ++#define CAP_VE_NET_ADMIN CAP_VE_ADMIN ++ ++/* Replacement for CAP_SYS_ADMIN: ++ delegated rights to the Virtual environment of its administration. ++ For now the following rights have been delegated: ++*/ ++/* Allow mount/umount/remount */ ++/* Allow examination and configuration of disk quotas */ ++/* Allow removing semaphores */ ++/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores ++ and shared memory */ ++/* Allow locking/unlocking of shared memory segment */ ++/* Allow forged pids on socket credentials passing */ ++ ++#define CAP_VE_SYS_ADMIN CAP_VE_ADMIN ++ + #ifdef __KERNEL__ + /* + * Bounding set +@@ -348,9 +371,16 @@ static inline kernel_cap_t cap_invert(ke + #define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set))) + + #define cap_clear(c) do { cap_t(c) = 0; } while(0) ++ ++#ifndef CONFIG_VE + #define cap_set_full(c) do { cap_t(c) = ~0; } while(0) +-#define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0) ++#else ++#define cap_set_full(c) \ ++ do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 : \ ++ get_exec_env()->cap_default; } while(0) ++#endif + ++#define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0) + #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) + + #endif /* __KERNEL__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/coda_linux.h linux-2.6.8.1-ve022stab078/include/linux/coda_linux.h +--- linux-2.6.8.1.orig/include/linux/coda_linux.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/coda_linux.h 2006-05-11 13:05:35.000000000 +0400 +@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl + int coda_open(struct inode *i, struct file *f); + int coda_flush(struct file *f); + int coda_release(struct inode *i, struct file *f); +-int coda_permission(struct inode *inode, int mask, struct nameidata *nd); ++int coda_permission(struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm); + int coda_revalidate_inode(struct dentry *); + int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); + int coda_setattr(struct dentry *, struct iattr *); +diff -uprN linux-2.6.8.1.orig/include/linux/compat.h linux-2.6.8.1-ve022stab078/include/linux/compat.h +--- linux-2.6.8.1.orig/include/linux/compat.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/compat.h 2006-05-11 13:05:27.000000000 +0400 +@@ -130,5 +130,8 @@ asmlinkage long compat_sys_select(int n, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timeval __user *tvp); + ++struct compat_siginfo; ++int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); ++int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); + #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/compat_ioctl.h linux-2.6.8.1-ve022stab078/include/linux/compat_ioctl.h +--- linux-2.6.8.1.orig/include/linux/compat_ioctl.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/compat_ioctl.h 2006-05-11 13:05:29.000000000 +0400 +@@ -16,6 +16,7 @@ COMPATIBLE_IOCTL(TCSETA) + COMPATIBLE_IOCTL(TCSETAW) + COMPATIBLE_IOCTL(TCSETAF) + COMPATIBLE_IOCTL(TCSBRK) ++ULONG_IOCTL(TCSBRKP) + COMPATIBLE_IOCTL(TCXONC) + COMPATIBLE_IOCTL(TCFLSH) + COMPATIBLE_IOCTL(TCGETS) +@@ -23,6 +24,8 @@ COMPATIBLE_IOCTL(TCSETS) + 
COMPATIBLE_IOCTL(TCSETSW) + COMPATIBLE_IOCTL(TCSETSF) + COMPATIBLE_IOCTL(TIOCLINUX) ++COMPATIBLE_IOCTL(TIOCSBRK) ++COMPATIBLE_IOCTL(TIOCCBRK) + /* Little t */ + COMPATIBLE_IOCTL(TIOCGETD) + COMPATIBLE_IOCTL(TIOCSETD) +diff -uprN linux-2.6.8.1.orig/include/linux/dcache.h linux-2.6.8.1-ve022stab078/include/linux/dcache.h +--- linux-2.6.8.1.orig/include/linux/dcache.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/dcache.h 2006-05-11 13:05:40.000000000 +0400 +@@ -80,6 +80,8 @@ struct dcookie_struct; + + #define DNAME_INLINE_LEN_MIN 36 + ++#include <ub/ub_dcache.h> ++ + struct dentry { + atomic_t d_count; + unsigned int d_flags; /* protected by d_lock */ +@@ -106,9 +108,15 @@ struct dentry { + struct rcu_head d_rcu; + struct dcookie_struct *d_cookie; /* cookie, if any */ + struct hlist_node d_hash; /* lookup hash list */ ++ /* It can't be at the end because of DNAME_INLINE_LEN */ ++ struct dentry_beancounter dentry_bc; + unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ + }; + ++#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) ++ ++#define dentry_bc(__d) (&(__d)->dentry_bc) ++ + struct dentry_operations { + int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_hash) (struct dentry *, struct qstr *); +@@ -156,6 +164,9 @@ d_iput: no no no yes + + #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ + #define DCACHE_UNHASHED 0x0010 ++#define DCACHE_VIRTUAL 0x0100 /* ve accessible */ ++ ++extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d); + + extern spinlock_t dcache_lock; + +@@ -163,17 +174,16 @@ extern spinlock_t dcache_lock; + * d_drop - drop a dentry + * @dentry: dentry to drop + * +- * d_drop() unhashes the entry from the parent +- * dentry hashes, so that it won't be found through +- * a VFS lookup any more. Note that this is different +- * from deleting the dentry - d_delete will try to +- * mark the dentry negative if possible, giving a +- * successful _negative_ lookup, while d_drop will ++ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't ++ * be found through a VFS lookup any more. Note that this is different from ++ * deleting the dentry - d_delete will try to mark the dentry negative if ++ * possible, giving a successful _negative_ lookup, while d_drop will + * just make the cache lookup fail. + * +- * d_drop() is used mainly for stuff that wants +- * to invalidate a dentry for some reason (NFS +- * timeouts or autofs deletes). ++ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some ++ * reason (NFS timeouts or autofs deletes). ++ * ++ * __d_drop requires dentry->d_lock. 
+ */ + + static inline void __d_drop(struct dentry *dentry) +@@ -187,7 +197,9 @@ static inline void __d_drop(struct dentr + static inline void d_drop(struct dentry *dentry) + { + spin_lock(&dcache_lock); ++ spin_lock(&dentry->d_lock); + __d_drop(dentry); ++ spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + } + +@@ -208,7 +220,8 @@ extern struct dentry * d_alloc_anon(stru + extern struct dentry * d_splice_alias(struct inode *, struct dentry *); + extern void shrink_dcache_sb(struct super_block *); + extern void shrink_dcache_parent(struct dentry *); +-extern void shrink_dcache_anon(struct hlist_head *); ++extern void shrink_dcache_anon(struct super_block *); ++extern void dcache_shrinker_wait_sb(struct super_block *sb); + extern int d_invalidate(struct dentry *); + + /* only used at mount-time */ +@@ -253,6 +266,7 @@ extern struct dentry * __d_lookup(struct + /* validate "insecure" dentry pointer */ + extern int d_validate(struct dentry *, struct dentry *); + ++extern int d_root_check(struct dentry *, struct vfsmount *); + extern char * d_path(struct dentry *, struct vfsmount *, char *, int); + + /* Allocation counts.. */ +@@ -273,6 +287,10 @@ extern char * d_path(struct dentry *, st + static inline struct dentry *dget(struct dentry *dentry) + { + if (dentry) { ++#ifdef CONFIG_USER_RESOURCE ++ if (atomic_inc_and_test(&dentry_bc(dentry)->d_inuse)) ++ BUG(); ++#endif + BUG_ON(!atomic_read(&dentry->d_count)); + atomic_inc(&dentry->d_count); + } +@@ -315,6 +333,8 @@ extern struct dentry *lookup_create(stru + + extern int sysctl_vfs_cache_pressure; + ++extern int check_area_access_ve(struct dentry *, struct vfsmount *); ++extern int check_area_execute_ve(struct dentry *, struct vfsmount *); + #endif /* __KERNEL__ */ + + #endif /* __LINUX_DCACHE_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/devpts_fs.h linux-2.6.8.1-ve022stab078/include/linux/devpts_fs.h +--- linux-2.6.8.1.orig/include/linux/devpts_fs.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/devpts_fs.h 2006-05-11 13:05:40.000000000 +0400 +@@ -21,6 +21,13 @@ int devpts_pty_new(struct tty_struct *tt + struct tty_struct *devpts_get_tty(int number); /* get tty structure */ + void devpts_pty_kill(int number); /* unlink */ + ++struct devpts_config { ++ int setuid; ++ int setgid; ++ uid_t uid; ++ gid_t gid; ++ umode_t mode; ++}; + #else + + /* Dummy stubs in the no-pty case */ +diff -uprN linux-2.6.8.1.orig/include/linux/elfcore.h linux-2.6.8.1-ve022stab078/include/linux/elfcore.h +--- linux-2.6.8.1.orig/include/linux/elfcore.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/elfcore.h 2006-05-11 13:05:45.000000000 +0400 +@@ -6,6 +6,8 @@ + #include <linux/time.h> + #include <linux/user.h> + ++extern int sysctl_at_vsyscall; ++ + struct elf_siginfo + { + int si_signo; /* signal number */ +diff -uprN linux-2.6.8.1.orig/include/linux/eventpoll.h linux-2.6.8.1-ve022stab078/include/linux/eventpoll.h +--- linux-2.6.8.1.orig/include/linux/eventpoll.h 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/eventpoll.h 2006-05-11 13:05:48.000000000 +0400 +@@ -85,6 +85,87 @@ static inline void eventpoll_release(str + eventpoll_release_file(file); + } + ++struct epoll_filefd { ++ struct file *file; ++ int fd; ++}; ++ ++/* ++ * This structure is stored inside the "private_data" member of the file ++ * structure and rapresent the main data sructure for the eventpoll ++ * interface. 
++ */ ++struct eventpoll { ++ /* Protect the this structure access */ ++ rwlock_t lock; ++ ++ /* ++ * This semaphore is used to ensure that files are not removed ++ * while epoll is using them. This is read-held during the event ++ * collection loop and it is write-held during the file cleanup ++ * path, the epoll file exit code and the ctl operations. ++ */ ++ struct rw_semaphore sem; ++ ++ /* Wait queue used by sys_epoll_wait() */ ++ wait_queue_head_t wq; ++ ++ /* Wait queue used by file->poll() */ ++ wait_queue_head_t poll_wait; ++ ++ /* List of ready file descriptors */ ++ struct list_head rdllist; ++ ++ /* RB-Tree root used to store monitored fd structs */ ++ struct rb_root rbr; ++}; ++ ++/* ++ * Each file descriptor added to the eventpoll interface will ++ * have an entry of this type linked to the hash. ++ */ ++struct epitem { ++ /* RB-Tree node used to link this structure to the eventpoll rb-tree */ ++ struct rb_node rbn; ++ ++ /* List header used to link this structure to the eventpoll ready list */ ++ struct list_head rdllink; ++ ++ /* The file descriptor information this item refers to */ ++ struct epoll_filefd ffd; ++ ++ /* Number of active wait queue attached to poll operations */ ++ int nwait; ++ ++ /* List containing poll wait queues */ ++ struct list_head pwqlist; ++ ++ /* The "container" of this item */ ++ struct eventpoll *ep; ++ ++ /* The structure that describe the interested events and the source fd */ ++ struct epoll_event event; ++ ++ /* ++ * Used to keep track of the usage count of the structure. This avoids ++ * that the structure will desappear from underneath our processing. ++ */ ++ atomic_t usecnt; ++ ++ /* List header used to link this item to the "struct file" items list */ ++ struct list_head fllink; ++ ++ /* List header used to link the item to the transfer list */ ++ struct list_head txlink; ++ ++ /* ++ * This is used during the collection/transfer of events to userspace ++ * to pin items empty events set. 
++ */ ++ unsigned int revents; ++}; ++ ++extern struct semaphore epsem; + + #else + +diff -uprN linux-2.6.8.1.orig/include/linux/ext2_fs.h linux-2.6.8.1-ve022stab078/include/linux/ext2_fs.h +--- linux-2.6.8.1.orig/include/linux/ext2_fs.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ext2_fs.h 2006-05-11 13:05:31.000000000 +0400 +@@ -135,14 +135,14 @@ static inline struct ext2_sb_info *EXT2_ + */ + struct ext2_group_desc + { +- __u32 bg_block_bitmap; /* Blocks bitmap block */ +- __u32 bg_inode_bitmap; /* Inodes bitmap block */ +- __u32 bg_inode_table; /* Inodes table block */ +- __u16 bg_free_blocks_count; /* Free blocks count */ +- __u16 bg_free_inodes_count; /* Free inodes count */ +- __u16 bg_used_dirs_count; /* Directories count */ +- __u16 bg_pad; +- __u32 bg_reserved[3]; ++ __le32 bg_block_bitmap; /* Blocks bitmap block */ ++ __le32 bg_inode_bitmap; /* Inodes bitmap block */ ++ __le32 bg_inode_table; /* Inodes table block */ ++ __le16 bg_free_blocks_count; /* Free blocks count */ ++ __le16 bg_free_inodes_count; /* Free inodes count */ ++ __le16 bg_used_dirs_count; /* Directories count */ ++ __le16 bg_pad; ++ __le32 bg_reserved[3]; + }; + + /* +@@ -209,49 +209,49 @@ struct ext2_group_desc + * Structure of an inode on the disk + */ + struct ext2_inode { +- __u16 i_mode; /* File mode */ +- __u16 i_uid; /* Low 16 bits of Owner Uid */ +- __u32 i_size; /* Size in bytes */ +- __u32 i_atime; /* Access time */ +- __u32 i_ctime; /* Creation time */ +- __u32 i_mtime; /* Modification time */ +- __u32 i_dtime; /* Deletion Time */ +- __u16 i_gid; /* Low 16 bits of Group Id */ +- __u16 i_links_count; /* Links count */ +- __u32 i_blocks; /* Blocks count */ +- __u32 i_flags; /* File flags */ ++ __le16 i_mode; /* File mode */ ++ __le16 i_uid; /* Low 16 bits of Owner Uid */ ++ __le32 i_size; /* Size in bytes */ ++ __le32 i_atime; /* Access time */ ++ __le32 i_ctime; /* Creation time */ ++ __le32 i_mtime; /* Modification time */ ++ __le32 i_dtime; /* Deletion Time */ ++ __le16 i_gid; /* Low 16 bits of Group Id */ ++ __le16 i_links_count; /* Links count */ ++ __le32 i_blocks; /* Blocks count */ ++ __le32 i_flags; /* File flags */ + union { + struct { +- __u32 l_i_reserved1; ++ __le32 l_i_reserved1; + } linux1; + struct { +- __u32 h_i_translator; ++ __le32 h_i_translator; + } hurd1; + struct { +- __u32 m_i_reserved1; ++ __le32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ +- __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ +- __u32 i_generation; /* File version (for NFS) */ +- __u32 i_file_acl; /* File ACL */ +- __u32 i_dir_acl; /* Directory ACL */ +- __u32 i_faddr; /* Fragment address */ ++ __le32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ ++ __le32 i_generation; /* File version (for NFS) */ ++ __le32 i_file_acl; /* File ACL */ ++ __le32 i_dir_acl; /* Directory ACL */ ++ __le32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; +- __u16 l_i_uid_high; /* these 2 fields */ +- __u16 l_i_gid_high; /* were reserved2[0] */ ++ __le16 l_i_uid_high; /* these 2 fields */ ++ __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ +- __u16 h_i_mode_high; +- __u16 h_i_uid_high; +- __u16 h_i_gid_high; +- __u32 h_i_author; ++ __le16 h_i_mode_high; ++ __le16 h_i_uid_high; ++ __le16 h_i_gid_high; ++ __le32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* 
Fragment number */ +@@ -335,31 +335,31 @@ struct ext2_inode { + * Structure of the super block + */ + struct ext2_super_block { +- __u32 s_inodes_count; /* Inodes count */ +- __u32 s_blocks_count; /* Blocks count */ +- __u32 s_r_blocks_count; /* Reserved blocks count */ +- __u32 s_free_blocks_count; /* Free blocks count */ +- __u32 s_free_inodes_count; /* Free inodes count */ +- __u32 s_first_data_block; /* First Data Block */ +- __u32 s_log_block_size; /* Block size */ +- __s32 s_log_frag_size; /* Fragment size */ +- __u32 s_blocks_per_group; /* # Blocks per group */ +- __u32 s_frags_per_group; /* # Fragments per group */ +- __u32 s_inodes_per_group; /* # Inodes per group */ +- __u32 s_mtime; /* Mount time */ +- __u32 s_wtime; /* Write time */ +- __u16 s_mnt_count; /* Mount count */ +- __s16 s_max_mnt_count; /* Maximal mount count */ +- __u16 s_magic; /* Magic signature */ +- __u16 s_state; /* File system state */ +- __u16 s_errors; /* Behaviour when detecting errors */ +- __u16 s_minor_rev_level; /* minor revision level */ +- __u32 s_lastcheck; /* time of last check */ +- __u32 s_checkinterval; /* max. time between checks */ +- __u32 s_creator_os; /* OS */ +- __u32 s_rev_level; /* Revision level */ +- __u16 s_def_resuid; /* Default uid for reserved blocks */ +- __u16 s_def_resgid; /* Default gid for reserved blocks */ ++ __le32 s_inodes_count; /* Inodes count */ ++ __le32 s_blocks_count; /* Blocks count */ ++ __le32 s_r_blocks_count; /* Reserved blocks count */ ++ __le32 s_free_blocks_count; /* Free blocks count */ ++ __le32 s_free_inodes_count; /* Free inodes count */ ++ __le32 s_first_data_block; /* First Data Block */ ++ __le32 s_log_block_size; /* Block size */ ++ __le32 s_log_frag_size; /* Fragment size */ ++ __le32 s_blocks_per_group; /* # Blocks per group */ ++ __le32 s_frags_per_group; /* # Fragments per group */ ++ __le32 s_inodes_per_group; /* # Inodes per group */ ++ __le32 s_mtime; /* Mount time */ ++ __le32 s_wtime; /* Write time */ ++ __le16 s_mnt_count; /* Mount count */ ++ __le16 s_max_mnt_count; /* Maximal mount count */ ++ __le16 s_magic; /* Magic signature */ ++ __le16 s_state; /* File system state */ ++ __le16 s_errors; /* Behaviour when detecting errors */ ++ __le16 s_minor_rev_level; /* minor revision level */ ++ __le32 s_lastcheck; /* time of last check */ ++ __le32 s_checkinterval; /* max. time between checks */ ++ __le32 s_creator_os; /* OS */ ++ __le32 s_rev_level; /* Revision level */ ++ __le16 s_def_resuid; /* Default uid for reserved blocks */ ++ __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * +@@ -373,16 +373,16 @@ struct ext2_super_block { + * feature set, it must abort and not try to meddle with + * things it doesn't understand... 
+ */ +- __u32 s_first_ino; /* First non-reserved inode */ +- __u16 s_inode_size; /* size of inode structure */ +- __u16 s_block_group_nr; /* block group # of this superblock */ +- __u32 s_feature_compat; /* compatible feature set */ +- __u32 s_feature_incompat; /* incompatible feature set */ +- __u32 s_feature_ro_compat; /* readonly-compatible feature set */ ++ __le32 s_first_ino; /* First non-reserved inode */ ++ __le16 s_inode_size; /* size of inode structure */ ++ __le16 s_block_group_nr; /* block group # of this superblock */ ++ __le32 s_feature_compat; /* compatible feature set */ ++ __le32 s_feature_incompat; /* incompatible feature set */ ++ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ +- __u32 s_algorithm_usage_bitmap; /* For compression */ ++ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. +@@ -401,8 +401,8 @@ struct ext2_super_block { + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; +- __u32 s_default_mount_opts; +- __u32 s_first_meta_bg; /* First metablock block group */ ++ __le32 s_default_mount_opts; ++ __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ + }; + +@@ -504,9 +504,9 @@ struct ext2_super_block { + #define EXT2_NAME_LEN 255 + + struct ext2_dir_entry { +- __u32 inode; /* Inode number */ +- __u16 rec_len; /* Directory entry length */ +- __u16 name_len; /* Name length */ ++ __le32 inode; /* Inode number */ ++ __le16 rec_len; /* Directory entry length */ ++ __le16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ + }; + +@@ -517,8 +517,8 @@ struct ext2_dir_entry { + * file_type field. 
+ */ + struct ext2_dir_entry_2 { +- __u32 inode; /* Inode number */ +- __u16 rec_len; /* Directory entry length */ ++ __le32 inode; /* Inode number */ ++ __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT2_NAME_LEN]; /* File name */ +diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs.h linux-2.6.8.1-ve022stab078/include/linux/ext3_fs.h +--- linux-2.6.8.1.orig/include/linux/ext3_fs.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ext3_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -129,14 +129,14 @@ struct statfs; + */ + struct ext3_group_desc + { +- __u32 bg_block_bitmap; /* Blocks bitmap block */ +- __u32 bg_inode_bitmap; /* Inodes bitmap block */ +- __u32 bg_inode_table; /* Inodes table block */ +- __u16 bg_free_blocks_count; /* Free blocks count */ +- __u16 bg_free_inodes_count; /* Free inodes count */ +- __u16 bg_used_dirs_count; /* Directories count */ ++ __le32 bg_block_bitmap; /* Blocks bitmap block */ ++ __le32 bg_inode_bitmap; /* Inodes bitmap block */ ++ __le32 bg_inode_table; /* Inodes table block */ ++ __le16 bg_free_blocks_count; /* Free blocks count */ ++ __le16 bg_free_inodes_count; /* Free inodes count */ ++ __le16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; +- __u32 bg_reserved[3]; ++ __le32 bg_reserved[3]; + }; + + /* +@@ -196,6 +196,31 @@ struct ext3_group_desc + #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ + #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ + ++ ++/* Used to pass group descriptor data when online resize is done */ ++struct ext3_new_group_input { ++ __u32 group; /* Group number for this data */ ++ __u32 block_bitmap; /* Absolute block number of block bitmap */ ++ __u32 inode_bitmap; /* Absolute block number of inode bitmap */ ++ __u32 inode_table; /* Absolute block number of inode table start */ ++ __u32 blocks_count; /* Total number of blocks in this group */ ++ __u16 reserved_blocks; /* Number of reserved blocks in this group */ ++ __u16 unused; ++}; ++ ++/* The struct ext3_new_group_input in kernel space, with free_blocks_count */ ++struct ext3_new_group_data { ++ __u32 group; ++ __u32 block_bitmap; ++ __u32 inode_bitmap; ++ __u32 inode_table; ++ __u32 blocks_count; ++ __u16 reserved_blocks; ++ __u16 unused; ++ __u32 free_blocks_count; ++}; ++ ++ + /* + * ioctl commands + */ +@@ -203,6 +228,8 @@ struct ext3_group_desc + #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) + #define EXT3_IOC_GETVERSION _IOR('f', 3, long) + #define EXT3_IOC_SETVERSION _IOW('f', 4, long) ++#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) ++#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) + #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) + #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) + #ifdef CONFIG_JBD_DEBUG +@@ -213,17 +240,17 @@ struct ext3_group_desc + * Structure of an inode on the disk + */ + struct ext3_inode { +- __u16 i_mode; /* File mode */ +- __u16 i_uid; /* Low 16 bits of Owner Uid */ +- __u32 i_size; /* Size in bytes */ +- __u32 i_atime; /* Access time */ +- __u32 i_ctime; /* Creation time */ +- __u32 i_mtime; /* Modification time */ +- __u32 i_dtime; /* Deletion Time */ +- __u16 i_gid; /* Low 16 bits of Group Id */ +- __u16 i_links_count; /* Links count */ +- __u32 i_blocks; /* Blocks count */ +- __u32 i_flags; /* File flags */ ++ __le16 i_mode; /* File mode */ ++ __le16 i_uid; /* Low 16 bits of Owner Uid */ ++ __le32 i_size; /* Size in bytes */ ++ __le32 i_atime; /* Access time */ ++ 
__le32 i_ctime; /* Creation time */ ++ __le32 i_mtime; /* Modification time */ ++ __le32 i_dtime; /* Deletion Time */ ++ __le16 i_gid; /* Low 16 bits of Group Id */ ++ __le16 i_links_count; /* Links count */ ++ __le32 i_blocks; /* Blocks count */ ++ __le32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; +@@ -235,18 +262,18 @@ struct ext3_inode { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ +- __u32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ +- __u32 i_generation; /* File version (for NFS) */ +- __u32 i_file_acl; /* File ACL */ +- __u32 i_dir_acl; /* Directory ACL */ +- __u32 i_faddr; /* Fragment address */ ++ __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ ++ __le32 i_generation; /* File version (for NFS) */ ++ __le32 i_file_acl; /* File ACL */ ++ __le32 i_dir_acl; /* Directory ACL */ ++ __le32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; +- __u16 l_i_uid_high; /* these 2 fields */ +- __u16 l_i_gid_high; /* were reserved2[0] */ ++ __le16 l_i_uid_high; /* these 2 fields */ ++ __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { +@@ -363,31 +390,31 @@ struct ext3_inode { + * Structure of the super block + */ + struct ext3_super_block { +-/*00*/ __u32 s_inodes_count; /* Inodes count */ +- __u32 s_blocks_count; /* Blocks count */ +- __u32 s_r_blocks_count; /* Reserved blocks count */ +- __u32 s_free_blocks_count; /* Free blocks count */ +-/*10*/ __u32 s_free_inodes_count; /* Free inodes count */ +- __u32 s_first_data_block; /* First Data Block */ +- __u32 s_log_block_size; /* Block size */ +- __s32 s_log_frag_size; /* Fragment size */ +-/*20*/ __u32 s_blocks_per_group; /* # Blocks per group */ +- __u32 s_frags_per_group; /* # Fragments per group */ +- __u32 s_inodes_per_group; /* # Inodes per group */ +- __u32 s_mtime; /* Mount time */ +-/*30*/ __u32 s_wtime; /* Write time */ +- __u16 s_mnt_count; /* Mount count */ +- __s16 s_max_mnt_count; /* Maximal mount count */ +- __u16 s_magic; /* Magic signature */ +- __u16 s_state; /* File system state */ +- __u16 s_errors; /* Behaviour when detecting errors */ +- __u16 s_minor_rev_level; /* minor revision level */ +-/*40*/ __u32 s_lastcheck; /* time of last check */ +- __u32 s_checkinterval; /* max. 
time between checks */ +- __u32 s_creator_os; /* OS */ +- __u32 s_rev_level; /* Revision level */ +-/*50*/ __u16 s_def_resuid; /* Default uid for reserved blocks */ +- __u16 s_def_resgid; /* Default gid for reserved blocks */ ++/*00*/ __le32 s_inodes_count; /* Inodes count */ ++ __le32 s_blocks_count; /* Blocks count */ ++ __le32 s_r_blocks_count; /* Reserved blocks count */ ++ __le32 s_free_blocks_count; /* Free blocks count */ ++/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ ++ __le32 s_first_data_block; /* First Data Block */ ++ __le32 s_log_block_size; /* Block size */ ++ __le32 s_log_frag_size; /* Fragment size */ ++/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ ++ __le32 s_frags_per_group; /* # Fragments per group */ ++ __le32 s_inodes_per_group; /* # Inodes per group */ ++ __le32 s_mtime; /* Mount time */ ++/*30*/ __le32 s_wtime; /* Write time */ ++ __le16 s_mnt_count; /* Mount count */ ++ __le16 s_max_mnt_count; /* Maximal mount count */ ++ __le16 s_magic; /* Magic signature */ ++ __le16 s_state; /* File system state */ ++ __le16 s_errors; /* Behaviour when detecting errors */ ++ __le16 s_minor_rev_level; /* minor revision level */ ++/*40*/ __le32 s_lastcheck; /* time of last check */ ++ __le32 s_checkinterval; /* max. time between checks */ ++ __le32 s_creator_os; /* OS */ ++ __le32 s_rev_level; /* Revision level */ ++/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ ++ __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT3_DYNAMIC_REV superblocks only. + * +@@ -401,36 +428,36 @@ struct ext3_super_block { + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ +- __u32 s_first_ino; /* First non-reserved inode */ +- __u16 s_inode_size; /* size of inode structure */ +- __u16 s_block_group_nr; /* block group # of this superblock */ +- __u32 s_feature_compat; /* compatible feature set */ +-/*60*/ __u32 s_feature_incompat; /* incompatible feature set */ +- __u32 s_feature_ro_compat; /* readonly-compatible feature set */ ++ __le32 s_first_ino; /* First non-reserved inode */ ++ __le16 s_inode_size; /* size of inode structure */ ++ __le16 s_block_group_nr; /* block group # of this superblock */ ++ __le32 s_feature_compat; /* compatible feature set */ ++/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ ++ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ + /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ + /*78*/ char s_volume_name[16]; /* volume name */ + /*88*/ char s_last_mounted[64]; /* directory where last mounted */ +-/*C8*/ __u32 s_algorithm_usage_bitmap; /* For compression */ ++/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ +- __u16 s_padding1; ++ __u16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. 
+ */ + /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +-/*E0*/ __u32 s_journal_inum; /* inode number of journal file */ +- __u32 s_journal_dev; /* device number of journal file */ +- __u32 s_last_orphan; /* start of list of inodes to delete */ +- __u32 s_hash_seed[4]; /* HTREE hash seed */ ++/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ ++ __le32 s_journal_dev; /* device number of journal file */ ++ __le32 s_last_orphan; /* start of list of inodes to delete */ ++ __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; +- __u32 s_default_mount_opts; +- __u32 s_first_meta_bg; /* First metablock block group */ ++ __le32 s_default_mount_opts; ++ __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ + }; + +@@ -545,9 +572,9 @@ static inline struct ext3_inode_info *EX + #define EXT3_NAME_LEN 255 + + struct ext3_dir_entry { +- __u32 inode; /* Inode number */ +- __u16 rec_len; /* Directory entry length */ +- __u16 name_len; /* Name length */ ++ __le32 inode; /* Inode number */ ++ __le16 rec_len; /* Directory entry length */ ++ __le16 name_len; /* Name length */ + char name[EXT3_NAME_LEN]; /* File name */ + }; + +@@ -558,8 +585,8 @@ struct ext3_dir_entry { + * file_type field. + */ + struct ext3_dir_entry_2 { +- __u32 inode; /* Inode number */ +- __u16 rec_len; /* Directory entry length */ ++ __le32 inode; /* Inode number */ ++ __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT3_NAME_LEN]; /* File name */ +@@ -684,6 +711,8 @@ extern int ext3_new_block (handle_t *, s + __u32 *, __u32 *, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, + unsigned long); ++extern void ext3_free_blocks_sb (handle_t *, struct super_block *, ++ unsigned long, unsigned long, int *); + extern unsigned long ext3_count_free_blocks (struct super_block *); + extern void ext3_check_blocks_bitmap (struct super_block *); + extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +@@ -723,7 +752,7 @@ extern struct buffer_head * ext3_getblk + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); + + extern void ext3_read_inode (struct inode *); +-extern void ext3_write_inode (struct inode *, int); ++extern int ext3_write_inode (struct inode *, int); + extern int ext3_setattr (struct dentry *, struct iattr *); + extern void ext3_put_inode (struct inode *); + extern void ext3_delete_inode (struct inode *); +@@ -745,6 +774,13 @@ extern int ext3_orphan_del(handle_t *, s + extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + ++/* resize.c */ ++extern int ext3_group_add(struct super_block *sb, ++ struct ext3_new_group_data *input); ++extern int ext3_group_extend(struct super_block *sb, ++ struct ext3_super_block *es, ++ unsigned long n_blocks_count); ++ + /* super.c */ + extern void ext3_error (struct super_block *, const char *, const char *, ...) 
+ __attribute__ ((format (printf, 3, 4))); +diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs_i.h linux-2.6.8.1-ve022stab078/include/linux/ext3_fs_i.h +--- linux-2.6.8.1.orig/include/linux/ext3_fs_i.h 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ext3_fs_i.h 2006-05-11 13:05:31.000000000 +0400 +@@ -22,7 +22,7 @@ + * second extended file system inode data in memory + */ + struct ext3_inode_info { +- __u32 i_data[15]; ++ __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + #ifdef EXT3_FRAGMENTS + __u32 i_faddr; +diff -uprN linux-2.6.8.1.orig/include/linux/ext3_fs_sb.h linux-2.6.8.1-ve022stab078/include/linux/ext3_fs_sb.h +--- linux-2.6.8.1.orig/include/linux/ext3_fs_sb.h 2004-08-14 14:56:15.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ext3_fs_sb.h 2006-05-11 13:05:31.000000000 +0400 +@@ -53,7 +53,6 @@ struct ext3_sb_info { + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; +- u8 *s_debts; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; +diff -uprN linux-2.6.8.1.orig/include/linux/ext3_jbd.h linux-2.6.8.1-ve022stab078/include/linux/ext3_jbd.h +--- linux-2.6.8.1.orig/include/linux/ext3_jbd.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ext3_jbd.h 2006-05-11 13:05:31.000000000 +0400 +@@ -138,10 +138,13 @@ ext3_journal_release_buffer(handle_t *ha + journal_release_buffer(handle, bh, credits); + } + +-static inline void +-ext3_journal_forget(handle_t *handle, struct buffer_head *bh) ++static inline int ++__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) + { +- journal_forget(handle, bh); ++ int err = journal_forget(handle, bh); ++ if (err) ++ ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); ++ return err; + } + + static inline int +@@ -187,10 +190,17 @@ __ext3_journal_dirty_metadata(const char + __ext3_journal_get_create_access(__FUNCTION__, (handle), (bh)) + #define ext3_journal_dirty_metadata(handle, bh) \ + __ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) ++#define ext3_journal_forget(handle, bh) \ ++ __ext3_journal_forget(__FUNCTION__, (handle), (bh)) + +-handle_t *ext3_journal_start(struct inode *inode, int nblocks); ++handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks); + int __ext3_journal_stop(const char *where, handle_t *handle); + ++static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks) ++{ ++ return ext3_journal_start_sb(inode->i_sb, nblocks); ++} ++ + #define ext3_journal_stop(handle) \ + __ext3_journal_stop(__FUNCTION__, (handle)) + +diff -uprN linux-2.6.8.1.orig/include/linux/fairsched.h linux-2.6.8.1-ve022stab078/include/linux/fairsched.h +--- linux-2.6.8.1.orig/include/linux/fairsched.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/fairsched.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,119 @@ ++#ifndef __LINUX_FAIRSCHED_H__ ++#define __LINUX_FAIRSCHED_H__ ++ ++/* ++ * Fair Scheduler ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/cache.h> ++#include <linux/cpumask.h> ++#include <asm/timex.h> ++ ++#define FAIRSCHED_HAS_CPU_BINDING 0 ++ ++typedef struct { cycles_t t; } fschtag_t; ++typedef struct { unsigned long d; } fschdur_t; ++typedef struct { cycles_t v; } fschvalue_t; ++ ++struct vcpu_scheduler; ++ ++struct fairsched_node { ++ struct list_head runlist; ++ ++ /* ++ * Fair Scheduler fields ++ * ++ * nr_running >= nr_ready (!= if delayed) ++ */ ++ fschtag_t start_tag; ++ int nr_ready; ++ int nr_runnable; ++ int nr_pcpu; ++ ++ /* ++ * Rate limitator fields ++ */ ++ cycles_t last_updated_at; ++ fschvalue_t value; /* leaky function value */ ++ cycles_t delay; /* removed from schedule till */ ++ unsigned char delayed; ++ ++ /* ++ * Configuration ++ * ++ * Read-only most of the time. ++ */ ++ unsigned weight ____cacheline_aligned_in_smp; ++ /* fairness weight */ ++ unsigned char rate_limited; ++ unsigned rate; /* max CPU share */ ++ fschtag_t max_latency; ++ unsigned min_weight; ++ ++ struct list_head nodelist; ++ int id; ++#ifdef CONFIG_VE ++ struct ve_struct *owner_env; ++#endif ++ struct vcpu_scheduler *vsched; ++}; ++ ++#ifdef CONFIG_FAIRSCHED ++ ++#define FSCHWEIGHT_MAX ((1 << 16) - 1) ++#define FSCHRATE_SHIFT 10 ++ ++/* ++ * Fairsched nodes used in boot process. ++ */ ++extern struct fairsched_node fairsched_init_node; ++extern struct fairsched_node fairsched_idle_node; ++ ++/* ++ * For proc output. ++ */ ++extern unsigned fairsched_nr_cpus; ++extern void fairsched_cpu_online_map(int id, cpumask_t *mask); ++ ++/* I hope vsched_id is always equal to fairsched node id --SAW */ ++#define task_fairsched_node_id(p) task_vsched_id(p) ++ ++/* ++ * Core functions. ++ */ ++extern void fairsched_incrun(struct fairsched_node *node); ++extern void fairsched_decrun(struct fairsched_node *node); ++extern void fairsched_inccpu(struct fairsched_node *node); ++extern void fairsched_deccpu(struct fairsched_node *node); ++extern struct fairsched_node *fairsched_schedule( ++ struct fairsched_node *prev_node, ++ struct fairsched_node *cur_node, ++ int cur_node_active, ++ cycles_t time); ++ ++/* ++ * Management functions. ++ */ ++void fairsched_init_early(void); ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid); ++asmlinkage int sys_fairsched_rmnod(unsigned int id); ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid); ++ ++#else /* CONFIG_FAIRSCHED */ ++ ++#define task_fairsched_node_id(p) 0 ++#define fairsched_incrun(p) do { } while (0) ++#define fairsched_decrun(p) do { } while (0) ++#define fairsched_deccpu(p) do { } while (0) ++#define fairsched_cpu_online_map(id, mask) do { *(mask) = cpu_online_map; } while (0) ++ ++#endif /* CONFIG_FAIRSCHED */ ++ ++#endif /* __LINUX_FAIRSCHED_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/faudit.h linux-2.6.8.1-ve022stab078/include/linux/faudit.h +--- linux-2.6.8.1.orig/include/linux/faudit.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/faudit.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,51 @@ ++/* ++ * include/linux/faudit.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __FAUDIT_H_ ++#define __FAUDIT_H_ ++ ++#include <linux/config.h> ++#include <linux/virtinfo.h> ++ ++struct vfsmount; ++struct dentry; ++struct super_block; ++struct kstatfs; ++struct kstat; ++struct pt_regs; ++ ++struct faudit_regs_arg { ++ int err; ++ struct pt_regs *regs; ++}; ++ ++struct faudit_stat_arg { ++ int err; ++ struct vfsmount *mnt; ++ struct dentry *dentry; ++ struct kstat *stat; ++}; ++ ++struct faudit_statfs_arg { ++ int err; ++ struct super_block *sb; ++ struct kstatfs *stat; ++}; ++ ++#define VIRTINFO_FAUDIT (0) ++#define VIRTINFO_FAUDIT_EXIT (VIRTINFO_FAUDIT + 0) ++#define VIRTINFO_FAUDIT_FORK (VIRTINFO_FAUDIT + 1) ++#define VIRTINFO_FAUDIT_CLONE (VIRTINFO_FAUDIT + 2) ++#define VIRTINFO_FAUDIT_VFORK (VIRTINFO_FAUDIT + 3) ++#define VIRTINFO_FAUDIT_EXECVE (VIRTINFO_FAUDIT + 4) ++#define VIRTINFO_FAUDIT_STAT (VIRTINFO_FAUDIT + 5) ++#define VIRTINFO_FAUDIT_STATFS (VIRTINFO_FAUDIT + 6) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/fb.h linux-2.6.8.1-ve022stab078/include/linux/fb.h +--- linux-2.6.8.1.orig/include/linux/fb.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/fb.h 2006-05-11 13:05:32.000000000 +0400 +@@ -725,7 +725,6 @@ extern void fb_destroy_modedb(struct fb_ + + /* drivers/video/modedb.c */ + #define VESA_MODEDB_SIZE 34 +-extern const struct fb_videomode vesa_modes[]; + + /* drivers/video/fbcmap.c */ + extern int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp); +@@ -754,6 +753,8 @@ struct fb_videomode { + u32 flag; + }; + ++extern const struct fb_videomode vesa_modes[]; ++ + extern int fb_find_mode(struct fb_var_screeninfo *var, + struct fb_info *info, const char *mode_option, + const struct fb_videomode *db, +diff -uprN linux-2.6.8.1.orig/include/linux/fs.h linux-2.6.8.1-ve022stab078/include/linux/fs.h +--- linux-2.6.8.1.orig/include/linux/fs.h 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/fs.h 2006-05-11 13:05:43.000000000 +0400 +@@ -7,6 +7,7 @@ + */ + + #include <linux/config.h> ++#include <linux/ve_owner.h> + #include <linux/linkage.h> + #include <linux/limits.h> + #include <linux/wait.h> +@@ -79,6 +80,7 @@ extern int leases_enable, dir_notify_ena + #define FMODE_LSEEK 4 + #define FMODE_PREAD 8 + #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ ++#define FMODE_QUOTACTL 4 + + #define RW_MASK 1 + #define RWA_MASK 2 +@@ -88,6 +90,7 @@ extern int leases_enable, dir_notify_ena + #define SPECIAL 4 /* For non-blockdevice requests in request queue */ + #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) + #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) ++#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) + + #define SEL_IN 1 + #define SEL_OUT 2 +@@ -96,6 +99,7 @@ extern int leases_enable, dir_notify_ena + /* public flags for file_system_type */ + #define FS_REQUIRES_DEV 1 + #define FS_BINARY_MOUNTDATA 2 ++#define FS_VIRTUALIZED 64 /* Can mount this fstype inside ve */ + #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." 
for staleness */ + #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon + * as nfs_rename() will be cleaned up +@@ -118,7 +122,8 @@ extern int leases_enable, dir_notify_ena + #define MS_REC 16384 + #define MS_VERBOSE 32768 + #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +-#define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */ ++#define MS_ONE_SECOND (1<<17) /* fs has 1 sec time resolution (obsolete) */ ++#define MS_TIME_GRAN (1<<18) /* fs has s_time_gran field */ + #define MS_ACTIVE (1<<30) + #define MS_NOUSER (1<<31) + +@@ -292,6 +297,9 @@ struct iattr { + * Includes for diskquotas. + */ + #include <linux/quota.h> ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++#include <linux/vzquota_qlnk.h> ++#endif + + /* + * oh the beauties of C type declarations. +@@ -419,6 +427,7 @@ static inline int mapping_writably_mappe + struct inode { + struct hlist_node i_hash; + struct list_head i_list; ++ struct list_head i_sb_list; + struct list_head i_dentry; + unsigned long i_ino; + atomic_t i_count; +@@ -448,6 +457,9 @@ struct inode { + #ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; + #endif ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++ struct vz_quota_ilink i_qlnk; ++#endif + /* These three should probably be a union */ + struct list_head i_devices; + struct pipe_inode_info *i_pipe; +@@ -536,6 +548,12 @@ static inline unsigned imajor(struct ino + + extern struct block_device *I_BDEV(struct inode *inode); + ++struct exec_perm { ++ umode_t mode; ++ uid_t uid, gid; ++ int set; ++}; ++ + struct fown_struct { + rwlock_t lock; /* protects pid, uid, euid fields */ + int pid; /* pid or -pgrp where SIGIO should be sent */ +@@ -587,7 +605,10 @@ struct file { + spinlock_t f_ep_lock; + #endif /* #ifdef CONFIG_EPOLL */ + struct address_space *f_mapping; ++ struct ve_struct *owner_env; + }; ++DCL_VE_OWNER_PROTO(FILP, GENERIC, struct file, owner_env, ++ inline, (always_inline)) + extern spinlock_t files_lock; + #define file_list_lock() spin_lock(&files_lock); + #define file_list_unlock() spin_unlock(&files_lock); +@@ -639,6 +660,7 @@ struct file_lock { + struct file *fl_file; + unsigned char fl_flags; + unsigned char fl_type; ++ unsigned char fl_charged; + loff_t fl_start; + loff_t fl_end; + +@@ -750,10 +772,12 @@ struct super_block { + atomic_t s_active; + void *s_security; + ++ struct list_head s_inodes; /* all inodes */ + struct list_head s_dirty; /* dirty inodes */ + struct list_head s_io; /* parked for writeback */ + struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct list_head s_files; ++ struct list_head s_dshrinkers; /* active dcache shrinkers */ + + struct block_device *s_bdev; + struct list_head s_instances; +@@ -771,8 +795,33 @@ struct super_block { + * even looking at it. You had been warned. + */ + struct semaphore s_vfs_rename_sem; /* Kludge */ ++ ++ /* Granuality of c/m/atime in ns. 
++ Cannot be worse than a second */ ++#ifndef __GENKSYMS__ ++ u32 s_time_gran; ++#endif + }; + ++extern struct timespec current_fs_time(struct super_block *sb); ++ ++static inline u32 get_sb_time_gran(struct super_block *sb) ++{ ++ if (sb->s_flags & MS_TIME_GRAN) ++ return sb->s_time_gran; ++ if (sb->s_flags & MS_ONE_SECOND) ++ return 1000000000U; ++ return 1; ++} ++ ++static inline void set_sb_time_gran(struct super_block *sb, u32 time_gran) ++{ ++ sb->s_time_gran = time_gran; ++ sb->s_flags |= MS_TIME_GRAN; ++ if (time_gran == 1000000000U) ++ sb->s_flags |= MS_ONE_SECOND; ++} ++ + /* + * Snapshotting support. + */ +@@ -911,7 +960,8 @@ struct inode_operations { + int (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); +- int (*permission) (struct inode *, int, struct nameidata *); ++ int (*permission) (struct inode *, int, struct nameidata *, ++ struct exec_perm *); + int (*setattr) (struct dentry *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); +@@ -940,7 +990,7 @@ struct super_operations { + void (*read_inode) (struct inode *); + + void (*dirty_inode) (struct inode *); +- void (*write_inode) (struct inode *, int); ++ int (*write_inode) (struct inode *, int); + void (*put_inode) (struct inode *); + void (*drop_inode) (struct inode *); + void (*delete_inode) (struct inode *); +@@ -955,6 +1005,8 @@ struct super_operations { + void (*umount_begin) (struct super_block *); + + int (*show_options)(struct seq_file *, struct vfsmount *); ++ ++ struct inode *(*get_quota_root)(struct super_block *); + }; + + /* Inode state bits. Protected by inode_lock. */ +@@ -965,6 +1017,7 @@ struct super_operations { + #define I_FREEING 16 + #define I_CLEAR 32 + #define I_NEW 64 ++#define I_WILL_FREE 128 + + #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +@@ -1105,8 +1158,15 @@ struct file_system_type { + struct module *owner; + struct file_system_type * next; + struct list_head fs_supers; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(FSTYPE, MODULE_NOCHECK, struct file_system_type, owner_env ++ , , ()) ++ ++void get_filesystem(struct file_system_type *fs); ++void put_filesystem(struct file_system_type *fs); ++ + struct super_block *get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int)); +@@ -1129,6 +1189,7 @@ struct super_block *sget(struct file_sys + struct super_block *get_sb_pseudo(struct file_system_type *, char *, + struct super_operations *ops, unsigned long); + int __put_super(struct super_block *sb); ++int __put_super_and_need_restart(struct super_block *sb); + void unnamed_dev_init(void); + + /* Alas, no aliases. 
Too much hassle with bringing module.h everywhere */ +@@ -1143,8 +1204,11 @@ extern struct vfsmount *kern_mount(struc + extern int may_umount_tree(struct vfsmount *); + extern int may_umount(struct vfsmount *); + extern long do_mount(char *, char *, char *, unsigned long, void *); ++extern void umount_tree(struct vfsmount *); ++#define kern_umount mntput + + extern int vfs_statfs(struct super_block *, struct kstatfs *); ++extern int faudit_statfs(struct super_block *, struct kstatfs *); + + /* Return value for VFS lock functions - tells locks.c to lock conventionally + * REALLY kosha for root NFS and nfs_lock +@@ -1260,7 +1324,7 @@ extern int chrdev_open(struct inode *, s + #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ + extern const char *__bdevname(dev_t, char *buffer); + extern const char *bdevname(struct block_device *bdev, char *buffer); +-extern struct block_device *lookup_bdev(const char *); ++extern struct block_device *lookup_bdev(const char *, int mode); + extern struct block_device *open_bdev_excl(const char *, int, void *); + extern void close_bdev_excl(struct block_device *); + +@@ -1290,7 +1354,7 @@ extern int fs_may_remount_ro(struct supe + #define bio_data_dir(bio) ((bio)->bi_rw & 1) + + extern int check_disk_change(struct block_device *); +-extern int invalidate_inodes(struct super_block *); ++extern int invalidate_inodes(struct super_block *, int); + extern int __invalidate_device(struct block_device *, int); + extern int invalidate_partition(struct gendisk *, int); + unsigned long invalidate_mapping_pages(struct address_space *mapping, +@@ -1317,8 +1381,9 @@ extern int do_remount_sb(struct super_bl + extern sector_t bmap(struct inode *, sector_t); + extern int setattr_mask(unsigned int); + extern int notify_change(struct dentry *, struct iattr *); +-extern int permission(struct inode *, int, struct nameidata *); +-extern int vfs_permission(struct inode *, int); ++extern int permission(struct inode *, int, struct nameidata *, ++ struct exec_perm *); ++extern int vfs_permission(struct inode *, int, struct exec_perm *); + extern int get_write_access(struct inode *); + extern int deny_write_access(struct file *); + static inline void put_write_access(struct inode * inode) +@@ -1335,8 +1400,9 @@ extern int do_pipe(int *); + extern int open_namei(const char *, int, int, struct nameidata *); + extern int may_open(struct nameidata *, int, int); + ++struct linux_binprm; + extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +-extern struct file * open_exec(const char *); ++extern struct file * open_exec(const char *, struct linux_binprm *); + + /* fs/dcache.c -- generic fs support functions */ + extern int is_subdir(struct dentry *, struct dentry *); +@@ -1482,7 +1548,7 @@ extern int page_readlink(struct dentry * + extern int page_follow_link(struct dentry *, struct nameidata *); + extern int page_follow_link_light(struct dentry *, struct nameidata *); + extern void page_put_link(struct dentry *, struct nameidata *); +-extern int page_symlink(struct inode *inode, const char *symname, int len); ++extern int page_symlink(struct inode *inode, const char *symname, int len, int gfp_mask); + extern struct inode_operations page_symlink_inode_operations; + extern int generic_readlink(struct dentry *, char __user *, int); + extern void generic_fillattr(struct inode *, struct kstat *); +diff -uprN linux-2.6.8.1.orig/include/linux/gfp.h linux-2.6.8.1-ve022stab078/include/linux/gfp.h +--- linux-2.6.8.1.orig/include/linux/gfp.h 2004-08-14 
14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/gfp.h 2006-05-11 13:05:39.000000000 +0400 +@@ -38,19 +38,25 @@ struct vm_area_struct; + #define __GFP_NO_GROW 0x2000 /* Slab internal usage */ + #define __GFP_COMP 0x4000 /* Add compound page metadata */ + +-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ ++#define __GFP_UBC 0x08000 /* charge kmem in buddy and slab */ ++#define __GFP_SOFT_UBC 0x10000 /* use soft charging */ ++ ++#define __GFP_BITS_SHIFT 17 /* Room for 15 __GFP_FOO bits */ + #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) + + /* if you forget to add the bitmask here kernel will crash, period */ + #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ + __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ +- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP) ++ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ ++ __GFP_UBC|__GFP_SOFT_UBC) + + #define GFP_ATOMIC (__GFP_HIGH) + #define GFP_NOIO (__GFP_WAIT) + #define GFP_NOFS (__GFP_WAIT | __GFP_IO) + #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) ++#define GFP_KERNEL_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC) + #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS) ++#define GFP_USER_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC) + #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM) + + /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some +diff -uprN linux-2.6.8.1.orig/include/linux/highmem.h linux-2.6.8.1-ve022stab078/include/linux/highmem.h +--- linux-2.6.8.1.orig/include/linux/highmem.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/highmem.h 2006-05-11 13:05:38.000000000 +0400 +@@ -28,9 +28,10 @@ static inline void *kmap(struct page *pa + + #define kunmap(page) do { (void) (page); } while (0) + +-#define kmap_atomic(page, idx) page_address(page) +-#define kunmap_atomic(addr, idx) do { } while (0) +-#define kmap_atomic_to_page(ptr) virt_to_page(ptr) ++#define kmap_atomic(page, idx) page_address(page) ++#define kmap_atomic_pte(pte, idx) page_address(pte_page(*pte)) ++#define kunmap_atomic(addr, idx) do { } while (0) ++#define kmap_atomic_to_page(ptr) virt_to_page(ptr) + + #endif /* CONFIG_HIGHMEM */ + +diff -uprN linux-2.6.8.1.orig/include/linux/inetdevice.h linux-2.6.8.1-ve022stab078/include/linux/inetdevice.h +--- linux-2.6.8.1.orig/include/linux/inetdevice.h 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/inetdevice.h 2006-05-11 13:05:40.000000000 +0400 +@@ -28,6 +28,11 @@ struct ipv4_devconf + }; + + extern struct ipv4_devconf ipv4_devconf; ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv4_devconf (*(get_exec_env()->_ipv4_devconf)) ++#else ++#define ve_ipv4_devconf ipv4_devconf ++#endif + + struct in_device + { +@@ -53,28 +58,28 @@ struct in_device + }; + + #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding) +-#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding) +-#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter) +-#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route) +-#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay) +- +-#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians) +-#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp) +-#define IN_DEV_SHARED_MEDIA(in_dev) 
(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media) +-#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects) +-#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) ++#define IN_DEV_MFORWARD(in_dev) (ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding) ++#define IN_DEV_RPFILTER(in_dev) (ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter) ++#define IN_DEV_SOURCE_ROUTE(in_dev) (ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route) ++#define IN_DEV_BOOTP_RELAY(in_dev) (ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay) ++ ++#define IN_DEV_LOG_MARTIANS(in_dev) (ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians) ++#define IN_DEV_PROXY_ARP(in_dev) (ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp) ++#define IN_DEV_SHARED_MEDIA(in_dev) (ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media) ++#define IN_DEV_TX_REDIRECTS(in_dev) (ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects) ++#define IN_DEV_SEC_REDIRECTS(in_dev) (ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects) + #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag) + #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id) + + #define IN_DEV_RX_REDIRECTS(in_dev) \ + ((IN_DEV_FORWARD(in_dev) && \ +- (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \ ++ (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \ + || (!IN_DEV_FORWARD(in_dev) && \ +- (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects))) ++ (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects))) + +-#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) +-#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) +-#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) ++#define IN_DEV_ARPFILTER(in_dev) (ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) ++#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) ++#define IN_DEV_ARP_IGNORE(in_dev) (max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) + + struct in_ifaddr + { +@@ -104,6 +109,7 @@ extern u32 inet_select_addr(const struc + extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope); + extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask); + extern void inet_forward_change(void); ++extern void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); + + static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa) + { +@@ -167,6 +173,10 @@ in_dev_put(struct in_device *idev) + #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt) + #define in_dev_hold(idev) atomic_inc(&(idev)->refcnt) + ++struct ve_struct; ++extern int devinet_sysctl_init(struct ve_struct *); ++extern void devinet_sysctl_fini(struct ve_struct *); ++extern void devinet_sysctl_free(struct ve_struct *); + #endif /* __KERNEL__ */ + + static __inline__ __u32 inet_make_mask(int logmask) +diff -uprN linux-2.6.8.1.orig/include/linux/initrd.h linux-2.6.8.1-ve022stab078/include/linux/initrd.h +--- linux-2.6.8.1.orig/include/linux/initrd.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/initrd.h 2006-05-11 13:05:37.000000000 +0400 +@@ -14,7 +14,7 @@ extern int rd_image_start; + extern int initrd_below_start_ok; + + /* free_initrd_mem 
always gets called with the next two as arguments.. */ +-extern unsigned long initrd_start, initrd_end; ++extern unsigned long initrd_start, initrd_end, initrd_copy; + extern void free_initrd_mem(unsigned long, unsigned long); + + extern unsigned int real_root_dev; +diff -uprN linux-2.6.8.1.orig/include/linux/irq.h linux-2.6.8.1-ve022stab078/include/linux/irq.h +--- linux-2.6.8.1.orig/include/linux/irq.h 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/irq.h 2006-05-11 13:05:38.000000000 +0400 +@@ -77,4 +77,6 @@ extern hw_irq_controller no_irq_type; / + + #endif + ++void check_stack_overflow(void); ++ + #endif /* __irq_h */ +diff -uprN linux-2.6.8.1.orig/include/linux/jbd.h linux-2.6.8.1-ve022stab078/include/linux/jbd.h +--- linux-2.6.8.1.orig/include/linux/jbd.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/jbd.h 2006-05-11 13:05:43.000000000 +0400 +@@ -137,9 +137,9 @@ typedef struct journal_s journal_t; /* J + */ + typedef struct journal_header_s + { +- __u32 h_magic; +- __u32 h_blocktype; +- __u32 h_sequence; ++ __be32 h_magic; ++ __be32 h_blocktype; ++ __be32 h_sequence; + } journal_header_t; + + +@@ -148,8 +148,8 @@ typedef struct journal_header_s + */ + typedef struct journal_block_tag_s + { +- __u32 t_blocknr; /* The on-disk block number */ +- __u32 t_flags; /* See below */ ++ __be32 t_blocknr; /* The on-disk block number */ ++ __be32 t_flags; /* See below */ + } journal_block_tag_t; + + /* +@@ -159,7 +159,7 @@ typedef struct journal_block_tag_s + typedef struct journal_revoke_header_s + { + journal_header_t r_header; +- int r_count; /* Count of bytes used in the block */ ++ __be32 r_count; /* Count of bytes used in the block */ + } journal_revoke_header_t; + + +@@ -180,35 +180,35 @@ typedef struct journal_superblock_s + + /* 0x000C */ + /* Static information describing the journal */ +- __u32 s_blocksize; /* journal device blocksize */ +- __u32 s_maxlen; /* total blocks in journal file */ +- __u32 s_first; /* first block of log information */ ++ __be32 s_blocksize; /* journal device blocksize */ ++ __be32 s_maxlen; /* total blocks in journal file */ ++ __be32 s_first; /* first block of log information */ + + /* 0x0018 */ + /* Dynamic information describing the current state of the log */ +- __u32 s_sequence; /* first commit ID expected in log */ +- __u32 s_start; /* blocknr of start of log */ ++ __be32 s_sequence; /* first commit ID expected in log */ ++ __be32 s_start; /* blocknr of start of log */ + + /* 0x0020 */ + /* Error value, as set by journal_abort(). */ +- __s32 s_errno; ++ __be32 s_errno; + + /* 0x0024 */ + /* Remaining fields are only valid in a version-2 superblock */ +- __u32 s_feature_compat; /* compatible feature set */ +- __u32 s_feature_incompat; /* incompatible feature set */ +- __u32 s_feature_ro_compat; /* readonly-compatible feature set */ ++ __be32 s_feature_compat; /* compatible feature set */ ++ __be32 s_feature_incompat; /* incompatible feature set */ ++ __be32 s_feature_ro_compat; /* readonly-compatible feature set */ + /* 0x0030 */ + __u8 s_uuid[16]; /* 128-bit uuid for journal */ + + /* 0x0040 */ +- __u32 s_nr_users; /* Nr of filesystems sharing log */ ++ __be32 s_nr_users; /* Nr of filesystems sharing log */ + +- __u32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ ++ __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ + + /* 0x0048 */ +- __u32 s_max_transaction; /* Limit of journal blocks per trans.*/ +- __u32 s_max_trans_data; /* Limit of data blocks per trans. 
*/ ++ __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ ++ __be32 s_max_trans_data; /* Limit of data blocks per trans. */ + + /* 0x0050 */ + __u32 s_padding[44]; +@@ -242,14 +242,28 @@ typedef struct journal_superblock_s + #include <asm/bug.h> + + #define JBD_ASSERTIONS ++#define JBD_SOFT_ASSERTIONS + #ifdef JBD_ASSERTIONS ++#ifdef JBD_SOFT_ASSERTIONS ++#define J_BUG() \ ++do { \ ++ unsigned long stack; \ ++ printk("Stack=%p current=%p pid=%d ve=%d process='%s'\n", \ ++ &stack, current, current->pid, \ ++ get_exec_env()->veid, \ ++ current->comm); \ ++ dump_stack(); \ ++} while(0) ++#else ++#define J_BUG() BUG() ++#endif + #define J_ASSERT(assert) \ + do { \ + if (!(assert)) { \ + printk (KERN_EMERG \ + "Assertion failure in %s() at %s:%d: \"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ +- BUG(); \ ++ J_BUG(); \ + } \ + } while (0) + +@@ -277,13 +291,15 @@ void buffer_assertion_failure(struct buf + #define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) + #else + #define __journal_expect(expr, why...) \ +- do { \ +- if (!(expr)) { \ ++ ({ \ ++ int val = (expr); \ ++ if (!val) { \ + printk(KERN_ERR \ + "EXT3-fs unexpected failure: %s;\n",# expr); \ +- printk(KERN_ERR why); \ ++ printk(KERN_ERR why "\n"); \ + } \ +- } while (0) ++ val; \ ++ }) + #define J_EXPECT(expr, why...) __journal_expect(expr, ## why) + #define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) + #define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) +@@ -826,6 +842,12 @@ struct journal_s + struct jbd_revoke_table_s *j_revoke_table[2]; + + /* ++ * array of bhs for journal_commit_transaction ++ */ ++ struct buffer_head **j_wbuf; ++ int j_wbufsize; ++ ++ /* + * An opaque pointer to fs-private information. ext3 puts its + * superblock pointer here + */ +@@ -847,6 +869,7 @@ struct journal_s + */ + + /* Filing buffers */ ++extern void __journal_temp_unlink_buffer(struct journal_head *jh); + extern void journal_unfile_buffer(journal_t *, struct journal_head *); + extern void __journal_unfile_buffer(struct journal_head *); + extern void __journal_refile_buffer(struct journal_head *); +@@ -912,7 +935,7 @@ extern int journal_dirty_data (handle_t + extern int journal_dirty_metadata (handle_t *, struct buffer_head *); + extern void journal_release_buffer (handle_t *, struct buffer_head *, + int credits); +-extern void journal_forget (handle_t *, struct buffer_head *); ++extern int journal_forget (handle_t *, struct buffer_head *); + extern void journal_sync_buffer (struct buffer_head *); + extern int journal_invalidatepage(journal_t *, + struct page *, unsigned long); +diff -uprN linux-2.6.8.1.orig/include/linux/jiffies.h linux-2.6.8.1-ve022stab078/include/linux/jiffies.h +--- linux-2.6.8.1.orig/include/linux/jiffies.h 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/jiffies.h 2006-05-11 13:05:39.000000000 +0400 +@@ -15,6 +15,7 @@ + */ + extern u64 jiffies_64; + extern unsigned long volatile jiffies; ++extern unsigned long cycles_per_jiffy, cycles_per_clock; + + #if (BITS_PER_LONG < 64) + u64 get_jiffies_64(void); +diff -uprN linux-2.6.8.1.orig/include/linux/kdev_t.h linux-2.6.8.1-ve022stab078/include/linux/kdev_t.h +--- linux-2.6.8.1.orig/include/linux/kdev_t.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/kdev_t.h 2006-05-11 13:05:40.000000000 +0400 +@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de + return dev & 0x3ffff; + } + ++#define UNNAMED_MAJOR_COUNT 16 ++ ++#if UNNAMED_MAJOR_COUNT > 1 
++ ++extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT]; ++ ++static inline dev_t make_unnamed_dev(int idx) ++{ ++ /* ++ * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the ++ * unnamed device index into major number. ++ */ ++ return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)], ++ idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8)); ++} ++ ++static inline int unnamed_dev_idx(dev_t dev) ++{ ++ int i; ++ for (i = 0; i < UNNAMED_MAJOR_COUNT && ++ MAJOR(dev) != unnamed_dev_majors[i]; i++); ++ return MINOR(dev) | (i << 8); ++} ++ ++static inline int is_unnamed_dev(dev_t dev) ++{ ++ int i; ++ for (i = 0; i < UNNAMED_MAJOR_COUNT && ++ MAJOR(dev) != unnamed_dev_majors[i]; i++); ++ return i < UNNAMED_MAJOR_COUNT; ++} ++ ++#else /* UNNAMED_MAJOR_COUNT */ ++ ++static inline dev_t make_unnamed_dev(int idx) ++{ ++ return MKDEV(0, idx); ++} ++ ++static inline int unnamed_dev_idx(dev_t dev) ++{ ++ return MINOR(dev); ++} ++ ++static inline int is_unnamed_dev(dev_t dev) ++{ ++ return MAJOR(dev) == 0; ++} ++ ++#endif /* UNNAMED_MAJOR_COUNT */ ++ + + #else /* __KERNEL__ */ + +diff -uprN linux-2.6.8.1.orig/include/linux/kernel.h linux-2.6.8.1-ve022stab078/include/linux/kernel.h +--- linux-2.6.8.1.orig/include/linux/kernel.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/kernel.h 2006-05-11 13:05:49.000000000 +0400 +@@ -97,9 +97,18 @@ extern int __kernel_text_address(unsigne + extern int kernel_text_address(unsigned long addr); + extern int session_of_pgrp(int pgrp); + ++asmlinkage int vprintk(const char *fmt, va_list args) ++ __attribute__ ((format (printf, 1, 0))); + asmlinkage int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + ++#define VE0_LOG 1 ++#define VE_LOG 2 ++#define VE_LOG_BOTH (VE0_LOG | VE_LOG) ++asmlinkage int ve_printk(int, const char * fmt, ...) 
++ __attribute__ ((format (printf, 2, 3))); ++void prepare_printk(void); ++ + unsigned long int_sqrt(unsigned long); + + static inline int __attribute_pure__ long_log2(unsigned long x) +@@ -114,9 +123,14 @@ static inline int __attribute_pure__ lon + extern int printk_ratelimit(void); + extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); + ++extern int console_silence_loglevel; ++ + static inline void console_silent(void) + { +- console_loglevel = 0; ++ if (console_loglevel > console_silence_loglevel) { ++ printk("console shuts up ...\n"); ++ console_loglevel = 0; ++ } + } + + static inline void console_verbose(void) +@@ -126,10 +140,14 @@ static inline void console_verbose(void) + } + + extern void bust_spinlocks(int yes); ++extern void wake_up_klogd(void); + extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ + extern int panic_on_oops; ++extern int decode_call_traces; + extern int tainted; ++extern int kernel_text_csum_broken; + extern const char *print_tainted(void); ++extern int alloc_fail_warn; + + /* Values used for system_state */ + extern enum system_states { +diff -uprN linux-2.6.8.1.orig/include/linux/kmem_cache.h linux-2.6.8.1-ve022stab078/include/linux/kmem_cache.h +--- linux-2.6.8.1.orig/include/linux/kmem_cache.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/kmem_cache.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,195 @@ ++#ifndef __KMEM_CACHE_H__ ++#define __KMEM_CACHE_H__ ++ ++#include <linux/config.h> ++#include <linux/threads.h> ++#include <linux/smp.h> ++#include <linux/spinlock.h> ++#include <linux/list.h> ++#include <linux/mm.h> ++#include <asm/atomic.h> ++ ++/* ++ * SLAB_DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, ++ * SLAB_RED_ZONE & SLAB_POISON. ++ * 0 for faster, smaller code (especially in the critical paths). ++ * ++ * SLAB_STATS - 1 to collect stats for /proc/slabinfo. ++ * 0 for faster, smaller code (especially in the critical paths). ++ * ++ * SLAB_FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) ++ */ ++ ++#ifdef CONFIG_DEBUG_SLAB ++#define SLAB_DEBUG 1 ++#define SLAB_STATS 1 ++#define SLAB_FORCED_DEBUG 1 ++#else ++#define SLAB_DEBUG 0 ++#define SLAB_STATS 0 /* must be off, see kmem_cache.h */ ++#define SLAB_FORCED_DEBUG 0 ++#endif ++ ++/* ++ * struct array_cache ++ * ++ * Per cpu structures ++ * Purpose: ++ * - LIFO ordering, to hand out cache-warm objects from _alloc ++ * - reduce the number of linked list operations ++ * - reduce spinlock operations ++ * ++ * The limit is stored in the per-cpu structure to reduce the data cache ++ * footprint. ++ * ++ */ ++struct array_cache { ++ unsigned int avail; ++ unsigned int limit; ++ unsigned int batchcount; ++ unsigned int touched; ++}; ++ ++/* bootstrap: The caches do not work without cpuarrays anymore, ++ * but the cpuarrays are allocated from the generic caches... ++ */ ++#define BOOT_CPUCACHE_ENTRIES 1 ++struct arraycache_init { ++ struct array_cache cache; ++ void * entries[BOOT_CPUCACHE_ENTRIES]; ++}; ++ ++/* ++ * The slab lists of all objects. ++ * Hopefully reduce the internal fragmentation ++ * NUMA: The spinlock could be moved from the kmem_cache_t ++ * into this structure, too. Figure out what causes ++ * fewer cross-node spinlock operations. 
++ */ ++struct kmem_list3 { ++ struct list_head slabs_partial; /* partial list first, better asm code */ ++ struct list_head slabs_full; ++ struct list_head slabs_free; ++ unsigned long free_objects; ++ int free_touched; ++ unsigned long next_reap; ++ struct array_cache *shared; ++}; ++ ++#define LIST3_INIT(parent) \ ++ { \ ++ .slabs_full = LIST_HEAD_INIT(parent.slabs_full), \ ++ .slabs_partial = LIST_HEAD_INIT(parent.slabs_partial), \ ++ .slabs_free = LIST_HEAD_INIT(parent.slabs_free) \ ++ } ++#define list3_data(cachep) \ ++ (&(cachep)->lists) ++ ++/* NUMA: per-node */ ++#define list3_data_ptr(cachep, ptr) \ ++ list3_data(cachep) ++ ++/* ++ * kmem_cache_t ++ * ++ * manages a cache. ++ */ ++ ++struct kmem_cache_s { ++/* 1) per-cpu data, touched during every alloc/free */ ++ struct array_cache *array[NR_CPUS]; ++ unsigned int batchcount; ++ unsigned int limit; ++/* 2) touched by every alloc & free from the backend */ ++ struct kmem_list3 lists; ++ /* NUMA: kmem_3list_t *nodelists[MAX_NUMNODES] */ ++ unsigned int objsize; ++ unsigned int flags; /* constant flags */ ++ unsigned int num; /* # of objs per slab */ ++ unsigned int free_limit; /* upper limit of objects in the lists */ ++ spinlock_t spinlock; ++ ++/* 3) cache_grow/shrink */ ++ /* order of pgs per slab (2^n) */ ++ unsigned int gfporder; ++ ++ /* force GFP flags, e.g. GFP_DMA */ ++ unsigned int gfpflags; ++ ++ size_t colour; /* cache colouring range */ ++ unsigned int colour_off; /* colour offset */ ++ unsigned int colour_next; /* cache colouring */ ++ kmem_cache_t *slabp_cache; ++ unsigned int slab_size; ++ unsigned int dflags; /* dynamic flags */ ++ ++ /* constructor func */ ++ void (*ctor)(void *, kmem_cache_t *, unsigned long); ++ ++ /* de-constructor func */ ++ void (*dtor)(void *, kmem_cache_t *, unsigned long); ++ ++/* 4) cache creation/removal */ ++ const char *name; ++ struct list_head next; ++ ++/* 5) statistics */ ++#if SLAB_STATS ++ unsigned long num_active; ++ unsigned long num_allocations; ++ unsigned long high_mark; ++ unsigned long grown; ++ unsigned long reaped; ++ unsigned long errors; ++ unsigned long max_freeable; ++ atomic_t allochit; ++ atomic_t allocmiss; ++ atomic_t freehit; ++ atomic_t freemiss; ++#endif ++#if SLAB_DEBUG ++ int dbghead; ++ int reallen; ++#endif ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int objuse; ++#endif ++}; ++ ++/* Macros for storing/retrieving the cachep and or slab from the ++ * global 'mem_map'. These are used to find the slab an obj belongs to. ++ * With kfree(), these are used to find the cache which an obj belongs to. 
++ */ ++#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x)) ++#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next) ++#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x)) ++#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev) ++ ++#define CFLGS_OFF_SLAB (0x80000000UL) ++#define CFLGS_ENVIDS (0x04000000UL) ++#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) ++#define ENVIDS(x) ((x)->flags & CFLGS_ENVIDS) ++ ++static inline unsigned int kmem_cache_memusage(kmem_cache_t *cache) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ return cache->objuse; ++#else ++ return 0; ++#endif ++} ++ ++static inline unsigned int kmem_obj_memusage(void *obj) ++{ ++ kmem_cache_t *cachep; ++ ++ cachep = GET_PAGE_CACHE(virt_to_page(obj)); ++ return kmem_cache_memusage(cachep); ++} ++ ++static inline void kmem_mark_nocharge(kmem_cache_t *cachep) ++{ ++ cachep->flags |= SLAB_NO_CHARGE; ++} ++ ++#endif /* __KMEM_CACHE_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/kmem_slab.h linux-2.6.8.1-ve022stab078/include/linux/kmem_slab.h +--- linux-2.6.8.1.orig/include/linux/kmem_slab.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/kmem_slab.h 2006-05-11 13:05:35.000000000 +0400 +@@ -0,0 +1,47 @@ ++#ifndef __KMEM_SLAB_H__ ++#define __KMEM_SLAB_H__ ++ ++/* ++ * kmem_bufctl_t: ++ * ++ * Bufctl's are used for linking objs within a slab ++ * linked offsets. ++ * ++ * This implementation relies on "struct page" for locating the cache & ++ * slab an object belongs to. ++ * This allows the bufctl structure to be small (one int), but limits ++ * the number of objects a slab (not a cache) can contain when off-slab ++ * bufctls are used. The limit is the size of the largest general cache ++ * that does not use off-slab slabs. ++ * For 32bit archs with 4 kB pages, is this 56. ++ * This is not serious, as it is only for large objects, when it is unwise ++ * to have too many per slab. ++ * Note: This limit can be raised by introducing a general cache whose size ++ * is less than 512 (PAGE_SIZE<<3), but greater than 256. ++ */ ++ ++#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) ++#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) ++#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) ++ ++/* ++ * struct slab ++ * ++ * Manages the objs in a slab. Placed either at the beginning of mem allocated ++ * for a slab, or allocated from an general cache. ++ * Slabs are chained into three list: fully used, partial, fully free slabs. ++ */ ++struct slab { ++ struct list_head list; ++ unsigned long colouroff; ++ void *s_mem; /* including colour offset */ ++ unsigned int inuse; /* num of objs active in slab */ ++ kmem_bufctl_t free; ++}; ++ ++static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) ++{ ++ return (kmem_bufctl_t *)(slabp+1); ++} ++ ++#endif /* __KMEM_SLAB_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/list.h linux-2.6.8.1-ve022stab078/include/linux/list.h +--- linux-2.6.8.1.orig/include/linux/list.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/list.h 2006-05-11 13:05:40.000000000 +0400 +@@ -305,6 +305,9 @@ static inline void list_splice_init(stru + #define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + ++#define list_first_entry(ptr, type, member) \ ++ container_of((ptr)->next, type, member) ++ + /** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. 
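The list_first_entry() helper added to linux/list.h above is simply container_of() applied to head->next. A minimal usage sketch follows; the struct and function names are illustrative only and are not part of the patch:

	struct item {
		int value;
		struct list_head link;
	};

	/* return the value of the first element, or -1 for an empty list */
	static int first_value(struct list_head *head)
	{
		struct item *it;

		if (list_empty(head))
			return -1;
		/* expands to container_of(head->next, struct item, link) */
		it = list_first_entry(head, struct item, link);
		return it->value;
	}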
+@@ -397,6 +400,20 @@ static inline void list_splice_init(stru + prefetch(pos->member.next)) + + /** ++ * list_for_each_entry_continue_reverse - iterate backwards over list of given ++ * type continuing after existing point ++ * @pos: the type * to use as a loop counter. ++ * @head: the head for your list. ++ * @member: the name of the list_struct within the struct. ++ */ ++#define list_for_each_entry_continue_reverse(pos, head, member) \ ++ for (pos = list_entry(pos->member.prev, typeof(*pos), member), \ ++ prefetch(pos->member.prev); \ ++ &pos->member != (head); \ ++ pos = list_entry(pos->member.prev, typeof(*pos), member), \ ++ prefetch(pos->member.prev)) ++ ++/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage +diff -uprN linux-2.6.8.1.orig/include/linux/major.h linux-2.6.8.1-ve022stab078/include/linux/major.h +--- linux-2.6.8.1.orig/include/linux/major.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/major.h 2006-05-11 13:05:40.000000000 +0400 +@@ -165,4 +165,7 @@ + + #define VIOTAPE_MAJOR 230 + ++#define UNNAMED_EXTRA_MAJOR 130 ++#define UNNAMED_EXTRA_MAJOR_COUNT 120 ++ + #endif +diff -uprN linux-2.6.8.1.orig/include/linux/mm.h linux-2.6.8.1-ve022stab078/include/linux/mm.h +--- linux-2.6.8.1.orig/include/linux/mm.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/mm.h 2006-05-11 13:05:40.000000000 +0400 +@@ -101,6 +101,8 @@ struct vm_area_struct { + #ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ + #endif ++ /* rss counter by vma */ ++ unsigned long vm_rss; + }; + + /* +@@ -191,6 +193,9 @@ typedef unsigned long page_flags_t; + * moment. Note that we have no way to track which tasks are using + * a page. + */ ++struct user_beancounter; ++struct page_beancounter; ++ + struct page { + page_flags_t flags; /* Atomic flags, some possibly + * updated asynchronously */ +@@ -229,6 +234,10 @@ struct page { + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. 
highmem) */ + #endif /* WANT_PAGE_VIRTUAL */ ++ union { ++ struct user_beancounter *page_ub; ++ struct page_beancounter *page_pbc; ++ } bc; + }; + + /* +@@ -496,7 +505,6 @@ int shmem_set_policy(struct vm_area_stru + struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, + unsigned long addr); + struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags); +-void shmem_lock(struct file * file, int lock); + int shmem_zero_setup(struct vm_area_struct *); + + /* +@@ -624,7 +632,7 @@ extern struct vm_area_struct *vma_merge( + extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); + extern int split_vma(struct mm_struct *, + struct vm_area_struct *, unsigned long addr, int new_below); +-extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); ++extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); + extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, + struct rb_node **, struct rb_node *); + extern struct vm_area_struct *copy_vma(struct vm_area_struct **, +@@ -709,6 +717,9 @@ extern struct vm_area_struct *find_exten + extern struct page * vmalloc_to_page(void *addr); + extern struct page * follow_page(struct mm_struct *mm, unsigned long address, + int write); ++extern struct page * follow_page_k(unsigned long address, int write); ++extern struct page * follow_page_pte(struct mm_struct *mm, ++ unsigned long address, int write, pte_t *pte); + extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, + unsigned long to, unsigned long size, pgprot_t prot); + +@@ -724,5 +735,25 @@ extern struct vm_area_struct *get_gate_v + int in_gate_area(struct task_struct *task, unsigned long addr); + #endif + ++/* ++ * Common MM functions for inclusion in the VFS ++ * or in other stackable file systems. Some of these ++ * functions were in linux/mm/ C files. ++ * ++ */ ++static inline int sync_page(struct page *page) ++{ ++ struct address_space *mapping; ++ ++ /* ++ * FIXME, fercrissake. What is this barrier here for? 
++ */ ++ smp_mb(); ++ mapping = page_mapping(page); ++ if (mapping && mapping->a_ops && mapping->a_ops->sync_page) ++ return mapping->a_ops->sync_page(page); ++ return 0; ++} ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_MM_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/mount.h linux-2.6.8.1-ve022stab078/include/linux/mount.h +--- linux-2.6.8.1.orig/include/linux/mount.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/mount.h 2006-05-11 13:05:40.000000000 +0400 +@@ -63,7 +63,7 @@ static inline void mntput(struct vfsmoun + + extern void free_vfsmnt(struct vfsmount *mnt); + extern struct vfsmount *alloc_vfsmnt(const char *name); +-extern struct vfsmount *do_kern_mount(const char *fstype, int flags, ++extern struct vfsmount *do_kern_mount(struct file_system_type *type, int flags, + const char *name, void *data); + + struct nameidata; +diff -uprN linux-2.6.8.1.orig/include/linux/msdos_fs.h linux-2.6.8.1-ve022stab078/include/linux/msdos_fs.h +--- linux-2.6.8.1.orig/include/linux/msdos_fs.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/msdos_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -278,7 +278,7 @@ extern void fat_put_super(struct super_b + int fat_fill_super(struct super_block *sb, void *data, int silent, + struct inode_operations *fs_dir_inode_ops, int isvfat); + extern int fat_statfs(struct super_block *sb, struct kstatfs *buf); +-extern void fat_write_inode(struct inode *inode, int wait); ++extern int fat_write_inode(struct inode *inode, int wait); + extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); + + /* fat/misc.c */ +diff -uprN linux-2.6.8.1.orig/include/linux/namei.h linux-2.6.8.1-ve022stab078/include/linux/namei.h +--- linux-2.6.8.1.orig/include/linux/namei.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/namei.h 2006-05-11 13:05:40.000000000 +0400 +@@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA + #define LOOKUP_CONTINUE 4 + #define LOOKUP_PARENT 16 + #define LOOKUP_NOALT 32 ++#define LOOKUP_NOAREACHECK 64 /* no area check on lookup */ ++#define LOOKUP_STRICT 128 /* no symlinks or other filesystems */ + /* + * Intent data + */ +diff -uprN linux-2.6.8.1.orig/include/linux/netdevice.h linux-2.6.8.1-ve022stab078/include/linux/netdevice.h +--- linux-2.6.8.1.orig/include/linux/netdevice.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netdevice.h 2006-05-11 13:05:42.000000000 +0400 +@@ -37,6 +37,7 @@ + #include <linux/config.h> + #include <linux/device.h> + #include <linux/percpu.h> ++#include <linux/ctype.h> + + struct divert_blk; + struct vlan_group; +@@ -245,6 +246,11 @@ struct netdev_boot_setup { + }; + #define NETDEV_BOOT_SETUP_MAX 8 + ++struct netdev_bc { ++ struct user_beancounter *exec_ub, *owner_ub; ++}; ++ ++#define netdev_bc(dev) (&(dev)->dev_bc) + + /* + * The DEVICE structure. 
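The sync_page() helper made visible in linux/mm.h above only dispatches to the mapping's a_ops->sync_page() when one exists. A hedged sketch of a typical caller is shown below; it assumes the usual wait_on_page_bit()/PG_locked path from linux/pagemap.h and linux/page-flags.h and is illustrative, not taken from the patch:

	/* kick the backing device before sleeping on a locked page */
	static void wait_on_locked_page_example(struct page *page)
	{
		while (PageLocked(page)) {
			/* may start I/O via mapping->a_ops->sync_page(page) */
			sync_page(page);
			wait_on_page_bit(page, PG_locked);
		}
	}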
+@@ -389,6 +395,7 @@ struct net_device + enum { NETREG_UNINITIALIZED=0, + NETREG_REGISTERING, /* called register_netdevice */ + NETREG_REGISTERED, /* completed register todo */ ++ NETREG_REGISTER_ERR, /* register todo failed */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ +@@ -408,6 +415,8 @@ struct net_device + #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ + #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ + #define NETIF_F_LLTX 4096 /* LockLess TX */ ++#define NETIF_F_VIRTUAL 0x40000000 /* can be registered in ve */ ++#define NETIF_F_VENET 0x80000000 /* Device is VENET device */ + + /* Called after device is detached from network. */ + void (*uninit)(struct net_device *dev); +@@ -477,11 +486,18 @@ struct net_device + struct divert_blk *divert; + #endif /* CONFIG_NET_DIVERT */ + ++ unsigned orig_mtu; /* MTU value before move to VE */ ++ struct ve_struct *owner_env; /* Owner VE of the interface */ ++ struct netdev_bc dev_bc; ++ + /* class/net/name entry */ + struct class_device class_dev; + struct net_device_stats* (*last_stats)(struct net_device *); + /* how much padding had been added by alloc_netdev() */ + int padded; ++ ++ /* List entry in global devices list to keep track of their names assignment */ ++ struct list_head dev_global_list_entry; + }; + + #define NETDEV_ALIGN 32 +@@ -514,8 +530,21 @@ struct packet_type { + + extern struct net_device loopback_dev; /* The loopback */ + extern struct net_device *dev_base; /* All devices */ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define visible_loopback_dev (*get_exec_env()->_loopback_dev) ++#define dev_base (get_exec_env()->_net_dev_base) ++#define visible_dev_head(x) (&(x)->_net_dev_head) ++#define visible_dev_index_head(x) (&(x)->_net_dev_index_head) ++#else ++#define visible_loopback_dev loopback_dev ++#define visible_dev_head(x) NULL ++#define visible_dev_index_head(x) NULL ++#endif + extern rwlock_t dev_base_lock; /* Device list lock */ + ++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env); ++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env); ++ + extern int netdev_boot_setup_add(char *name, struct ifmap *map); + extern int netdev_boot_setup_check(struct net_device *dev); + extern unsigned long netdev_boot_base(const char *prefix, int unit); +@@ -540,6 +569,7 @@ extern int dev_alloc_name(struct net_de + extern int dev_open(struct net_device *dev); + extern int dev_close(struct net_device *dev); + extern int dev_queue_xmit(struct sk_buff *skb); ++extern int dev_set_mtu(struct net_device *dev, int new_mtu); + extern int register_netdevice(struct net_device *dev); + extern int unregister_netdevice(struct net_device *dev); + extern void free_netdev(struct net_device *dev); +@@ -547,7 +577,8 @@ extern void synchronize_net(void); + extern int register_netdevice_notifier(struct notifier_block *nb); + extern int unregister_netdevice_notifier(struct notifier_block *nb); + extern int call_netdevice_notifiers(unsigned long val, void *v); +-extern int dev_new_index(void); ++extern int dev_new_index(struct net_device *dev); ++extern void dev_free_index(struct net_device *dev); + extern struct net_device *dev_get_by_index(int ifindex); + extern struct net_device *__dev_get_by_index(int ifindex); + extern int dev_restart(struct net_device *dev); +@@ -946,6 +977,18 @@ extern int skb_checksum_help(struct sk_b + extern char 
*net_sysctl_strdup(const char *s); + #endif + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static inline int ve_is_dev_movable(struct net_device *dev) ++{ ++ return !(dev->features & NETIF_F_VIRTUAL); ++} ++#else ++static inline int ve_is_dev_movable(struct net_device *dev) ++{ ++ return 0; ++} ++#endif ++ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_DEV_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter.h linux-2.6.8.1-ve022stab078/include/linux/netfilter.h +--- linux-2.6.8.1.orig/include/linux/netfilter.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter.h 2006-05-11 13:05:40.000000000 +0400 +@@ -25,6 +25,8 @@ + #define NFC_UNKNOWN 0x4000 + #define NFC_ALTERED 0x8000 + ++#define NFC_IPT_MASK (0x00FFFFFF) ++ + #ifdef __KERNEL__ + #include <linux/config.h> + #ifdef CONFIG_NETFILTER +@@ -93,6 +95,9 @@ struct nf_info + int nf_register_hook(struct nf_hook_ops *reg); + void nf_unregister_hook(struct nf_hook_ops *reg); + ++int visible_nf_register_hook(struct nf_hook_ops *reg); ++int visible_nf_unregister_hook(struct nf_hook_ops *reg); ++ + /* Functions to register get/setsockopt ranges (non-inclusive). You + need to check permissions yourself! */ + int nf_register_sockopt(struct nf_sockopt_ops *reg); +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack.h 2006-05-11 13:05:45.000000000 +0400 +@@ -158,6 +158,10 @@ struct ip_conntrack_expect + + struct ip_conntrack_helper; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/ve.h> ++#endif ++ + struct ip_conntrack + { + /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, +@@ -173,6 +177,10 @@ struct ip_conntrack + /* Timer function; drops refcnt when it goes off. */ + struct timer_list timeout; + ++#ifdef CONFIG_VE_IPTABLES ++ /* VE struct pointer for timers */ ++ struct ve_ip_conntrack *ct_env; ++#endif + /* If we're expecting another related connection, this will be + in expected linked list */ + struct list_head sibling_list; +@@ -212,6 +220,9 @@ struct ip_conntrack + /* get master conntrack via master expectation */ + #define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL) + ++/* add conntrack entry to hash tables */ ++extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); ++ + /* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. 
*/ + extern int +@@ -231,10 +242,17 @@ ip_conntrack_get(struct sk_buff *skb, en + /* decrement reference count on a conntrack */ + extern inline void ip_conntrack_put(struct ip_conntrack *ct); + ++/* allocate conntrack structure */ ++extern struct ip_conntrack *ip_conntrack_alloc(struct user_beancounter *ub); ++ + /* find unconfirmed expectation based on tuple */ + struct ip_conntrack_expect * + ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); + ++/* insert expecation into lists */ ++void ip_conntrack_expect_insert(struct ip_conntrack_expect *new, ++ struct ip_conntrack *related_to); ++ + /* decrement reference count on an expectation */ + void ip_conntrack_expect_put(struct ip_conntrack_expect *exp); + +@@ -257,7 +275,7 @@ extern struct ip_conntrack ip_conntrack_ + + /* Returns new sk_buff, or NULL */ + struct sk_buff * +-ip_ct_gather_frags(struct sk_buff *skb); ++ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user); + + /* Delete all conntracks which match. */ + extern void +@@ -271,6 +289,7 @@ static inline int is_confirmed(struct ip + } + + extern unsigned int ip_conntrack_htable_size; ++extern int ip_conntrack_enable_ve0; + + /* eg. PROVIDES_CONNTRACK(ftp); */ + #define PROVIDES_CONNTRACK(name) \ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_core.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-05-11 13:05:40.000000000 +0400 +@@ -47,8 +47,37 @@ static inline int ip_conntrack_confirm(s + return NF_ACCEPT; + } + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_conntrack_hash \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_hash) ++#define ve_ip_conntrack_expect_list \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list) ++#define ve_ip_conntrack_protocol_list \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_protocol_list) ++#define ve_ip_conntrack_helpers \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_helpers) ++#define ve_ip_conntrack_count \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_count) ++#define ve_ip_conntrack_max \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_max) ++#define ve_ip_conntrack_destroyed \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed) ++#else ++#define ve_ip_conntrack_hash ip_conntrack_hash ++#define ve_ip_conntrack_expect_list ip_conntrack_expect_list ++#define ve_ip_conntrack_protocol_list protocol_list ++#define ve_ip_conntrack_helpers helpers ++#define ve_ip_conntrack_count ip_conntrack_count ++#define ve_ip_conntrack_max ip_conntrack_max ++#define ve_ip_conntrack_destroyed ip_conntrack_destroyed ++#endif /* CONFIG_VE_IPTABLES */ ++ + extern struct list_head *ip_conntrack_hash; + extern struct list_head ip_conntrack_expect_list; ++extern atomic_t ip_conntrack_count; ++extern unsigned long ** tcp_timeouts; ++ + DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); + DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock); + #endif /* _IP_CONNTRACK_CORE_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_ftp.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_ftp.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_ftp.h 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_ftp.h 2006-05-11 13:05:26.000000000 +0400 +@@ -4,11 +4,6 @@ 
+ + #ifdef __KERNEL__ + +-#include <linux/netfilter_ipv4/lockhelp.h> +- +-/* Protects ftp part of conntracks */ +-DECLARE_LOCK_EXTERN(ip_ftp_lock); +- + #define FTP_PORT 21 + + #endif /* __KERNEL__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_helper.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-05-11 13:05:40.000000000 +0400 +@@ -33,6 +33,9 @@ struct ip_conntrack_helper + extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); + extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *); + ++extern int visible_ip_conntrack_helper_register(struct ip_conntrack_helper *); ++extern void visible_ip_conntrack_helper_unregister(struct ip_conntrack_helper *); ++ + extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple); + + +@@ -46,4 +49,5 @@ extern int ip_conntrack_change_expect(st + struct ip_conntrack_tuple *newtuple); + extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp); + ++extern struct list_head helpers; + #endif /*_IP_CONNTRACK_HELPER_H*/ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_irc.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-05-11 13:05:26.000000000 +0400 +@@ -33,13 +33,8 @@ struct ip_ct_irc_master { + + #ifdef __KERNEL__ + +-#include <linux/netfilter_ipv4/lockhelp.h> +- + #define IRC_PORT 6667 + +-/* Protects irc part of conntracks */ +-DECLARE_LOCK_EXTERN(ip_irc_lock); +- + #endif /* __KERNEL__ */ + + #endif /* _IP_CONNTRACK_IRC_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-05-11 13:05:40.000000000 +0400 +@@ -58,9 +58,35 @@ struct ip_conntrack_protocol + extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); + extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); + ++extern int visible_ip_conntrack_protocol_register( ++ struct ip_conntrack_protocol *proto); ++extern void visible_ip_conntrack_protocol_unregister( ++ struct ip_conntrack_protocol *proto); ++ ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ip_ct_tcp_timeouts \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts) ++#define ve_ip_ct_udp_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout) ++#define ve_ip_ct_udp_timeout_stream \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream) ++#define ve_ip_ct_icmp_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout) ++#define ve_ip_ct_generic_timeout \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout) ++#else ++#define ve_ip_ct_tcp_timeouts *tcp_timeouts ++#define ve_ip_ct_udp_timeout ip_ct_udp_timeout ++#define ve_ip_ct_udp_timeout_stream ip_ct_udp_timeout_stream ++#define ve_ip_ct_icmp_timeout ip_ct_icmp_timeout ++#define ve_ip_ct_generic_timeout 
ip_ct_generic_timeout ++#endif ++ + /* Existing built-in protocols */ + extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; + extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; + extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp; + extern int ip_conntrack_protocol_tcp_init(void); ++extern struct list_head protocol_list; + #endif /*_IP_CONNTRACK_PROTOCOL_H*/ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat.h 2006-05-11 13:05:49.000000000 +0400 +@@ -1,5 +1,6 @@ + #ifndef _IP_NAT_H + #define _IP_NAT_H ++#include <linux/config.h> + #include <linux/netfilter_ipv4.h> + #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> + +@@ -55,6 +56,23 @@ struct ip_nat_multi_range + struct ip_nat_range range[1]; + }; + ++#ifdef CONFIG_COMPAT ++#include <net/compat.h> ++ ++struct compat_ip_nat_range ++{ ++ compat_uint_t flags; ++ u_int32_t min_ip, max_ip; ++ union ip_conntrack_manip_proto min, max; ++}; ++ ++struct compat_ip_nat_multi_range ++{ ++ compat_uint_t rangesize; ++ struct compat_ip_nat_range range[1]; ++}; ++#endif ++ + /* Worst case: local-out manip + 1 post-routing, and reverse dirn. */ + #define IP_NAT_MAX_MANIPS (2*3) + +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_core.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_core.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_core.h 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_core.h 2006-05-11 13:05:45.000000000 +0400 +@@ -25,9 +25,20 @@ extern void replace_in_hashes(struct ip_ + struct ip_nat_info *info); + extern void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info); ++extern int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper); + + /* Built-in protocols. */ + extern struct ip_nat_protocol ip_nat_protocol_tcp; + extern struct ip_nat_protocol ip_nat_protocol_udp; + extern struct ip_nat_protocol ip_nat_protocol_icmp; ++ ++#ifdef CONFIG_VE_IPTABLES ++ ++#include <linux/sched.h> ++#define ve_ip_nat_protos \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_protos) ++#else ++#define ve_ip_nat_protos protos ++#endif /* CONFIG_VE_IPTABLES */ ++ + #endif /* _IP_NAT_CORE_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_helper.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_helper.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_helper.h 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_helper.h 2006-05-11 13:05:40.000000000 +0400 +@@ -38,10 +38,18 @@ struct ip_nat_helper + struct ip_nat_info *info); + }; + ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_nat_helpers \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_helpers) ++#else + extern struct list_head helpers; ++#define ve_ip_nat_helpers helpers ++#endif + + extern int ip_nat_helper_register(struct ip_nat_helper *me); + extern void ip_nat_helper_unregister(struct ip_nat_helper *me); ++extern int visible_ip_nat_helper_register(struct ip_nat_helper *me); ++extern void visible_ip_nat_helper_unregister(struct ip_nat_helper *me); + + /* These return true or false. 
*/ + extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb, +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_protocol.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_protocol.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_protocol.h 2006-05-11 13:05:40.000000000 +0400 +@@ -51,6 +51,9 @@ struct ip_nat_protocol + extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); + extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); + ++extern int visible_ip_nat_protocol_register(struct ip_nat_protocol *proto); ++extern void visible_ip_nat_protocol_unregister(struct ip_nat_protocol *proto); ++ + extern int init_protocols(void) __init; + extern void cleanup_protocols(void); + extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_rule.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_nat_rule.h 2004-08-14 14:56:15.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-05-11 13:05:40.000000000 +0400 +@@ -6,7 +6,7 @@ + + #ifdef __KERNEL__ + +-extern int ip_nat_rule_init(void) __init; ++extern int ip_nat_rule_init(void); + extern void ip_nat_rule_cleanup(void); + extern int ip_nat_rule_find(struct sk_buff **pskb, + unsigned int hooknum, +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_tables.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ip_tables.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ip_tables.h 2006-05-11 13:05:49.000000000 +0400 +@@ -16,6 +16,7 @@ + #define _IPTABLES_H + + #ifdef __KERNEL__ ++#include <linux/config.h> + #include <linux/if.h> + #include <linux/types.h> + #include <linux/in.h> +@@ -341,6 +342,12 @@ static DECLARE_MUTEX(ipt_mutex); + #include <linux/init.h> + extern void ipt_init(void) __init; + ++#ifdef CONFIG_COMPAT ++#define COMPAT_TO_USER 1 ++#define COMPAT_FROM_USER -1 ++#define COMPAT_CALC_SIZE 0 ++#endif ++ + struct ipt_match + { + struct list_head list; +@@ -370,6 +377,9 @@ struct ipt_match + /* Called when entry of this type deleted. */ + void (*destroy)(void *matchinfo, unsigned int matchinfosize); + ++#ifdef CONFIG_COMPAT ++ int (*compat)(void *match, void **dstptr, int *size, int convert); ++#endif + /* Set this to THIS_MODULE. */ + struct module *me; + }; +@@ -404,6 +414,9 @@ struct ipt_target + const void *targinfo, + void *userdata); + ++#ifdef CONFIG_COMPAT ++ int (*compat)(void *target, void **dstptr, int *size, int convert); ++#endif + /* Set this to THIS_MODULE. */ + struct module *me; + }; +@@ -416,9 +429,15 @@ arpt_find_target_lock(const char *name, + extern int ipt_register_target(struct ipt_target *target); + extern void ipt_unregister_target(struct ipt_target *target); + ++extern int visible_ipt_register_target(struct ipt_target *target); ++extern void visible_ipt_unregister_target(struct ipt_target *target); ++ + extern int ipt_register_match(struct ipt_match *match); + extern void ipt_unregister_match(struct ipt_match *match); + ++extern int visible_ipt_register_match(struct ipt_match *match); ++extern void visible_ipt_unregister_match(struct ipt_match *match); ++ + /* Furniture shopping... 
*/ + struct ipt_table + { +@@ -453,5 +472,75 @@ extern unsigned int ipt_do_table(struct + void *userdata); + + #define IPT_ALIGN(s) (((s) + (__alignof__(struct ipt_entry)-1)) & ~(__alignof__(struct ipt_entry)-1)) ++ ++#ifdef CONFIG_COMPAT ++#include <net/compat.h> ++ ++struct compat_ipt_counters ++{ ++ u_int32_t cnt[4]; ++}; ++ ++struct compat_ipt_counters_info ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ compat_uint_t num_counters; ++ struct compat_ipt_counters counters[0]; ++}; ++ ++struct compat_ipt_getinfo ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ compat_uint_t valid_hooks; ++ compat_uint_t hook_entry[NF_IP_NUMHOOKS]; ++ compat_uint_t underflow[NF_IP_NUMHOOKS]; ++ compat_uint_t num_entries; ++ compat_uint_t size; ++}; ++ ++struct compat_ipt_entry ++{ ++ struct ipt_ip ip; ++ compat_uint_t nfcache; ++ u_int16_t target_offset; ++ u_int16_t next_offset; ++ compat_uint_t comefrom; ++ struct compat_ipt_counters counters; ++ unsigned char elems[0]; ++}; ++ ++struct compat_ipt_entry_match ++{ ++ union { ++ struct { ++ u_int16_t match_size; ++ char name[IPT_FUNCTION_MAXNAMELEN]; ++ } user; ++ u_int16_t match_size; ++ } u; ++ unsigned char data[0]; ++}; ++ ++struct compat_ipt_entry_target ++{ ++ union { ++ struct { ++ u_int16_t target_size; ++ char name[IPT_FUNCTION_MAXNAMELEN]; ++ } user; ++ u_int16_t target_size; ++ } u; ++ unsigned char data[0]; ++}; ++ ++#define COMPAT_IPT_ALIGN(s) (((s) + (__alignof__(struct compat_ipt_entry)-1)) \ ++ & ~(__alignof__(struct compat_ipt_entry)-1)) ++ ++extern int ipt_match_align_compat(void *match, void **dstptr, ++ int *size, int off, int convert); ++extern int ipt_target_align_compat(void *target, void **dstptr, ++ int *size, int off, int convert); ++ ++#endif /* CONFIG_COMPAT */ + #endif /*__KERNEL__*/ + #endif /* _IPTABLES_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_conntrack.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_conntrack.h 2006-05-11 13:05:49.000000000 +0400 +@@ -5,6 +5,8 @@ + #ifndef _IPT_CONNTRACK_H + #define _IPT_CONNTRACK_H + ++#include <linux/config.h> ++ + #define IPT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) + #define IPT_CONNTRACK_STATE_INVALID (1 << 0) + +@@ -36,4 +38,21 @@ struct ipt_conntrack_info + /* Inverse flags */ + u_int8_t invflags; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_ipt_conntrack_info ++{ ++ compat_uint_t statemask, statusmask; ++ ++ struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX]; ++ struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX]; ++ ++ compat_ulong_t expires_min, expires_max; ++ ++ /* Flags word */ ++ u_int8_t flags; ++ /* Inverse flags */ ++ u_int8_t invflags; ++}; ++#endif + #endif /*_IPT_CONNTRACK_H*/ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_helper.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_helper.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_helper.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_helper.h 2006-05-11 13:05:49.000000000 +0400 +@@ -1,8 +1,17 @@ + #ifndef _IPT_HELPER_H + #define _IPT_HELPER_H + ++#include <linux/config.h> ++ + struct ipt_helper_info { + int invert; + char name[30]; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_ipt_helper_info { ++ compat_int_t invert; ++ char name[30]; ++}; ++#endif + #endif /* _IPT_HELPER_H */ +diff 
-uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_limit.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_limit.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_limit.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_limit.h 2006-05-11 13:05:49.000000000 +0400 +@@ -1,6 +1,8 @@ + #ifndef _IPT_RATE_H + #define _IPT_RATE_H + ++#include <linux/config.h> ++ + /* timings are in milliseconds. */ + #define IPT_LIMIT_SCALE 10000 + +@@ -18,4 +20,20 @@ struct ipt_rateinfo { + /* Ugly, ugly fucker. */ + struct ipt_rateinfo *master; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_ipt_rateinfo { ++ u_int32_t avg; /* Average secs between packets * scale */ ++ u_int32_t burst; /* Period multiplier for upper limit. */ ++ ++ /* Used internally by the kernel */ ++ compat_ulong_t prev; ++ u_int32_t credit; ++ u_int32_t credit_cap, cost; ++ ++ /* Ugly, ugly fucker. */ ++ compat_uptr_t master; ++}; ++#endif ++ + #endif /*_IPT_RATE_H*/ +diff -uprN linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_state.h +--- linux-2.6.8.1.orig/include/linux/netfilter_ipv4/ipt_state.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netfilter_ipv4/ipt_state.h 2006-05-11 13:05:49.000000000 +0400 +@@ -1,6 +1,8 @@ + #ifndef _IPT_STATE_H + #define _IPT_STATE_H + ++#include <linux/config.h> ++ + #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) + #define IPT_STATE_INVALID (1 << 0) + +@@ -10,4 +12,11 @@ struct ipt_state_info + { + unsigned int statemask; + }; ++ ++#ifdef CONFIG_COMPAT ++struct compat_ipt_state_info ++{ ++ compat_uint_t statemask; ++}; ++#endif + #endif /*_IPT_STATE_H*/ +diff -uprN linux-2.6.8.1.orig/include/linux/netlink.h linux-2.6.8.1-ve022stab078/include/linux/netlink.h +--- linux-2.6.8.1.orig/include/linux/netlink.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/netlink.h 2006-05-11 13:05:45.000000000 +0400 +@@ -100,6 +100,20 @@ enum { + + #include <linux/capability.h> + ++struct netlink_opt ++{ ++ u32 pid; ++ unsigned groups; ++ u32 dst_pid; ++ unsigned dst_groups; ++ unsigned long state; ++ int (*handler)(int unit, struct sk_buff *skb); ++ wait_queue_head_t wait; ++ struct netlink_callback *cb; ++ spinlock_t cb_lock; ++ void (*data_ready)(struct sock *sk, int bytes); ++}; ++ + struct netlink_skb_parms + { + struct ucred creds; /* Skb credentials */ +@@ -129,14 +143,13 @@ extern int netlink_unregister_notifier(s + /* finegrained unicast helpers: */ + struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid); + struct sock *netlink_getsockbyfilp(struct file *filp); +-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); + void netlink_detachskb(struct sock *sk, struct sk_buff *skb); + int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); + + /* finegrained unicast helpers: */ + struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid); + struct sock *netlink_getsockbyfilp(struct file *filp); +-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); ++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo, struct sock *ssk); + void netlink_detachskb(struct sock *sk, struct sk_buff *skb); + int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); + +diff -uprN linux-2.6.8.1.orig/include/linux/nfcalls.h linux-2.6.8.1-ve022stab078/include/linux/nfcalls.h 
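[Illustrative sketch, not part of the patch.] The netlink.h hunk above removes the old four-argument declaration of netlink_attachskb() and re-adds it with the sending socket passed as a fifth argument. A minimal caller written against that new prototype might look as follows; the helper name is hypothetical, and error/retry handling plus skb ownership follow the usual netlink rules and are not spelled out here:

    #include <linux/netlink.h>
    #include <linux/sched.h>        /* MAX_SCHEDULE_TIMEOUT */
    #include <net/sock.h>

    /* Hypothetical caller, shown only to illustrate the added 'ssk' argument. */
    static int attach_example(struct sock *dst, struct sock *ssk,
                              struct sk_buff *skb)
    {
            return netlink_attachskb(dst, skb, 0 /* nonblock */,
                                     MAX_SCHEDULE_TIMEOUT, ssk);
    }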
+--- linux-2.6.8.1.orig/include/linux/nfcalls.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/nfcalls.h 2006-05-11 13:05:42.000000000 +0400 +@@ -0,0 +1,224 @@ ++/* ++ * include/linux/nfcalls.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_NFCALLS_H ++#define _LINUX_NFCALLS_H ++ ++#include <linux/rcupdate.h> ++ ++#ifdef CONFIG_MODULES ++extern struct module no_module; ++ ++#define DECL_KSYM_MODULE(name) \ ++ extern struct module *vz_mod_##name ++#define DECL_KSYM_CALL(type, name, args) \ ++ extern type (*vz_##name) args ++ ++#define INIT_KSYM_MODULE(name) \ ++ struct module *vz_mod_##name = &no_module; \ ++ EXPORT_SYMBOL(vz_mod_##name) ++#define INIT_KSYM_CALL(type, name, args) \ ++ type (*vz_##name) args; \ ++ EXPORT_SYMBOL(vz_##name) ++ ++#define __KSYMERRCALL(err, type, mod, name, args) \ ++({ \ ++ type ret = (type)err; \ ++ if (!__vzksym_module_get(vz_mod_##mod)) { \ ++ if (vz_##name) \ ++ ret = ((*vz_##name)args); \ ++ __vzksym_module_put(vz_mod_##mod); \ ++ } \ ++ ret; \ ++}) ++#define __KSYMSAFECALL_VOID(mod, name, args) \ ++do { \ ++ if (!__vzksym_module_get(vz_mod_##mod)) { \ ++ if (vz_##name) \ ++ ((*vz_##name)args); \ ++ __vzksym_module_put(vz_mod_##mod); \ ++ } \ ++} while (0) ++#else ++#define DECL_KSYM_CALL(type, name, args) \ ++ extern type name args ++#define INIT_KSYM_MODULE(name) ++#define INIT_KSYM_CALL(type, name, args) \ ++ type name args ++#define __KSYMERRCALL(err, type, mod, name, args) ((*name)args) ++#define __KSYMSAFECALL_VOID(mod, name, args) ((*name)args) ++#endif ++ ++#define KSYMERRCALL(err, mod, name, args) \ ++ __KSYMERRCALL(err, int, mod, name, args) ++#define KSYMSAFECALL(type, mod, name, args) \ ++ __KSYMERRCALL(0, type, mod, name, args) ++#define KSYMSAFECALL_VOID(mod, name, args) \ ++ __KSYMSAFECALL_VOID(mod, name, args) ++ ++#if defined(CONFIG_VE) && defined(CONFIG_MODULES) ++/* should be called _after_ KSYMRESOLVE's */ ++#define KSYMMODRESOLVE(name) \ ++ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE) ++#define KSYMMODUNRESOLVE(name) \ ++ __vzksym_modunresolve(&vz_mod_##name) ++ ++#define KSYMRESOLVE(name) \ ++ vz_##name = &name ++#define KSYMUNRESOLVE(name) \ ++ vz_##name = NULL ++#else ++#define KSYMRESOLVE(name) do { } while (0) ++#define KSYMUNRESOLVE(name) do { } while (0) ++#define KSYMMODRESOLVE(name) do { } while (0) ++#define KSYMMODUNRESOLVE(name) do { } while (0) ++#endif ++ ++#ifdef CONFIG_MODULES ++static inline void __vzksym_modresolve(struct module **modp, struct module *mod) ++{ ++ /* ++ * we want to be sure, that pointer updates are visible first: ++ * 1. wmb() is here only for piece of sure ++ * (note, no rmb() in KSYMSAFECALL) ++ * 2. synchronize_kernel() guarantees that updates are visible ++ * on all cpus and allows us to remove rmb() in KSYMSAFECALL ++ */ ++ wmb(); synchronize_kernel(); ++ *modp = mod; ++ /* just to be sure, our changes are visible as soon as possible */ ++ wmb(); synchronize_kernel(); ++} ++ ++static inline void __vzksym_modunresolve(struct module **modp) ++{ ++ /* ++ * try_module_get() in KSYMSAFECALL should fail at this moment since ++ * THIS_MODULE in in unloading state (we should be called from fini), ++ * no need to syncronize pointers/ve_module updates. 
++ */ ++ *modp = &no_module; ++ /* ++ * synchronize_kernel() guarantees here that we see ++ * updated module pointer before the module really gets away ++ */ ++ synchronize_kernel(); ++} ++ ++static inline int __vzksym_module_get(struct module *mod) ++{ ++ /* ++ * we want to avoid rmb(), so use synchronize_kernel() in KSYMUNRESOLVE ++ * and smp_read_barrier_depends() here... ++ */ ++ smp_read_barrier_depends(); /* for module loading */ ++ if (!try_module_get(mod)) ++ return -EBUSY; ++ ++ return 0; ++} ++ ++static inline void __vzksym_module_put(struct module *mod) ++{ ++ module_put(mod); ++} ++#endif ++ ++#if defined(CONFIG_VE_IPTABLES) ++#ifdef CONFIG_MODULES ++DECL_KSYM_MODULE(ip_tables); ++DECL_KSYM_MODULE(iptable_filter); ++DECL_KSYM_MODULE(iptable_mangle); ++DECL_KSYM_MODULE(ipt_limit); ++DECL_KSYM_MODULE(ipt_multiport); ++DECL_KSYM_MODULE(ipt_tos); ++DECL_KSYM_MODULE(ipt_TOS); ++DECL_KSYM_MODULE(ipt_REJECT); ++DECL_KSYM_MODULE(ipt_TCPMSS); ++DECL_KSYM_MODULE(ipt_tcpmss); ++DECL_KSYM_MODULE(ipt_ttl); ++DECL_KSYM_MODULE(ipt_LOG); ++DECL_KSYM_MODULE(ipt_length); ++DECL_KSYM_MODULE(ip_conntrack); ++DECL_KSYM_MODULE(ip_conntrack_ftp); ++DECL_KSYM_MODULE(ip_conntrack_irc); ++DECL_KSYM_MODULE(ipt_conntrack); ++DECL_KSYM_MODULE(ipt_state); ++DECL_KSYM_MODULE(ipt_helper); ++DECL_KSYM_MODULE(iptable_nat); ++DECL_KSYM_MODULE(ip_nat_ftp); ++DECL_KSYM_MODULE(ip_nat_irc); ++DECL_KSYM_MODULE(ipt_REDIRECT); ++#endif ++ ++struct sk_buff; ++ ++DECL_KSYM_CALL(int, init_netfilter, (void)); ++DECL_KSYM_CALL(int, init_iptables, (void)); ++DECL_KSYM_CALL(int, init_iptable_filter, (void)); ++DECL_KSYM_CALL(int, init_iptable_mangle, (void)); ++DECL_KSYM_CALL(int, init_iptable_limit, (void)); ++DECL_KSYM_CALL(int, init_iptable_multiport, (void)); ++DECL_KSYM_CALL(int, init_iptable_tos, (void)); ++DECL_KSYM_CALL(int, init_iptable_TOS, (void)); ++DECL_KSYM_CALL(int, init_iptable_REJECT, (void)); ++DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void)); ++DECL_KSYM_CALL(int, init_iptable_tcpmss, (void)); ++DECL_KSYM_CALL(int, init_iptable_ttl, (void)); ++DECL_KSYM_CALL(int, init_iptable_LOG, (void)); ++DECL_KSYM_CALL(int, init_iptable_length, (void)); ++DECL_KSYM_CALL(int, init_iptable_conntrack, (void)); ++DECL_KSYM_CALL(int, init_iptable_ftp, (void)); ++DECL_KSYM_CALL(int, init_iptable_irc, (void)); ++DECL_KSYM_CALL(int, init_iptable_conntrack_match, (void)); ++DECL_KSYM_CALL(int, init_iptable_state, (void)); ++DECL_KSYM_CALL(int, init_iptable_helper, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void)); ++DECL_KSYM_CALL(int, init_iptable_nat_irc, (void)); ++DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void)); ++DECL_KSYM_CALL(void, fini_iptable_nat, (void)); ++DECL_KSYM_CALL(void, fini_iptable_helper, (void)); ++DECL_KSYM_CALL(void, fini_iptable_state, (void)); ++DECL_KSYM_CALL(void, fini_iptable_conntrack_match, (void)); ++DECL_KSYM_CALL(void, fini_iptable_irc, (void)); ++DECL_KSYM_CALL(void, fini_iptable_ftp, (void)); ++DECL_KSYM_CALL(void, fini_iptable_conntrack, (void)); ++DECL_KSYM_CALL(void, fini_iptable_length, (void)); ++DECL_KSYM_CALL(void, fini_iptable_LOG, (void)); ++DECL_KSYM_CALL(void, fini_iptable_ttl, (void)); ++DECL_KSYM_CALL(void, fini_iptable_tcpmss, (void)); ++DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void)); ++DECL_KSYM_CALL(void, fini_iptable_REJECT, (void)); ++DECL_KSYM_CALL(void, fini_iptable_TOS, (void)); ++DECL_KSYM_CALL(void, 
fini_iptable_tos, (void)); ++DECL_KSYM_CALL(void, fini_iptable_multiport, (void)); ++DECL_KSYM_CALL(void, fini_iptable_limit, (void)); ++DECL_KSYM_CALL(void, fini_iptable_filter, (void)); ++DECL_KSYM_CALL(void, fini_iptable_mangle, (void)); ++DECL_KSYM_CALL(void, fini_iptables, (void)); ++DECL_KSYM_CALL(void, fini_netfilter, (void)); ++DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void)); ++ ++DECL_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table)); ++#endif /* CONFIG_VE_IPTABLES */ ++ ++#ifdef CONFIG_VE_CALLS_MODULE ++DECL_KSYM_MODULE(vzmon); ++DECL_KSYM_CALL(int, real_get_device_perms_ve, ++ (int dev_type, dev_t dev, int access_mode)); ++DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env)); ++DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env)); ++DECL_KSYM_CALL(void, real_update_load_avg_ve, (void)); ++#endif ++ ++#endif /* _LINUX_NFCALLS_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/nfs_fs.h linux-2.6.8.1-ve022stab078/include/linux/nfs_fs.h +--- linux-2.6.8.1.orig/include/linux/nfs_fs.h 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/nfs_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -267,7 +267,8 @@ extern struct inode *nfs_fhget(struct su + struct nfs_fattr *); + extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); + extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +-extern int nfs_permission(struct inode *, int, struct nameidata *); ++extern int nfs_permission(struct inode *, int, struct nameidata *, ++ struct exec_perm *); + extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); + extern int nfs_open(struct inode *, struct file *); + extern int nfs_release(struct inode *, struct file *); +diff -uprN linux-2.6.8.1.orig/include/linux/notifier.h linux-2.6.8.1-ve022stab078/include/linux/notifier.h +--- linux-2.6.8.1.orig/include/linux/notifier.h 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/notifier.h 2006-05-11 13:05:39.000000000 +0400 +@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no + + #define NOTIFY_DONE 0x0000 /* Don't care */ + #define NOTIFY_OK 0x0001 /* Suits me */ ++#define NOTIFY_FAIL 0x0002 /* Reject */ + #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ +-#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ ++#define NOTIFY_BAD (NOTIFY_STOP_MASK|NOTIFY_FAIL) /* Bad/Veto action */ + + /* + * Declared notifiers so far. I can imagine quite a few more chains +diff -uprN linux-2.6.8.1.orig/include/linux/pagevec.h linux-2.6.8.1-ve022stab078/include/linux/pagevec.h +--- linux-2.6.8.1.orig/include/linux/pagevec.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/pagevec.h 2006-05-11 13:05:29.000000000 +0400 +@@ -5,14 +5,15 @@ + * pages. A pagevec is a multipage container which is used for that. 
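[Illustrative sketch, not from the patch.] The nfcalls.h interface above lets the core kernel call into optional modules through vz_* function pointers, guarded by try_module_get(). Assuming DECL_KSYM_MODULE()/INIT_KSYM_MODULE() and DECL_KSYM_CALL()/INIT_KSYM_CALL() have been instantiated for a hypothetical module "foo" with entry point init_iptable_foo(), the registration and call sites would follow this pattern (KSYMMODRESOLVE deliberately after KSYMRESOLVE, as the header's comment requires):

    #include <linux/module.h>
    #include <linux/nfcalls.h>

    static int __init foo_module_init(void)
    {
            KSYMRESOLVE(init_iptable_foo);  /* publish the entry point first */
            KSYMMODRESOLVE(foo);            /* then mark the owning module live */
            return 0;
    }

    static void __exit foo_module_exit(void)
    {
            KSYMMODUNRESOLVE(foo);          /* reverse order on unload */
            KSYMUNRESOLVE(init_iptable_foo);
    }

    /* Core-kernel side: returns 0 (the first argument) if "foo" is not loaded. */
    static int start_foo(void)
    {
            return KSYMERRCALL(0, foo, init_iptable_foo, ());
    }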
+ */ + +-#define PAGEVEC_SIZE 16 ++/* 14 pointers + two long's align the pagevec structure to a power of two */ ++#define PAGEVEC_SIZE 14 + + struct page; + struct address_space; + + struct pagevec { +- unsigned nr; +- int cold; ++ unsigned long nr; ++ unsigned long cold; + struct page *pages[PAGEVEC_SIZE]; + }; + +diff -uprN linux-2.6.8.1.orig/include/linux/pci_ids.h linux-2.6.8.1-ve022stab078/include/linux/pci_ids.h +--- linux-2.6.8.1.orig/include/linux/pci_ids.h 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/pci_ids.h 2006-05-11 13:05:28.000000000 +0400 +@@ -2190,6 +2190,8 @@ + #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 + #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 + #define PCI_DEVICE_ID_INTEL_SMCH 0x3590 ++#define PCI_DEVICE_ID_INTEL_E7320_MCH 0x3592 ++#define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e + #define PCI_DEVICE_ID_INTEL_80310 0x530d + #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 + #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 +diff -uprN linux-2.6.8.1.orig/include/linux/pid.h linux-2.6.8.1-ve022stab078/include/linux/pid.h +--- linux-2.6.8.1.orig/include/linux/pid.h 2004-08-14 14:54:52.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/pid.h 2006-05-11 13:05:40.000000000 +0400 +@@ -1,6 +1,18 @@ + #ifndef _LINUX_PID_H + #define _LINUX_PID_H + ++#define VPID_BIT 10 ++#define VPID_DIV (1<<VPID_BIT) ++ ++#ifdef CONFIG_VE ++#define __is_virtual_pid(pid) ((pid) & VPID_DIV) ++#define is_virtual_pid(pid) \ ++ (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env()))) ++#else ++#define __is_virtual_pid(pid) 0 ++#define is_virtual_pid(pid) 0 ++#endif ++ + enum pid_type + { + PIDTYPE_PID, +@@ -12,34 +24,24 @@ enum pid_type + + struct pid + { ++ /* Try to keep pid_chain in the same cacheline as nr for find_pid */ + int nr; +- atomic_t count; +- struct task_struct *task; +- struct list_head task_list; +- struct list_head hash_chain; +-}; +- +-struct pid_link +-{ +- struct list_head pid_chain; +- struct pid *pidptr; +- struct pid pid; ++ struct hlist_node pid_chain; ++#ifdef CONFIG_VE ++ int vnr; ++#endif ++ /* list of pids with the same nr, only one of them is in the hash */ ++ struct list_head pid_list; + }; + + #define pid_task(elem, type) \ +- list_entry(elem, struct task_struct, pids[type].pid_chain) ++ list_entry(elem, struct task_struct, pids[type].pid_list) + + /* +- * attach_pid() and link_pid() must be called with the tasklist_lock ++ * attach_pid() and detach_pid() must be called with the tasklist_lock + * write-held. + */ + extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr)); +- +-extern void FASTCALL(link_pid(struct task_struct *task, struct pid_link *link, struct pid *pid)); +- +-/* +- * detach_pid() must be called with the tasklist_lock write-held. 
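[A small aside on the pid.h layout above, not part of the patch.] VPID_BIT is 10, so VPID_DIV is 1024 and any pid value with that bit set is a virtualized id; pid 1 is additionally treated as virtual inside a non-host VE, since it is that VE's init. For instance:

    #include <linux/pid.h>

    /* __is_virtual_pid() only tests bit 10 (VPID_DIV == 1024):
     *   __is_virtual_pid(1324) != 0   (1324 = 1024 + 300)
     *   __is_virtual_pid(300)  == 0
     */
    static int vpid_bit_demo(void)
    {
            return __is_virtual_pid(1324) && !__is_virtual_pid(300);
    }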
+- */ + extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); + + /* +@@ -52,13 +54,89 @@ extern int alloc_pidmap(void); + extern void FASTCALL(free_pidmap(int)); + extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); + +-#define for_each_task_pid(who, type, task, elem, pid) \ +- if ((pid = find_pid(type, who))) \ +- for (elem = pid->task_list.next, \ +- prefetch(elem->next), \ +- task = pid_task(elem, type); \ +- elem != &pid->task_list; \ +- elem = elem->next, prefetch(elem->next), \ +- task = pid_task(elem, type)) ++#ifndef CONFIG_VE ++ ++#define vpid_to_pid(pid) (pid) ++#define __vpid_to_pid(pid) (pid) ++#define pid_type_to_vpid(pid, type) (pid) ++#define __pid_type_to_vpid(pid, type) (pid) ++ ++#define comb_vpid_to_pid(pid) (pid) ++#define comb_pid_to_vpid(pid) (pid) ++ ++#else ++ ++struct ve_struct; ++extern void free_vpid(int vpid, struct ve_struct *ve); ++extern int alloc_vpid(int pid, int vpid); ++extern int vpid_to_pid(int pid); ++extern int __vpid_to_pid(int pid); ++extern pid_t pid_type_to_vpid(int type, pid_t pid); ++extern pid_t _pid_type_to_vpid(int type, pid_t pid); ++ ++static inline int comb_vpid_to_pid(int vpid) ++{ ++ int pid = vpid; ++ ++ if (vpid > 0) { ++ pid = vpid_to_pid(vpid); ++ if (unlikely(pid < 0)) ++ return 0; ++ } else if (vpid < 0) { ++ pid = vpid_to_pid(-vpid); ++ if (unlikely(pid < 0)) ++ return 0; ++ pid = -pid; ++ } ++ return pid; ++} ++ ++static inline int comb_pid_to_vpid(int pid) ++{ ++ int vpid = pid; ++ ++ if (pid > 0) { ++ vpid = pid_type_to_vpid(PIDTYPE_PID, pid); ++ if (unlikely(vpid < 0)) ++ return 0; ++ } else if (pid < 0) { ++ vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid); ++ if (unlikely(vpid < 0)) ++ return 0; ++ vpid = -vpid; ++ } ++ return vpid; ++} ++#endif ++ ++#define do_each_task_pid_all(who, type, task) \ ++ if ((task = find_task_by_pid_type_all(type, who))) { \ ++ prefetch((task)->pids[type].pid_list.next); \ ++ do { ++ ++#define while_each_task_pid_all(who, type, task) \ ++ } while (task = pid_task((task)->pids[type].pid_list.next,\ ++ type), \ ++ prefetch((task)->pids[type].pid_list.next), \ ++ hlist_unhashed(&(task)->pids[type].pid_chain)); \ ++ } \ ++ ++#ifndef CONFIG_VE ++#define __do_each_task_pid_ve(who, type, task, owner) \ ++ do_each_task_pid_all(who, type, task) ++#define __while_each_task_pid_ve(who, type, task, owner) \ ++ while_each_task_pid_all(who, type, task) ++#else /* CONFIG_VE */ ++#define __do_each_task_pid_ve(who, type, task, owner) \ ++ do_each_task_pid_all(who, type, task) \ ++ if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner)) ++#define __while_each_task_pid_ve(who, type, task, owner) \ ++ while_each_task_pid_all(who, type, task) ++#endif /* CONFIG_VE */ ++ ++#define do_each_task_pid_ve(who, type, task) \ ++ __do_each_task_pid_ve(who, type, task, get_exec_env()); ++#define while_each_task_pid_ve(who, type, task) \ ++ __while_each_task_pid_ve(who, type, task, get_exec_env()); + + #endif /* _LINUX_PID_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/proc_fs.h linux-2.6.8.1-ve022stab078/include/linux/proc_fs.h +--- linux-2.6.8.1.orig/include/linux/proc_fs.h 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/proc_fs.h 2006-05-11 13:05:40.000000000 +0400 +@@ -66,8 +66,17 @@ struct proc_dir_entry { + write_proc_t *write_proc; + atomic_t count; /* use count */ + int deleted; /* delete flag */ ++ void *set; + }; + ++extern void de_put(struct proc_dir_entry *); ++static inline struct proc_dir_entry *de_get(struct proc_dir_entry 
*de) ++{ ++ if (de) ++ atomic_inc(&de->count); ++ return de; ++} ++ + struct kcore_list { + struct kcore_list *next; + unsigned long addr; +@@ -87,12 +96,15 @@ extern void proc_root_init(void); + extern void proc_misc_init(void); + + struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +-struct dentry *proc_pid_unhash(struct task_struct *p); +-void proc_pid_flush(struct dentry *proc_dentry); ++void proc_pid_unhash(struct task_struct *p, struct dentry * [2]); ++void proc_pid_flush(struct dentry *proc_dentry[2]); + int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); + + extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent); ++extern struct proc_dir_entry *create_proc_glob_entry(const char *name, ++ mode_t mode, ++ struct proc_dir_entry *parent); + extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); + + extern struct vfsmount *proc_mnt; +@@ -169,6 +181,15 @@ static inline struct proc_dir_entry *pro + return create_proc_info_entry(name,mode,proc_net,get_info); + } + ++static inline struct proc_dir_entry *__proc_net_fops_create(const char *name, ++ mode_t mode, struct file_operations *fops, struct proc_dir_entry *p) ++{ ++ struct proc_dir_entry *res = create_proc_entry(name, mode, p); ++ if (res) ++ res->proc_fops = fops; ++ return res; ++} ++ + static inline struct proc_dir_entry *proc_net_fops_create(const char *name, + mode_t mode, struct file_operations *fops) + { +@@ -178,6 +199,11 @@ static inline struct proc_dir_entry *pro + return res; + } + ++static inline void __proc_net_remove(const char *name) ++{ ++ remove_proc_entry(name, NULL); ++} ++ + static inline void proc_net_remove(const char *name) + { + remove_proc_entry(name,proc_net); +@@ -188,15 +214,20 @@ static inline void proc_net_remove(const + #define proc_root_driver NULL + #define proc_net NULL + ++#define __proc_net_fops_create(name, mode, fops, p) ({ (void)(mode), NULL; }) + #define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) + #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) ++static inline void __proc_net_remove(const char *name) {} + static inline void proc_net_remove(const char *name) {} + +-static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } +-static inline void proc_pid_flush(struct dentry *proc_dentry) { } ++static inline void proc_pid_unhash(struct task_struct *p, struct dentry * [2]) ++ { return NULL; } ++static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { } + + static inline struct proc_dir_entry *create_proc_entry(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } ++static inline struct proc_dir_entry *create_proc_glob_entry(const char *name, ++ mode_t mode, struct proc_dir_entry *parent) { return NULL; } + + #define remove_proc_entry(name, parent) do {} while (0) + +@@ -255,4 +286,9 @@ static inline struct proc_dir_entry *PDE + return PROC_I(inode)->pde; + } + ++#define LPDE(inode) (PROC_I((inode))->pde) ++#ifdef CONFIG_VE ++#define GPDE(inode) (*(struct proc_dir_entry **)(&(inode)->i_pipe)) ++#endif ++ + #endif /* _LINUX_PROC_FS_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/ptrace.h linux-2.6.8.1-ve022stab078/include/linux/ptrace.h +--- linux-2.6.8.1.orig/include/linux/ptrace.h 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ptrace.h 2006-05-11 13:05:34.000000000 +0400 +@@ -79,6 +79,7 @@ extern int 
ptrace_readdata(struct task_s + extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); + extern int ptrace_attach(struct task_struct *tsk); + extern int ptrace_detach(struct task_struct *, unsigned int); ++extern void __ptrace_detach(struct task_struct *, unsigned int); + extern void ptrace_disable(struct task_struct *); + extern int ptrace_check_attach(struct task_struct *task, int kill); + extern int ptrace_request(struct task_struct *child, long request, long addr, long data); +diff -uprN linux-2.6.8.1.orig/include/linux/quota.h linux-2.6.8.1-ve022stab078/include/linux/quota.h +--- linux-2.6.8.1.orig/include/linux/quota.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/quota.h 2006-05-11 13:05:43.000000000 +0400 +@@ -37,7 +37,6 @@ + + #include <linux/errno.h> + #include <linux/types.h> +-#include <linux/spinlock.h> + + #define __DQUOT_VERSION__ "dquot_6.5.1" + #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 +@@ -45,9 +44,6 @@ + typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ + typedef __u64 qsize_t; /* Type in which we store sizes */ + +-extern spinlock_t dq_list_lock; +-extern spinlock_t dq_data_lock; +- + /* Size of blocks in which are counted size limits */ + #define QUOTABLOCK_BITS 10 + #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) +@@ -134,6 +130,12 @@ struct if_dqinfo { + + #ifdef __KERNEL__ + ++#include <linux/spinlock.h> ++ ++extern spinlock_t dq_list_lock; ++extern spinlock_t dq_data_lock; ++ ++ + #include <linux/dqblk_xfs.h> + #include <linux/dqblk_v1.h> + #include <linux/dqblk_v2.h> +@@ -240,6 +242,8 @@ struct quota_format_ops { + int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ + }; + ++struct inode; ++struct iattr; + /* Operations working with dquots */ + struct dquot_operations { + int (*initialize) (struct inode *, int); +@@ -254,9 +258,11 @@ struct dquot_operations { + int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ + int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ + int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ ++ int (*rename) (struct inode *, struct inode *, struct inode *); + }; + + /* Operations handling requests from userspace */ ++struct v2_disk_dqblk; + struct quotactl_ops { + int (*quota_on)(struct super_block *, int, int, char *); + int (*quota_off)(struct super_block *, int); +@@ -269,6 +275,9 @@ struct quotactl_ops { + int (*set_xstate)(struct super_block *, unsigned int, int); + int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); + int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *); ++#ifdef CONFIG_QUOTA_COMPAT ++ int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *); ++#endif + }; + + struct quota_format_type { +diff -uprN linux-2.6.8.1.orig/include/linux/quotaops.h linux-2.6.8.1-ve022stab078/include/linux/quotaops.h +--- linux-2.6.8.1.orig/include/linux/quotaops.h 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/quotaops.h 2006-05-11 13:05:43.000000000 +0400 +@@ -170,6 +170,19 @@ static __inline__ int DQUOT_TRANSFER(str + return 0; + } + ++static __inline__ int DQUOT_RENAME(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir) ++{ ++ struct dquot_operations *q_op; ++ ++ q_op = inode->i_sb->dq_op; ++ if (q_op && q_op->rename) { ++ if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA) ++ 
return 1; ++ } ++ return 0; ++} ++ + /* The following two functions cannot be called inside a transaction */ + #define DQUOT_SYNC(sb) sync_dquots(sb, -1) + +@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s + #define DQUOT_SYNC(sb) do { } while(0) + #define DQUOT_OFF(sb) do { } while(0) + #define DQUOT_TRANSFER(inode, iattr) (0) ++#define DQUOT_RENAME(inode, old_dir, new_dir) (0) + extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) + { + inode_add_bytes(inode, nr); +diff -uprN linux-2.6.8.1.orig/include/linux/reiserfs_fs.h linux-2.6.8.1-ve022stab078/include/linux/reiserfs_fs.h +--- linux-2.6.8.1.orig/include/linux/reiserfs_fs.h 2004-08-14 14:56:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/reiserfs_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -1944,7 +1944,7 @@ void reiserfs_read_locked_inode(struct i + int reiserfs_find_actor(struct inode * inode, void *p) ; + int reiserfs_init_locked_inode(struct inode * inode, void *p) ; + void reiserfs_delete_inode (struct inode * inode); +-void reiserfs_write_inode (struct inode * inode, int) ; ++int reiserfs_write_inode (struct inode * inode, int) ; + struct dentry *reiserfs_get_dentry(struct super_block *, void *) ; + struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data, + int len, int fhtype, +diff -uprN linux-2.6.8.1.orig/include/linux/reiserfs_xattr.h linux-2.6.8.1-ve022stab078/include/linux/reiserfs_xattr.h +--- linux-2.6.8.1.orig/include/linux/reiserfs_xattr.h 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/reiserfs_xattr.h 2006-05-11 13:05:35.000000000 +0400 +@@ -42,7 +42,8 @@ int reiserfs_removexattr (struct dentry + int reiserfs_delete_xattrs (struct inode *inode); + int reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs); + int reiserfs_xattr_init (struct super_block *sb, int mount_flags); +-int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd); ++int reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, ++ struct exec_perm *exec_perm); + int reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd); + + int reiserfs_xattr_del (struct inode *, const char *); +diff -uprN linux-2.6.8.1.orig/include/linux/sched.h linux-2.6.8.1-ve022stab078/include/linux/sched.h +--- linux-2.6.8.1.orig/include/linux/sched.h 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/sched.h 2006-05-11 13:05:49.000000000 +0400 +@@ -30,7 +30,12 @@ + #include <linux/pid.h> + #include <linux/percpu.h> + ++#include <ub/ub_task.h> ++ + struct exec_domain; ++struct task_beancounter; ++struct user_beancounter; ++struct ve_struct; + + /* + * cloning flags: +@@ -85,6 +90,9 @@ extern unsigned long avenrun[]; /* Load + load += n*(FIXED_1-exp); \ + load >>= FSHIFT; + ++#define LOAD_INT(x) ((x) >> FSHIFT) ++#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) ++ + #define CT_TO_SECS(x) ((x) / HZ) + #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) + +@@ -92,10 +100,22 @@ extern int nr_threads; + extern int last_pid; + DECLARE_PER_CPU(unsigned long, process_counts); + extern int nr_processes(void); ++ ++extern unsigned long nr_sleeping(void); ++extern unsigned long nr_stopped(void); ++extern unsigned long nr_zombie; ++extern unsigned long nr_dead; + extern unsigned long nr_running(void); + extern unsigned long nr_uninterruptible(void); + extern unsigned long nr_iowait(void); + ++#ifdef CONFIG_VE ++struct ve_struct; ++extern unsigned long nr_running_ve(struct 
ve_struct *); ++extern unsigned long nr_iowait_ve(struct ve_struct *); ++extern unsigned long nr_uninterruptible_ve(struct ve_struct *); ++#endif ++ + #include <linux/time.h> + #include <linux/param.h> + #include <linux/resource.h> +@@ -107,8 +127,8 @@ extern unsigned long nr_iowait(void); + #define TASK_INTERRUPTIBLE 1 + #define TASK_UNINTERRUPTIBLE 2 + #define TASK_STOPPED 4 +-#define TASK_ZOMBIE 8 +-#define TASK_DEAD 16 ++#define EXIT_ZOMBIE 16 ++#define EXIT_DEAD 32 + + #define __set_task_state(tsk, state_value) \ + do { (tsk)->state = (state_value); } while (0) +@@ -154,6 +174,8 @@ extern cpumask_t nohz_cpu_mask; + + extern void show_state(void); + extern void show_regs(struct pt_regs *); ++extern void smp_show_regs(struct pt_regs *, void *); ++extern void show_vsched(void); + + /* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current +@@ -171,6 +193,8 @@ extern void update_process_times(int use + extern void scheduler_tick(int user_tick, int system); + extern unsigned long cache_decay_ticks; + ++int setscheduler(pid_t pid, int policy, struct sched_param __user *param); ++ + /* Attach to any functions which should be ignored in wchan output. */ + #define __sched __attribute__((__section__(".sched.text"))) + /* Is this address in the __sched functions? */ +@@ -215,6 +239,7 @@ struct mm_struct { + unsigned long saved_auxv[40]; /* for /proc/PID/auxv */ + + unsigned dumpable:1; ++ unsigned vps_dumpable:1; + cpumask_t cpu_vm_mask; + + /* Architecture-specific MM context */ +@@ -229,8 +254,12 @@ struct mm_struct { + struct kioctx *ioctx_list; + + struct kioctx default_kioctx; ++ ++ struct user_beancounter *mm_ub; + }; + ++#define mm_ub(__mm) ((__mm)->mm_ub) ++ + extern int mmlist_nr; + + struct sighand_struct { +@@ -239,6 +268,9 @@ struct sighand_struct { + spinlock_t siglock; + }; + ++#include <linux/ve.h> ++#include <linux/ve_task.h> ++ + /* + * NOTE! "signal_struct" does not have it's own + * locking, because a shared signal_struct always +@@ -386,6 +418,8 @@ int set_current_groups(struct group_info + + struct audit_context; /* See audit.c */ + struct mempolicy; ++struct vcpu_scheduler; ++struct vcpu_info; + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +@@ -396,6 +430,14 @@ struct task_struct { + + int lock_depth; /* Lock depth */ + ++#ifdef CONFIG_SCHED_VCPU ++ struct vcpu_scheduler *vsched; ++ struct vcpu_info *vcpu; ++ ++ /* id's are saved to avoid locking (e.g. on vsched->id access) */ ++ int vsched_id; ++ int vcpu_id; ++#endif + int prio, static_prio; + struct list_head run_list; + prio_array_t *array; +@@ -410,6 +452,7 @@ struct task_struct { + unsigned int time_slice, first_time_slice; + + struct list_head tasks; ++ + /* + * ptrace_list/ptrace_children forms the list of my children + * that were stolen by a ptracer. +@@ -421,6 +464,7 @@ struct task_struct { + + /* task state */ + struct linux_binfmt *binfmt; ++ long exit_state; + int exit_code, exit_signal; + int pdeath_signal; /* The signal sent when the parent dies */ + /* ??? */ +@@ -444,7 +488,7 @@ struct task_struct { + struct task_struct *group_leader; /* threadgroup leader */ + + /* PID/PID hash table linkage. */ +- struct pid_link pids[PIDTYPE_MAX]; ++ struct pid pids[PIDTYPE_MAX]; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + struct completion *vfork_done; /* for vfork() */ +@@ -523,10 +567,25 @@ struct task_struct { + unsigned long ptrace_message; + siginfo_t *last_siginfo; /* For ptrace use. 
*/ + ++/* state tracking for suspend */ ++ sigset_t saved_sigset; ++ __u8 pn_state; ++ __u8 stopped_state:1, sigsuspend_state:1; ++ + #ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; /* could be shared with used_math */ + #endif ++#ifdef CONFIG_USER_RESOURCE ++ struct task_beancounter task_bc; ++#endif ++#ifdef CONFIG_VE ++ struct ve_task_info ve_task_info; ++#endif ++#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE) ++ unsigned long magic; ++ struct inode *ino; ++#endif + }; + + static inline pid_t process_group(struct task_struct *tsk) +@@ -534,6 +593,11 @@ static inline pid_t process_group(struct + return tsk->signal->pgrp; + } + ++static inline int pid_alive(struct task_struct *p) ++{ ++ return p->pids[PIDTYPE_PID].nr != 0; ++} ++ + extern void __put_task_struct(struct task_struct *tsk); + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + #define put_task_struct(tsk) \ +@@ -555,7 +619,6 @@ do { if (atomic_dec_and_test(&(tsk)->usa + #define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ + #define PF_FLUSHER 0x00002000 /* responsible for disk writeback */ + +-#define PF_FREEZE 0x00004000 /* this task should be frozen for suspend */ + #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ + #define PF_FROZEN 0x00010000 /* frozen for system suspend */ + #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ +@@ -564,6 +627,57 @@ do { if (atomic_dec_and_test(&(tsk)->usa + #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ + #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ + ++#ifndef CONFIG_VE ++#define set_pn_state(tsk, state) do { } while(0) ++#define clear_pn_state(tsk) do { } while(0) ++#define set_sigsuspend_state(tsk, sig) do { } while(0) ++#define clear_sigsuspend_state(tsk) do { } while(0) ++#define set_stop_state(tsk) do { } while(0) ++#define clear_stop_state(tsk) do { } while(0) ++#else ++#define PN_STOP_TF 1 /* was not in 2.6.8 */ ++#define PN_STOP_TF_RT 2 /* was not in 2.6.8 */ ++#define PN_STOP_ENTRY 3 ++#define PN_STOP_FORK 4 ++#define PN_STOP_VFORK 5 ++#define PN_STOP_SIGNAL 6 ++#define PN_STOP_EXIT 7 ++#define PN_STOP_EXEC 8 ++#define PN_STOP_LEAVE 9 ++ ++static inline void set_pn_state(struct task_struct *tsk, int state) ++{ ++ tsk->pn_state = state; ++} ++ ++static inline void clear_pn_state(struct task_struct *tsk) ++{ ++ tsk->pn_state = 0; ++} ++ ++static inline void set_sigsuspend_state(struct task_struct *tsk, sigset_t saveset) ++{ ++ tsk->sigsuspend_state = 1; ++ tsk->saved_sigset = saveset; ++} ++ ++static inline void clear_sigsuspend_state(struct task_struct *tsk) ++{ ++ tsk->sigsuspend_state = 0; ++ siginitset(&tsk->saved_sigset, 0); ++} ++ ++static inline void set_stop_state(struct task_struct *tsk) ++{ ++ tsk->stopped_state = 1; ++} ++ ++static inline void clear_stop_state(struct task_struct *tsk) ++{ ++ tsk->stopped_state = 0; ++} ++#endif ++ + #ifdef CONFIG_SMP + #define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ + +@@ -687,6 +801,20 @@ static inline int set_cpus_allowed(task_ + + extern unsigned long long sched_clock(void); + ++static inline unsigned long cycles_to_clocks(cycles_t cycles) ++{ ++ extern unsigned long cycles_per_clock; ++ do_div(cycles, cycles_per_clock); ++ return cycles; ++} ++ ++static inline u64 cycles_to_jiffies(cycles_t cycles) ++{ ++ extern unsigned long cycles_per_jiffy; ++ do_div(cycles, cycles_per_jiffy); ++ return cycles; ++} ++ + #ifdef CONFIG_SMP + extern void sched_balance_exec(void); + #else 
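[Illustrative only.] The cycles_to_clocks()/cycles_to_jiffies() helpers added above convert raw timestamp-counter deltas using the exported cycles_per_clock/cycles_per_jiffy values. A sketch of timing a code section and expressing the result in jiffies (the timed function is hypothetical):

    #include <linux/sched.h>
    #include <asm/timex.h>          /* get_cycles(), cycles_t */

    static u64 time_section_in_jiffies(void (*section)(void))
    {
            cycles_t start = get_cycles();

            section();
            /* divide the cycle delta by the platform's cycles_per_jiffy */
            return cycles_to_jiffies(get_cycles() - start);
    }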
+@@ -699,6 +827,7 @@ extern int task_prio(const task_t *p); + extern int task_nice(const task_t *p); + extern int task_curr(const task_t *p); + extern int idle_cpu(int cpu); ++extern task_t *idle_task(int cpu); + + void yield(void); + +@@ -727,11 +856,243 @@ extern struct task_struct init_task; + + extern struct mm_struct init_mm; + +-extern struct task_struct *find_task_by_pid(int pid); ++#define find_task_by_pid_all(nr) \ ++ find_task_by_pid_type_all(PIDTYPE_PID, nr) ++extern struct task_struct *find_task_by_pid_type_all(int type, int pid); + extern void set_special_pids(pid_t session, pid_t pgrp); + extern void __set_special_pids(pid_t session, pid_t pgrp); + ++#ifndef CONFIG_VE ++#define find_task_by_pid_ve find_task_by_pid_all ++ ++#define get_exec_env() NULL ++static inline struct ve_struct * set_exec_env(struct ve_struct *new_env) ++{ ++ return NULL; ++} ++#define ve_is_super(env) 1 ++#define ve_accessible(target, owner) 1 ++#define ve_accessible_strict(target, owner) 1 ++#define ve_accessible_veid(target, owner) 1 ++#define ve_accessible_strict_veid(target, owner) 1 ++ ++#define VEID(envid) 0 ++#define get_ve0() NULL ++ ++static inline pid_t virt_pid(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t virt_tgid(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ ++static inline pid_t virt_pgid(struct task_struct *tsk) ++{ ++ return tsk->signal->pgrp; ++} ++ ++static inline pid_t virt_sid(struct task_struct *tsk) ++{ ++ return tsk->signal->session; ++} ++ ++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *ve) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t get_task_pid(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t get_task_tgid(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ ++static inline pid_t get_task_pgid(struct task_struct *tsk) ++{ ++ return tsk->signal->pgrp; ++} ++ ++static inline pid_t get_task_sid(struct task_struct *tsk) ++{ ++ return tsk->signal->session; ++} ++ ++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid) ++{ ++} ++ ++static inline pid_t get_task_ppid(struct task_struct *p) ++{ ++ if (!pid_alive(p)) ++ return 0; ++ return (p->pid > 1 ? 
p->group_leader->real_parent->pid : 0); ++} ++ ++#else /* CONFIG_VE */ ++ ++#include <asm/current.h> ++#include <linux/ve.h> ++ ++extern struct ve_struct ve0; ++ ++#define find_task_by_pid_ve(nr) \ ++ find_task_by_pid_type_ve(PIDTYPE_PID, nr) ++ ++extern struct task_struct *find_task_by_pid_type_ve(int type, int pid); ++ ++#define get_ve0() (&ve0) ++#define VEID(envid) ((envid)->veid) ++ ++#define get_exec_env() (VE_TASK_INFO(current)->exec_env) ++static inline struct ve_struct *set_exec_env(struct ve_struct *new_env) ++{ ++ struct ve_struct *old_env; ++ ++ old_env = VE_TASK_INFO(current)->exec_env; ++ VE_TASK_INFO(current)->exec_env = new_env; ++ ++ return old_env; ++} ++ ++#define ve_is_super(env) ((env) == get_ve0()) ++#define ve_accessible_strict(target, owner) ((target) == (owner)) ++static inline int ve_accessible(struct ve_struct *target, ++ struct ve_struct *owner) { ++ return ve_is_super(owner) || ve_accessible_strict(target, owner); ++} ++ ++#define ve_accessible_strict_veid(target, owner) ((target) == (owner)) ++static inline int ve_accessible_veid(envid_t target, envid_t owner) ++{ ++ return get_ve0()->veid == owner || ++ ve_accessible_strict_veid(target, owner); ++} ++ ++static inline pid_t virt_pid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_PID].vnr; ++} ++ ++static inline pid_t virt_tgid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_TGID].vnr; ++} ++ ++static inline pid_t virt_pgid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_PGID].vnr; ++} ++ ++static inline pid_t virt_sid(struct task_struct *tsk) ++{ ++ return tsk->pids[PIDTYPE_SID].vnr; ++} ++ ++static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env) ++{ ++ return ve_is_super(env) ? tsk->pid : virt_pid(tsk); ++} ++ ++static inline pid_t get_task_pid(struct task_struct *tsk) ++{ ++ return get_task_pid_ve(tsk, get_exec_env()); ++} ++ ++static inline pid_t get_task_tgid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk); ++} ++ ++static inline pid_t get_task_pgid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk); ++} ++ ++static inline pid_t get_task_sid(struct task_struct *tsk) ++{ ++ return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk); ++} ++ ++static inline void set_virt_pid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_PID].vnr = pid; ++} ++ ++static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_TGID].vnr = pid; ++} ++ ++static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_PGID].vnr = pid; ++} ++ ++static inline void set_virt_sid(struct task_struct *tsk, pid_t pid) ++{ ++ tsk->pids[PIDTYPE_SID].vnr = pid; ++} ++ ++static inline pid_t get_task_ppid(struct task_struct *p) ++{ ++ struct task_struct *parent; ++ struct ve_struct *env; ++ ++ if (!pid_alive(p)) ++ return 0; ++ env = get_exec_env(); ++ if (get_task_pid_ve(p, env) == 1) ++ return 0; ++ parent = p->group_leader->real_parent; ++ return ve_accessible(VE_TASK_INFO(parent)->owner_env, env) ? 
++ get_task_pid_ve(parent, env) : 1; ++} ++ ++void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle, ++ cycles_t *strv, unsigned int cpu); ++void ve_sched_attach(struct ve_struct *envid); ++ ++#endif /* CONFIG_VE */ ++ ++#if defined(CONFIG_SCHED_VCPU) && defined(CONFIG_VE) ++extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int); ++extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int); ++#else ++#define ve_sched_get_idle_time(ve, cpu) 0 ++#define ve_sched_get_iowait_time(ve, cpu) 0 ++#endif ++ ++#ifdef CONFIG_SCHED_VCPU ++struct vcpu_scheduler; ++extern void fastcall vsched_cpu_online_map(struct vcpu_scheduler *sched, ++ cpumask_t *mask); ++#else ++#define vsched_cpu_online_map(vsched, mask) do { \ ++ *mask = cpu_online_map; \ ++ } while (0) ++#endif ++ + /* per-UID process charging. */ ++extern int set_user(uid_t new_ruid, int dumpclear); + extern struct user_struct * alloc_uid(uid_t); + static inline struct user_struct *get_uid(struct user_struct *u) + { +@@ -747,6 +1108,7 @@ extern unsigned long itimer_ticks; + extern unsigned long itimer_next; + extern void do_timer(struct pt_regs *); + ++extern void wake_up_init(void); + extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); + extern int FASTCALL(wake_up_process(struct task_struct * tsk)); + extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk)); +@@ -807,7 +1169,7 @@ extern struct sigqueue *sigqueue_alloc(v + extern void sigqueue_free(struct sigqueue *); + extern int send_sigqueue(int, struct sigqueue *, struct task_struct *); + extern int send_group_sigqueue(int, struct sigqueue *, struct task_struct *); +-extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); ++extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long); + + /* These can be the second arg to send_sig_info/send_group_sig_info. */ +@@ -885,7 +1247,10 @@ extern task_t *child_reaper; + + extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); + extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); +-extern struct task_struct * copy_process(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); ++extern struct task_struct * copy_process(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *, long pid); ++ ++extern void set_task_comm(struct task_struct *tsk, char *from); ++extern void get_task_comm(char *to, struct task_struct *tsk); + + #ifdef CONFIG_SMP + extern void wait_task_inactive(task_t * p); +@@ -908,31 +1273,105 @@ extern void wait_task_inactive(task_t * + add_parent(p, (p)->parent); \ + } while (0) + +-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) +-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) ++#define next_task_all(p) list_entry((p)->tasks.next, struct task_struct, tasks) ++#define prev_task_all(p) list_entry((p)->tasks.prev, struct task_struct, tasks) + +-#define for_each_process(p) \ +- for (p = &init_task ; (p = next_task(p)) != &init_task ; ) ++#define for_each_process_all(p) \ ++ for (p = &init_task ; (p = next_task_all(p)) != &init_task ; ) + + /* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. 
+ */ +-#define do_each_thread(g, t) \ +- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do ++#define do_each_thread_all(g, t) \ ++ for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do ++ ++#define while_each_thread_all(g, t) \ ++ while ((t = next_thread(t)) != g) ++ ++#ifndef CONFIG_VE ++ ++#define SET_VE_LINKS(p) ++#define REMOVE_VE_LINKS(p) ++#define for_each_process_ve(p) for_each_process_all(p) ++#define do_each_thread_ve(g, t) do_each_thread_all(g, t) ++#define while_each_thread_ve(g, t) while_each_thread_all(g, t) ++#define first_task_ve() next_task_ve(&init_task) ++#define next_task_ve(p) \ ++ (next_task_all(p) != &init_task ? next_task_all(p) : NULL) ++ ++#else /* CONFIG_VE */ ++ ++#define SET_VE_LINKS(p) \ ++ do { \ ++ if (thread_group_leader(p)) \ ++ list_add_tail(&VE_TASK_INFO(p)->vetask_list, \ ++ &VE_TASK_INFO(p)->owner_env->vetask_lh); \ ++ } while (0) + +-#define while_each_thread(g, t) \ ++#define REMOVE_VE_LINKS(p) \ ++ do { \ ++ if (thread_group_leader(p)) \ ++ list_del(&VE_TASK_INFO(p)->vetask_list); \ ++ } while(0) ++ ++static inline task_t* __first_task_ve(struct ve_struct *ve) ++{ ++ task_t *tsk; ++ ++ if (unlikely(ve_is_super(ve))) { ++ tsk = next_task_all(&init_task); ++ if (tsk == &init_task) ++ tsk = NULL; ++ } else { ++ /* probably can return ve->init_entry, but it's more clear */ ++ BUG_ON(list_empty(&ve->vetask_lh)); ++ tsk = VE_TASK_LIST_2_TASK(ve->vetask_lh.next); ++ } ++ return tsk; ++} ++ ++static inline task_t* __next_task_ve(struct ve_struct *ve, task_t *tsk) ++{ ++ if (unlikely(ve_is_super(ve))) { ++ tsk = next_task_all(tsk); ++ if (tsk == &init_task) ++ tsk = NULL; ++ } else { ++ struct list_head *tmp; ++ ++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve); ++ tmp = VE_TASK_INFO(tsk)->vetask_list.next; ++ if (tmp == &ve->vetask_lh) ++ tsk = NULL; ++ else ++ tsk = VE_TASK_LIST_2_TASK(tmp); ++ } ++ return tsk; ++} ++ ++#define first_task_ve() __first_task_ve(get_exec_env()) ++#define next_task_ve(p) __next_task_ve(get_exec_env(), p) ++/* no one uses prev_task_ve(), copy next_task_ve() if needed */ ++ ++#define for_each_process_ve(p) \ ++ for (p = first_task_ve(); p != NULL ; p = next_task_ve(p)) ++ ++#define do_each_thread_ve(g, t) \ ++ for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do ++ ++#define while_each_thread_ve(g, t) \ + while ((t = next_thread(t)) != g) + ++#endif /* CONFIG_VE */ ++ + extern task_t * FASTCALL(next_thread(const task_t *p)); + + #define thread_group_leader(p) (p->pid == p->tgid) + + static inline int thread_group_empty(task_t *p) + { +- struct pid *pid = p->pids[PIDTYPE_TGID].pidptr; +- +- return pid->task_list.next->next == &pid->task_list; ++ return list_empty(&p->pids[PIDTYPE_TGID].pid_list); + } + + #define delay_group_leader(p) \ +@@ -941,8 +1380,8 @@ static inline int thread_group_empty(tas + extern void unhash_process(struct task_struct *p); + + /* +- * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info and synchronises with +- * wait4(). ++ * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm and ++ * synchronises with wait4(). + * + * Nests both inside and outside of read_lock(&tasklist_lock). 
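[Usage sketch, not from the patch.] The *_all iterators above walk every task in the system, while for_each_process_ve()/do_each_thread_ve() restrict the walk to the caller's execution environment. Counting the threads visible to the current VE could look like this; taking tasklist_lock for reading is the caller's responsibility, as with the original macros:

    #include <linux/sched.h>

    static int count_threads_in_current_ve(void)
    {
            struct task_struct *g, *t;
            int n = 0;

            read_lock(&tasklist_lock);
            do_each_thread_ve(g, t) {
                    n++;
            } while_each_thread_ve(g, t);
            read_unlock(&tasklist_lock);

            return n;
    }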
+ * It must not be nested with write_lock_irq(&tasklist_lock), +@@ -1065,28 +1504,61 @@ extern void signal_wake_up(struct task_s + */ + #ifdef CONFIG_SMP + +-static inline unsigned int task_cpu(const struct task_struct *p) ++static inline unsigned int task_pcpu(const struct task_struct *p) + { + return p->thread_info->cpu; + } + +-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu) + { + p->thread_info->cpu = cpu; + } + + #else + ++static inline unsigned int task_pcpu(const struct task_struct *p) ++{ ++ return 0; ++} ++ ++static inline void set_task_pcpu(struct task_struct *p, unsigned int cpu) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++static inline unsigned int task_vsched_id(const struct task_struct *p) ++{ ++ return p->vsched_id; ++} ++ + static inline unsigned int task_cpu(const struct task_struct *p) + { ++ return p->vcpu_id; ++} ++ ++extern void set_task_cpu(struct task_struct *p, unsigned int vcpu); ++ ++#else ++ ++static inline unsigned int task_vsched_id(const struct task_struct *p) ++{ + return 0; + } + ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++ return task_pcpu(p); ++} ++ + static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) + { ++ set_task_pcpu(p, cpu); + } + +-#endif /* CONFIG_SMP */ ++#endif /* CONFIG_SCHED_VCPU */ + + #endif /* __KERNEL__ */ + +diff -uprN linux-2.6.8.1.orig/include/linux/security.h linux-2.6.8.1-ve022stab078/include/linux/security.h +--- linux-2.6.8.1.orig/include/linux/security.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/security.h 2006-05-11 13:05:40.000000000 +0400 +@@ -61,7 +61,7 @@ static inline int cap_netlink_send (stru + + static inline int cap_netlink_recv (struct sk_buff *skb) + { +- if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_NET_ADMIN)) ++ if (!cap_raised (NETLINK_CB (skb).eff_cap, CAP_VE_NET_ADMIN)) + return -EPERM; + return 0; + } +diff -uprN linux-2.6.8.1.orig/include/linux/shm.h linux-2.6.8.1-ve022stab078/include/linux/shm.h +--- linux-2.6.8.1.orig/include/linux/shm.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/shm.h 2006-05-11 13:05:40.000000000 +0400 +@@ -72,6 +72,8 @@ struct shm_info { + }; + + #ifdef __KERNEL__ ++struct user_beancounter; ++ + struct shmid_kernel /* private to the kernel */ + { + struct kern_ipc_perm shm_perm; +@@ -84,8 +86,12 @@ struct shmid_kernel /* private to the ke + time_t shm_ctim; + pid_t shm_cprid; + pid_t shm_lprid; ++ struct user_beancounter *shmidk_ub; ++ struct ipc_ids *_shm_ids; + }; + ++#define shmid_ub(__shmid) (__shmid)->shmidk_ub ++ + /* shm_mode upper byte flags */ + #define SHM_DEST 01000 /* segment will be destroyed on last detach */ + #define SHM_LOCKED 02000 /* segment will not be swapped */ +diff -uprN linux-2.6.8.1.orig/include/linux/shmem_fs.h linux-2.6.8.1-ve022stab078/include/linux/shmem_fs.h +--- linux-2.6.8.1.orig/include/linux/shmem_fs.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/shmem_fs.h 2006-05-11 13:05:39.000000000 +0400 +@@ -8,6 +8,8 @@ + + #define SHMEM_NR_DIRECT 16 + ++struct user_beancounter; ++ + struct shmem_inode_info { + spinlock_t lock; + unsigned long next_index; +@@ -19,8 +21,11 @@ struct shmem_inode_info { + struct shared_policy policy; + struct list_head list; + struct inode vfs_inode; ++ struct user_beancounter *info_ub; + }; + ++#define shm_info_ub(__shmi) (__shmi)->info_ub ++ + 
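[One more illustration, not part of the patch.] With CONFIG_SCHED_VCPU the sched.h hunk above distinguishes the virtual cpu id a task is scheduled on (task_cpu(), task_vsched_id()) from the physical cpu it actually executes on (task_pcpu()); without that option the two coincide. A trivial debugging helper showing the values side by side:

    #include <linux/kernel.h>
    #include <linux/sched.h>

    static void report_task_placement(struct task_struct *p)
    {
            printk(KERN_DEBUG "pid %d: vcpu %u (vsched %u), pcpu %u\n",
                   p->pid, task_cpu(p), task_vsched_id(p), task_pcpu(p));
    }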
struct shmem_sb_info { + unsigned long max_blocks; /* How many blocks are allowed */ + unsigned long free_blocks; /* How many are left for allocation */ +diff -uprN linux-2.6.8.1.orig/include/linux/signal.h linux-2.6.8.1-ve022stab078/include/linux/signal.h +--- linux-2.6.8.1.orig/include/linux/signal.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/signal.h 2006-05-11 13:05:39.000000000 +0400 +@@ -14,14 +14,19 @@ + * Real Time signals may be queued. + */ + ++struct user_beancounter; ++ + struct sigqueue { + struct list_head list; + spinlock_t *lock; + int flags; + siginfo_t info; + struct user_struct *user; ++ struct user_beancounter *sig_ub; + }; + ++#define sig_ub(__q) ((__q)->sig_ub) ++ + /* flags values. */ + #define SIGQUEUE_PREALLOC 1 + +diff -uprN linux-2.6.8.1.orig/include/linux/skbuff.h linux-2.6.8.1-ve022stab078/include/linux/skbuff.h +--- linux-2.6.8.1.orig/include/linux/skbuff.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/skbuff.h 2006-05-11 13:05:40.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <linux/compiler.h> + #include <linux/time.h> + #include <linux/cache.h> ++#include <linux/ve_owner.h> + + #include <asm/atomic.h> + #include <asm/types.h> +@@ -190,6 +191,8 @@ struct skb_shared_info { + * @tc_index: Traffic control index + */ + ++#include <ub/ub_sk.h> ++ + struct sk_buff { + /* These two members must be first. */ + struct sk_buff *next; +@@ -281,13 +284,18 @@ struct sk_buff { + *data, + *tail, + *end; ++ struct skb_beancounter skb_bc; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1))) ++ + #ifdef __KERNEL__ + /* + * Handling routines are only of interest to the kernel + */ + #include <linux/slab.h> ++#include <ub/ub_net.h> + + #include <asm/system.h> + +@@ -902,6 +910,8 @@ static inline int pskb_trim(struct sk_bu + */ + static inline void skb_orphan(struct sk_buff *skb) + { ++ ub_skb_uncharge(skb); ++ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; +diff -uprN linux-2.6.8.1.orig/include/linux/slab.h linux-2.6.8.1-ve022stab078/include/linux/slab.h +--- linux-2.6.8.1.orig/include/linux/slab.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/slab.h 2006-05-11 13:05:39.000000000 +0400 +@@ -46,6 +46,27 @@ typedef struct kmem_cache_s kmem_cache_t + what is reclaimable later*/ + #define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */ + ++/* ++ * allocation rules: __GFP_UBC 0 ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * cache (SLAB_UBC) charge charge ++ * (usual caches: mm, vma, task_struct, ...) ++ * ++ * cache (SLAB_UBC | SLAB_NO_CHARGE) charge --- ++ * (ub_kmalloc) (kmalloc) ++ * ++ * cache (no UB flags) BUG() --- ++ * (nonub caches, mempools) ++ * ++ * pages charge --- ++ * (ub_vmalloc, (vmalloc, ++ * poll, fdsets, ...) non-ub allocs) ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ */ ++#define SLAB_UBC 0x20000000UL /* alloc space for ubs ... */ ++#define SLAB_NO_CHARGE 0x40000000UL /* ... 
but don't charge */ ++ ++ + /* flags passed to a constructor func */ + #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ + #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ +@@ -97,6 +118,8 @@ found: + return __kmalloc(size, flags); + } + ++extern void *kzalloc(size_t, gfp_t); ++ + extern void kfree(const void *); + extern unsigned int ksize(const void *); + +diff -uprN linux-2.6.8.1.orig/include/linux/smp.h linux-2.6.8.1-ve022stab078/include/linux/smp.h +--- linux-2.6.8.1.orig/include/linux/smp.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/smp.h 2006-05-11 13:05:24.000000000 +0400 +@@ -54,6 +54,9 @@ extern void smp_cpus_done(unsigned int m + extern int smp_call_function (void (*func) (void *info), void *info, + int retry, int wait); + ++typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info); ++extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait); ++ + /* + * Call a function on all processors + */ +@@ -100,6 +103,7 @@ void smp_prepare_boot_cpu(void); + #define hard_smp_processor_id() 0 + #define smp_threads_ready 1 + #define smp_call_function(func,info,retry,wait) ({ 0; }) ++#define smp_nmi_call_function(func, info, wait) ({ 0; }) + #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) + static inline void smp_send_reschedule(int cpu) { } + #define num_booting_cpus() 1 +diff -uprN linux-2.6.8.1.orig/include/linux/socket.h linux-2.6.8.1-ve022stab078/include/linux/socket.h +--- linux-2.6.8.1.orig/include/linux/socket.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/socket.h 2006-05-11 13:05:42.000000000 +0400 +@@ -90,6 +90,10 @@ struct cmsghdr { + (struct cmsghdr *)(ctl) : \ + (struct cmsghdr *)NULL) + #define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) ++#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \ ++ (cmsg)->cmsg_len <= (unsigned long) \ ++ ((mhdr)->msg_controllen - \ ++ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) + + /* + * This mess will go away with glibc +@@ -287,6 +291,7 @@ extern void memcpy_tokerneliovec(struct + extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen); + extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr); + extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); ++extern int vz_security_proto_check(int family, int type, int protocol); + + #endif + #endif /* not kernel and not glibc */ +diff -uprN linux-2.6.8.1.orig/include/linux/suspend.h linux-2.6.8.1-ve022stab078/include/linux/suspend.h +--- linux-2.6.8.1.orig/include/linux/suspend.h 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/suspend.h 2006-05-11 13:05:25.000000000 +0400 +@@ -59,7 +59,7 @@ static inline int software_suspend(void) + + + #ifdef CONFIG_PM +-extern void refrigerator(unsigned long); ++extern void refrigerator(void); + extern int freeze_processes(void); + extern void thaw_processes(void); + +@@ -67,7 +67,7 @@ extern int pm_prepare_console(void); + extern void pm_restore_console(void); + + #else +-static inline void refrigerator(unsigned long flag) {} ++static inline void refrigerator(void) {} + #endif /* CONFIG_PM */ + + #ifdef CONFIG_SMP +diff -uprN linux-2.6.8.1.orig/include/linux/swap.h linux-2.6.8.1-ve022stab078/include/linux/swap.h +--- linux-2.6.8.1.orig/include/linux/swap.h 2004-08-14 14:54:47.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/include/linux/swap.h 2006-05-11 13:05:45.000000000 +0400 +@@ -13,6 +13,7 @@ + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ + #define SWAP_FLAG_PRIO_MASK 0x7fff + #define SWAP_FLAG_PRIO_SHIFT 0 ++#define SWAP_FLAG_READONLY 0x40000000 /* set if swap is read-only */ + + static inline int current_is_kswapd(void) + { +@@ -79,6 +80,7 @@ struct address_space; + struct sysinfo; + struct writeback_control; + struct zone; ++struct user_beancounter; + + /* + * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of +@@ -106,6 +108,7 @@ enum { + SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ + SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ + SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), ++ SWP_READONLY = (1 << 2) + }; + + #define SWAP_CLUSTER_MAX 32 +@@ -118,6 +121,8 @@ enum { + * extent_list.prev points at the lowest-index extent. That list is + * sorted. + */ ++struct user_beancounter; ++ + struct swap_info_struct { + unsigned int flags; + spinlock_t sdev_lock; +@@ -132,6 +137,7 @@ struct swap_info_struct { + unsigned int highest_bit; + unsigned int cluster_next; + unsigned int cluster_nr; ++ struct user_beancounter **owner_map; + int prio; /* swap priority */ + int pages; + unsigned long max; +@@ -148,7 +154,8 @@ struct swap_list_t { + #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) + + /* linux/mm/oom_kill.c */ +-extern void out_of_memory(int gfp_mask); ++struct oom_freeing_stat; ++extern void out_of_memory(struct oom_freeing_stat *, int gfp_mask); + + /* linux/mm/memory.c */ + extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); +@@ -210,7 +217,7 @@ extern long total_swap_pages; + extern unsigned int nr_swapfiles; + extern struct swap_info_struct swap_info[]; + extern void si_swapinfo(struct sysinfo *); +-extern swp_entry_t get_swap_page(void); ++extern swp_entry_t get_swap_page(struct user_beancounter *); + extern int swap_duplicate(swp_entry_t); + extern int valid_swaphandles(swp_entry_t, unsigned long *); + extern void swap_free(swp_entry_t); +@@ -219,6 +226,7 @@ extern sector_t map_swap_page(struct swa + extern struct swap_info_struct *get_swap_info_struct(unsigned); + extern int can_share_swap_page(struct page *); + extern int remove_exclusive_swap_page(struct page *); ++extern int try_to_remove_exclusive_swap_page(struct page *); + struct backing_dev_info; + + extern struct swap_list_t swap_list; +@@ -259,7 +267,7 @@ static inline int remove_exclusive_swap_ + return 0; + } + +-static inline swp_entry_t get_swap_page(void) ++static inline swp_entry_t get_swap_page(struct user_beancounter *ub) + { + swp_entry_t entry; + entry.val = 0; +diff -uprN linux-2.6.8.1.orig/include/linux/sysctl.h linux-2.6.8.1-ve022stab078/include/linux/sysctl.h +--- linux-2.6.8.1.orig/include/linux/sysctl.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/sysctl.h 2006-05-11 13:05:49.000000000 +0400 +@@ -24,6 +24,7 @@ + #include <linux/compiler.h> + + struct file; ++struct completion; + + #define CTL_MAXNAME 10 /* how many path components do we allow in a + call to sysctl? 
In other words, what is +@@ -133,6 +134,13 @@ enum + KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ + KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ + KERN_HZ_TIMER=65, /* int: hz timer on or off */ ++ KERN_SILENCE_LEVEL=66, /* int: Console silence loglevel */ ++ KERN_ALLOC_FAIL_WARN=67, /* int: whether we'll print "alloc failure" */ ++ KERN_FAIRSCHED_MAX_LATENCY=201, /* int: Max start_tag delta */ ++ KERN_VCPU_SCHED_TIMESLICE=202, ++ KERN_VCPU_TIMESLICE=203, ++ KERN_VIRT_PIDS=204, /* int: VE pids virtualization */ ++ KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */ + }; + + +@@ -320,6 +328,7 @@ enum + NET_TCP_RMEM=85, + NET_TCP_APP_WIN=86, + NET_TCP_ADV_WIN_SCALE=87, ++ NET_TCP_USE_SG=245, + NET_IPV4_NONLOCAL_BIND=88, + NET_IPV4_ICMP_RATELIMIT=89, + NET_IPV4_ICMP_RATEMASK=90, +@@ -343,6 +352,7 @@ enum + + enum { + NET_IPV4_ROUTE_FLUSH=1, ++ NET_IPV4_ROUTE_SRC_CHECK=188, + NET_IPV4_ROUTE_MIN_DELAY=2, + NET_IPV4_ROUTE_MAX_DELAY=3, + NET_IPV4_ROUTE_GC_THRESH=4, +@@ -650,6 +660,12 @@ enum + FS_XFS=17, /* struct: control xfs parameters */ + FS_AIO_NR=18, /* current system-wide number of aio requests */ + FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */ ++ FS_AT_VSYSCALL=20, /* int: to announce vsyscall data */ ++}; ++ ++/* /proc/sys/debug */ ++enum { ++ DBG_DECODE_CALLTRACES = 1, /* int: decode call traces on oops */ + }; + + /* /proc/sys/fs/quota/ */ +@@ -780,6 +796,8 @@ extern int proc_doulongvec_minmax(ctl_ta + void __user *, size_t *, loff_t *); + extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int, + struct file *, void __user *, size_t *, loff_t *); ++extern int proc_doutsstring(ctl_table *table, int write, struct file *, ++ void __user *, size_t *, loff_t *); + + extern int do_sysctl (int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, +@@ -833,6 +851,8 @@ extern ctl_handler sysctl_jiffies; + */ + + /* A sysctl table is an array of struct ctl_table: */ ++struct ve_struct; ++ + struct ctl_table + { + int ctl_name; /* Binary ID */ +@@ -846,6 +866,7 @@ struct ctl_table + struct proc_dir_entry *de; /* /proc control block */ + void *extra1; + void *extra2; ++ struct ve_struct *owner_env; + }; + + /* struct ctl_table_header is used to maintain dynamic lists of +@@ -854,12 +875,17 @@ struct ctl_table_header + { + ctl_table *ctl_table; + struct list_head ctl_entry; ++ int used; ++ struct completion *unregistering; + }; + + struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head); + void unregister_sysctl_table(struct ctl_table_header * table); + ++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr); ++void free_sysctl_clone(ctl_table *clone); ++ + #else /* __KERNEL__ */ + + #endif /* __KERNEL__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/sysrq.h linux-2.6.8.1-ve022stab078/include/linux/sysrq.h +--- linux-2.6.8.1.orig/include/linux/sysrq.h 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/sysrq.h 2006-05-11 13:05:24.000000000 +0400 +@@ -29,6 +29,12 @@ struct sysrq_key_op { + * are available -- else NULL's). 
+ */ + ++#ifdef CONFIG_SYSRQ_DEBUG ++int sysrq_eat_all(void); ++#else ++#define sysrq_eat_all() (0) ++#endif ++ + void handle_sysrq(int, struct pt_regs *, struct tty_struct *); + void __handle_sysrq(int, struct pt_regs *, struct tty_struct *); + +diff -uprN linux-2.6.8.1.orig/include/linux/tcp.h linux-2.6.8.1-ve022stab078/include/linux/tcp.h +--- linux-2.6.8.1.orig/include/linux/tcp.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/tcp.h 2006-05-11 13:05:37.000000000 +0400 +@@ -201,6 +201,27 @@ struct tcp_sack_block { + __u32 end_seq; + }; + ++struct tcp_options_received { ++/* PAWS/RTTM data */ ++ long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ ++ __u32 ts_recent; /* Time stamp to echo next */ ++ __u32 rcv_tsval; /* Time stamp value */ ++ __u32 rcv_tsecr; /* Time stamp echo reply */ ++ char saw_tstamp; /* Saw TIMESTAMP on last packet */ ++ char tstamp_ok; /* TIMESTAMP seen on SYN packet */ ++ char sack_ok; /* SACK seen on SYN packet */ ++ char wscale_ok; /* Wscale seen on SYN packet */ ++ __u8 snd_wscale; /* Window scaling received from sender */ ++ __u8 rcv_wscale; /* Window scaling to send to receiver */ ++/* SACKs data */ ++ __u8 dsack; /* D-SACK is scheduled */ ++ __u8 eff_sacks; /* Size of SACK array to send with next packet */ ++ __u8 num_sacks; /* Number of SACK blocks */ ++ __u8 __pad; ++ __u16 user_mss; /* mss requested by user in ioctl */ ++ __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ ++}; ++ + struct tcp_opt { + int tcp_header_len; /* Bytes of tcp header to send */ + +@@ -251,22 +272,19 @@ struct tcp_opt { + __u32 pmtu_cookie; /* Last pmtu seen by socket */ + __u32 mss_cache; /* Cached effective mss, not including SACKS */ + __u16 mss_cache_std; /* Like mss_cache, but without TSO */ +- __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ + __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ + __u16 ext2_header_len;/* Options depending on route */ + __u8 ca_state; /* State of fast-retransmit machine */ + __u8 retransmits; /* Number of unrecovered RTO timeouts. */ ++ __u32 frto_highmark; /* snd_nxt when RTO occurred */ + + __u8 reordering; /* Packet reordering metric. */ + __u8 frto_counter; /* Number of new acks after RTO */ +- __u32 frto_highmark; /* snd_nxt when RTO occurred */ + + __u8 unused_pad; + __u8 defer_accept; /* User waits for some data after accept() */ +- /* one byte hole, try to pack */ + + /* RTT measurement */ +- __u8 backoff; /* backoff */ + __u32 srtt; /* smothed round trip time << 3 */ + __u32 mdev; /* medium deviation */ + __u32 mdev_max; /* maximal mdev for the last rtt period */ +@@ -277,7 +295,15 @@ struct tcp_opt { + __u32 packets_out; /* Packets which are "in flight" */ + __u32 left_out; /* Packets which leaved network */ + __u32 retrans_out; /* Retransmitted packets out */ ++ __u8 backoff; /* backoff */ ++/* ++ * Options received (usually on last packet, some only on SYN packets). ++ */ ++ __u8 nonagle; /* Disable Nagle algorithm? 
*/ ++ __u8 keepalive_probes; /* num of allowed keep alive probes */ + ++ __u8 probes_out; /* unanswered 0 window probes */ ++ struct tcp_options_received rx_opt; + + /* + * Slow start and congestion control (see also Nagle, and Karn & Partridge) +@@ -303,40 +329,19 @@ struct tcp_opt { + __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ + __u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + __u32 copied_seq; /* Head of yet unread data */ +-/* +- * Options received (usually on last packet, some only on SYN packets). +- */ +- char tstamp_ok, /* TIMESTAMP seen on SYN packet */ +- wscale_ok, /* Wscale seen on SYN packet */ +- sack_ok; /* SACK seen on SYN packet */ +- char saw_tstamp; /* Saw TIMESTAMP on last packet */ +- __u8 snd_wscale; /* Window scaling received from sender */ +- __u8 rcv_wscale; /* Window scaling to send to receiver */ +- __u8 nonagle; /* Disable Nagle algorithm? */ +- __u8 keepalive_probes; /* num of allowed keep alive probes */ +- +-/* PAWS/RTTM data */ +- __u32 rcv_tsval; /* Time stamp value */ +- __u32 rcv_tsecr; /* Time stamp echo reply */ +- __u32 ts_recent; /* Time stamp to echo next */ +- long ts_recent_stamp;/* Time we stored ts_recent (for aging) */ + + /* SACKs data */ +- __u16 user_mss; /* mss requested by user in ioctl */ +- __u8 dsack; /* D-SACK is scheduled */ +- __u8 eff_sacks; /* Size of SACK array to send with next packet */ + struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ + struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ + + __u32 window_clamp; /* Maximal window to advertise */ + __u32 rcv_ssthresh; /* Current window clamp */ +- __u8 probes_out; /* unanswered 0 window probes */ +- __u8 num_sacks; /* Number of SACK blocks */ + __u16 advmss; /* Advertised MSS */ + + __u8 syn_retries; /* num of allowed syn retries */ + __u8 ecn_flags; /* ECN status bits. 
*/ + __u16 prior_ssthresh; /* ssthresh saved at recovery start */ ++ __u16 __pad1; + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u32 fackets_out; /* FACK'd packets */ +diff -uprN linux-2.6.8.1.orig/include/linux/time.h linux-2.6.8.1-ve022stab078/include/linux/time.h +--- linux-2.6.8.1.orig/include/linux/time.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/time.h 2006-05-11 13:05:32.000000000 +0400 +@@ -194,6 +194,18 @@ static inline unsigned int jiffies_to_ms + return (j * 1000) / HZ; + #endif + } ++ ++static inline unsigned int jiffies_to_usecs(const unsigned long j) ++{ ++#if HZ <= 1000 && !(1000 % HZ) ++ return (1000000 / HZ) * j; ++#elif HZ > 1000 && !(HZ % 1000) ++ return (j*1000 + (HZ - 1000))/(HZ / 1000); ++#else ++ return (j * 1000000) / HZ; ++#endif ++} ++ + static inline unsigned long msecs_to_jiffies(const unsigned int m) + { + #if HZ <= 1000 && !(1000 % HZ) +@@ -332,6 +344,7 @@ static inline unsigned long get_seconds( + struct timespec current_kernel_time(void); + + #define CURRENT_TIME (current_kernel_time()) ++#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) + + #endif /* __KERNEL__ */ + +@@ -349,6 +362,8 @@ struct itimerval; + extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); + extern int do_getitimer(int which, struct itimerval *value); + ++extern struct timespec timespec_trunc(struct timespec t, unsigned gran); ++ + static inline void + set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) + { +diff -uprN linux-2.6.8.1.orig/include/linux/tty.h linux-2.6.8.1-ve022stab078/include/linux/tty.h +--- linux-2.6.8.1.orig/include/linux/tty.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/tty.h 2006-05-11 13:05:40.000000000 +0400 +@@ -239,6 +239,8 @@ struct device; + * size each time the window is created or resized anyway. + * - TYT, 9/14/92 + */ ++struct user_beancounter; ++ + struct tty_struct { + int magic; + struct tty_driver *driver; +@@ -293,8 +295,12 @@ struct tty_struct { + spinlock_t read_lock; + /* If the tty has a pending do_SAK, queue it here - akpm */ + struct work_struct SAK_work; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ()) ++#define tty_ub(__tty) (slab_ub(__tty)) ++ + /* tty magic number */ + #define TTY_MAGIC 0x5401 + +@@ -319,6 +325,7 @@ struct tty_struct { + #define TTY_HW_COOK_IN 15 + #define TTY_PTY_LOCK 16 + #define TTY_NO_WRITE_SPLIT 17 ++#define TTY_CHARGED 18 + + #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) + +diff -uprN linux-2.6.8.1.orig/include/linux/tty_driver.h linux-2.6.8.1-ve022stab078/include/linux/tty_driver.h +--- linux-2.6.8.1.orig/include/linux/tty_driver.h 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/tty_driver.h 2006-05-11 13:05:40.000000000 +0400 +@@ -115,6 +115,7 @@ + * character to the device. 
+ */ + ++#include <linux/ve_owner.h> + #include <linux/fs.h> + #include <linux/list.h> + #include <linux/cdev.h> +@@ -214,9 +215,13 @@ struct tty_driver { + unsigned int set, unsigned int clear); + + struct list_head tty_drivers; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ()) ++ + extern struct list_head tty_drivers; ++extern rwlock_t tty_driver_guard; + + struct tty_driver *alloc_tty_driver(int lines); + void put_tty_driver(struct tty_driver *driver); +diff -uprN linux-2.6.8.1.orig/include/linux/types.h linux-2.6.8.1-ve022stab078/include/linux/types.h +--- linux-2.6.8.1.orig/include/linux/types.h 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/types.h 2006-05-11 13:05:32.000000000 +0400 +@@ -132,6 +132,10 @@ typedef __s64 int64_t; + typedef unsigned long sector_t; + #endif + ++#ifdef __KERNEL__ ++typedef unsigned gfp_t; ++#endif ++ + /* + * The type of an index into the pagecache. Use a #define so asm/types.h + * can override it. +@@ -140,6 +144,19 @@ typedef unsigned long sector_t; + #define pgoff_t unsigned long + #endif + ++#ifdef __CHECKER__ ++#define __bitwise __attribute__((bitwise)) ++#else ++#define __bitwise ++#endif ++ ++typedef __u16 __bitwise __le16; ++typedef __u16 __bitwise __be16; ++typedef __u32 __bitwise __le32; ++typedef __u32 __bitwise __be32; ++typedef __u64 __bitwise __le64; ++typedef __u64 __bitwise __be64; ++ + #endif /* __KERNEL_STRICT_NAMES */ + + /* +diff -uprN linux-2.6.8.1.orig/include/linux/ufs_fs.h linux-2.6.8.1-ve022stab078/include/linux/ufs_fs.h +--- linux-2.6.8.1.orig/include/linux/ufs_fs.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/ufs_fs.h 2006-05-11 13:05:35.000000000 +0400 +@@ -899,7 +899,7 @@ extern struct inode * ufs_new_inode (str + extern u64 ufs_frag_map (struct inode *, sector_t); + extern void ufs_read_inode (struct inode *); + extern void ufs_put_inode (struct inode *); +-extern void ufs_write_inode (struct inode *, int); ++extern int ufs_write_inode (struct inode *, int); + extern int ufs_sync_inode (struct inode *); + extern void ufs_delete_inode (struct inode *); + extern struct buffer_head * ufs_getfrag (struct inode *, unsigned, int, int *); +diff -uprN linux-2.6.8.1.orig/include/linux/ve.h linux-2.6.8.1-ve022stab078/include/linux/ve.h +--- linux-2.6.8.1.orig/include/linux/ve.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/ve.h 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,311 @@ ++/* ++ * include/linux/ve.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_VE_H ++#define _LINUX_VE_H ++ ++#include <linux/config.h> ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++#include <linux/types.h> ++#include <linux/capability.h> ++#include <linux/utsname.h> ++#include <linux/sysctl.h> ++#include <linux/vzstat.h> ++#include <linux/kobject.h> ++ ++#ifdef VZMON_DEBUG ++# define VZTRACE(fmt,args...) \ ++ printk(KERN_DEBUG fmt, ##args) ++#else ++# define VZTRACE(fmt,args...) 
++#endif /* VZMON_DEBUG */ ++ ++struct tty_driver; ++struct devpts_config; ++struct task_struct; ++struct new_utsname; ++struct file_system_type; ++struct icmp_mib; ++struct ip_mib; ++struct tcp_mib; ++struct udp_mib; ++struct linux_mib; ++struct fib_info; ++struct fib_rule; ++struct veip_struct; ++struct ve_monitor; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++struct fib_table; ++struct devcnfv4_struct; ++#ifdef CONFIG_VE_IPTABLES ++struct ipt_filter_initial_table; ++struct ipt_nat_initial_table; ++struct ipt_table; ++struct ip_conntrack; ++struct nf_hook_ops; ++struct ve_ip_conntrack { ++ struct list_head *_ip_conntrack_hash; ++ struct list_head _ip_conntrack_expect_list; ++ struct list_head _ip_conntrack_protocol_list; ++ struct list_head _ip_conntrack_helpers; ++ int _ip_conntrack_max; ++ unsigned long _ip_ct_tcp_timeouts[10]; ++ unsigned long _ip_ct_udp_timeout; ++ unsigned long _ip_ct_udp_timeout_stream; ++ unsigned long _ip_ct_icmp_timeout; ++ unsigned long _ip_ct_generic_timeout; ++ atomic_t _ip_conntrack_count; ++ void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack); ++#ifdef CONFIG_SYSCTL ++ struct ctl_table_header *_ip_ct_sysctl_header; ++ ctl_table *_ip_ct_net_table; ++ ctl_table *_ip_ct_ipv4_table; ++ ctl_table *_ip_ct_netfilter_table; ++ ctl_table *_ip_ct_sysctl_table; ++#endif /*CONFIG_SYSCTL*/ ++ ++ int _ip_conntrack_ftp_ports_c; ++ int _ip_conntrack_irc_ports_c; ++ ++ struct list_head _ip_nat_protos; ++ struct list_head _ip_nat_helpers; ++ struct list_head *_ip_nat_bysource; ++ struct ipt_nat_initial_table *_ip_nat_initial_table; ++ struct ipt_table *_ip_nat_table; ++ ++ int _ip_nat_ftp_ports_c; ++ int _ip_nat_irc_ports_c; ++ ++ /* resource accounting */ ++ struct user_beancounter *ub; ++}; ++#endif ++#endif ++ ++#define UIDHASH_BITS_VE 6 ++#define UIDHASH_SZ_VE (1 << UIDHASH_BITS_VE) ++ ++struct ve_cpu_stats { ++ cycles_t idle_time; ++ cycles_t iowait_time; ++ cycles_t strt_idle_time; ++ cycles_t used_time; ++ seqcount_t stat_lock; ++ int nr_running; ++ int nr_unint; ++ int nr_iowait; ++ u64 user; ++ u64 nice; ++ u64 system; ++} ____cacheline_aligned; ++ ++struct ve_struct { ++ struct ve_struct *prev; ++ struct ve_struct *next; ++ ++ envid_t veid; ++ struct task_struct *init_entry; ++ struct list_head vetask_lh; ++ kernel_cap_t cap_default; ++ atomic_t pcounter; ++ /* ref counter to ve from ipc */ ++ atomic_t counter; ++ unsigned int class_id; ++ struct veip_struct *veip; ++ struct rw_semaphore op_sem; ++ int is_running; ++ int is_locked; ++ int virt_pids; ++ /* see vzcalluser.h for VE_FEATURE_XXX definitions */ ++ __u64 features; ++ ++/* VE's root */ ++ struct vfsmount *fs_rootmnt; ++ struct dentry *fs_root; ++ ++/* sysctl */ ++ struct new_utsname *utsname; ++ struct list_head sysctl_lh; ++ struct ctl_table_header *kern_header; ++ struct ctl_table *kern_table; ++ struct ctl_table_header *quota_header; ++ struct ctl_table *quota_table; ++ struct file_system_type *proc_fstype; ++ struct vfsmount *proc_mnt; ++ struct proc_dir_entry *proc_root; ++ struct proc_dir_entry *proc_sys_root; ++ ++/* SYSV IPC */ ++ struct ipc_ids *_shm_ids; ++ struct ipc_ids *_msg_ids; ++ struct ipc_ids *_sem_ids; ++ int _used_sems; ++ int _shm_tot; ++ size_t _shm_ctlmax; ++ size_t _shm_ctlall; ++ int _shm_ctlmni; ++ int _msg_ctlmax; ++ int _msg_ctlmni; ++ int _msg_ctlmnb; ++ int _sem_ctls[4]; ++ ++/* BSD pty's */ ++ struct tty_driver *pty_driver; ++ struct tty_driver *pty_slave_driver; ++ ++#ifdef CONFIG_UNIX98_PTYS ++ struct tty_driver *ptm_driver; ++ struct 
tty_driver *pts_driver; ++ struct idr *allocated_ptys; ++#endif ++ struct file_system_type *devpts_fstype; ++ struct vfsmount *devpts_mnt; ++ struct dentry *devpts_root; ++ struct devpts_config *devpts_config; ++ ++ struct file_system_type *shmem_fstype; ++ struct vfsmount *shmem_mnt; ++#ifdef CONFIG_SYSFS ++ struct file_system_type *sysfs_fstype; ++ struct vfsmount *sysfs_mnt; ++ struct super_block *sysfs_sb; ++#endif ++ struct subsystem *class_subsys; ++ struct subsystem *class_obj_subsys; ++ struct class *net_class; ++ ++/* User uids hash */ ++ struct list_head uidhash_table[UIDHASH_SZ_VE]; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct hlist_head _net_dev_head; ++ struct hlist_head _net_dev_index_head; ++ struct net_device *_net_dev_base, **_net_dev_tail; ++ int ifindex; ++ struct net_device *_loopback_dev; ++ struct net_device *_venet_dev; ++ struct ipv4_devconf *_ipv4_devconf; ++ struct ipv4_devconf *_ipv4_devconf_dflt; ++ struct ctl_table_header *forward_header; ++ struct ctl_table *forward_table; ++#endif ++ unsigned long rt_flush_required; ++ ++/* per VE CPU stats*/ ++ struct timespec start_timespec; ++ u64 start_jiffies; ++ cycles_t start_cycles; ++ unsigned long avenrun[3]; /* loadavg data */ ++ ++ cycles_t cpu_used_ve; ++ struct kstat_lat_pcpu_struct sched_lat_ve; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct fib_info *_fib_info_list; ++ struct fib_rule *_local_rule; ++ struct fib_rule *_fib_rules; ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ /* XXX: why a magic constant? */ ++ struct fib_table *_fib_tables[256]; /* RT_TABLE_MAX - for now */ ++#else ++ struct fib_table *_main_table; ++ struct fib_table *_local_table; ++#endif ++ struct icmp_mib *_icmp_statistics[2]; ++ struct ipstats_mib *_ip_statistics[2]; ++ struct tcp_mib *_tcp_statistics[2]; ++ struct udp_mib *_udp_statistics[2]; ++ struct linux_mib *_net_statistics[2]; ++ struct venet_stat *stat; ++#ifdef CONFIG_VE_IPTABLES ++/* core/netfilter.c virtualization */ ++ void *_nf_hooks; ++ struct ipt_filter_initial_table *_ipt_filter_initial_table; /* initial_table struct */ ++ struct ipt_table *_ve_ipt_filter_pf; /* packet_filter struct */ ++ struct nf_hook_ops *_ve_ipt_filter_io; /* ipt_ops struct */ ++ struct ipt_table *_ipt_mangle_table; ++ struct nf_hook_ops *_ipt_mangle_hooks; ++ struct list_head *_ipt_target; ++ struct list_head *_ipt_match; ++ struct list_head *_ipt_tables; ++ ++ struct ipt_target *_ipt_standard_target; ++ struct ipt_target *_ipt_error_target; ++ struct ipt_match *_tcp_matchstruct; ++ struct ipt_match *_udp_matchstruct; ++ struct ipt_match *_icmp_matchstruct; ++ ++ __u64 _iptables_modules; ++ struct ve_ip_conntrack *_ip_conntrack; ++#endif /* CONFIG_VE_IPTABLES */ ++#endif ++ wait_queue_head_t *_log_wait; ++ unsigned long *_log_start; ++ unsigned long *_log_end; ++ unsigned long *_logged_chars; ++ char *log_buf; ++#define VE_DEFAULT_LOG_BUF_LEN 4096 ++ ++ struct ve_cpu_stats ve_cpu_stats[NR_CPUS] ____cacheline_aligned; ++ unsigned long down_at; ++ struct list_head cleanup_list; ++ ++ unsigned long jiffies_fixup; ++ unsigned char disable_net; ++ unsigned char sparse_vpid; ++ struct ve_monitor *monitor; ++ struct proc_dir_entry *monitor_proc; ++}; ++ ++#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)])) ++ ++extern int nr_ve; ++ ++#ifdef CONFIG_VE ++ ++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode); ++void do_env_cleanup(struct ve_struct *envid); ++void do_update_load_avg_ve(void); ++void do_env_free(struct 
ve_struct *ptr); ++ ++#define ve_utsname (*get_exec_env()->utsname) ++ ++static inline struct ve_struct *get_ve(struct ve_struct *ptr) ++{ ++ if (ptr != NULL) ++ atomic_inc(&ptr->counter); ++ return ptr; ++} ++ ++static inline void put_ve(struct ve_struct *ptr) ++{ ++ if (ptr && atomic_dec_and_test(&ptr->counter)) { ++ if (atomic_read(&ptr->pcounter) > 0) ++ BUG(); ++ if (ptr->is_running) ++ BUG(); ++ do_env_free(ptr); ++ } ++} ++ ++#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map(ve->veid, mask) ++#else /* CONFIG_VE */ ++#define ve_utsname system_utsname ++#define get_ve(ve) (NULL) ++#define put_ve(ve) do { } while (0) ++#endif /* CONFIG_VE */ ++ ++#endif /* _LINUX_VE_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/ve_owner.h linux-2.6.8.1-ve022stab078/include/linux/ve_owner.h +--- linux-2.6.8.1.orig/include/linux/ve_owner.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/ve_owner.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,32 @@ ++/* ++ * include/linux/ve_proto.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VE_OWNER_H__ ++#define __VE_OWNER_H__ ++ ++#include <linux/config.h> ++#include <linux/vmalloc.h> ++ ++ ++#define DCL_VE_OWNER(name, kind, type, member, attr1, attr2) ++ /* prototype declares static inline functions */ ++ ++#define DCL_VE_OWNER_PROTO(name, kind, type, member, attr1, attr2) \ ++type; \ ++static inline struct ve_struct *VE_OWNER_##name(type *obj) \ ++{ \ ++ return obj->member; \ ++} \ ++static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \ ++{ \ ++ obj->member = ve; \ ++} ++ ++#endif /* __VE_OWNER_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/ve_proto.h linux-2.6.8.1-ve022stab078/include/linux/ve_proto.h +--- linux-2.6.8.1.orig/include/linux/ve_proto.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/ve_proto.h 2006-05-11 13:05:42.000000000 +0400 +@@ -0,0 +1,73 @@ ++/* ++ * include/linux/ve_proto.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VE_H__ ++#define __VE_H__ ++ ++#ifdef CONFIG_VE ++ ++extern struct semaphore ve_call_guard; ++extern rwlock_t ve_call_lock; ++ ++#ifdef CONFIG_SYSVIPC ++extern void prepare_ipc(void); ++extern int init_ve_ipc(struct ve_struct *); ++extern void fini_ve_ipc(struct ve_struct *); ++extern void ve_ipc_cleanup(void); ++#endif ++ ++extern struct tty_driver *get_pty_driver(void); ++extern struct tty_driver *get_pty_slave_driver(void); ++#ifdef CONFIG_UNIX98_PTYS ++extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ ++extern struct tty_driver *pts_driver; /* Unix98 pty slaves; for /dev/ptmx */ ++#endif ++ ++extern rwlock_t tty_driver_guard; ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++void ip_fragment_cleanup(struct ve_struct *envid); ++void tcp_v4_kill_ve_sockets(struct ve_struct *envid); ++struct fib_table * fib_hash_init(int id); ++int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); ++extern int main_loopback_init(struct net_device*); ++int venet_init(void); ++#endif ++ ++extern struct ve_struct *ve_list_head; ++extern rwlock_t ve_list_guard; ++extern struct ve_struct *get_ve_by_id(envid_t); ++extern struct ve_struct *__find_ve_by_id(envid_t); ++ ++extern int do_setdevperms(envid_t veid, unsigned type, ++ dev_t dev, unsigned mask); ++ ++#define VE_HOOK_INIT 0 ++#define VE_HOOK_FINI 1 ++#define VE_MAX_HOOKS 2 ++ ++typedef int ve_hookfn(unsigned int hooknum, void *data); ++ ++struct ve_hook ++{ ++ struct list_head list; ++ ve_hookfn *hook; ++ ve_hookfn *undo; ++ struct module *owner; ++ int hooknum; ++ /* Functions are called in ascending priority. */ ++ int priority; ++}; ++ ++extern int ve_hook_register(struct ve_hook *vh); ++extern void ve_hook_unregister(struct ve_hook *vh); ++ ++#endif ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/ve_task.h linux-2.6.8.1-ve022stab078/include/linux/ve_task.h +--- linux-2.6.8.1.orig/include/linux/ve_task.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/ve_task.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,34 @@ ++/* ++ * include/linux/ve_task.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VE_TASK_H__ ++#define __VE_TASK_H__ ++ ++#include <linux/seqlock.h> ++ ++struct ve_task_info { ++/* virtualization */ ++ struct ve_struct *owner_env; ++ struct ve_struct *exec_env; ++ struct list_head vetask_list; ++ struct dentry *glob_proc_dentry; ++/* statistics: scheduling latency */ ++ cycles_t sleep_time; ++ cycles_t sched_time; ++ cycles_t sleep_stamp; ++ cycles_t wakeup_stamp; ++ seqcount_t wakeup_lock; ++}; ++ ++#define VE_TASK_INFO(task) (&(task)->ve_task_info) ++#define VE_TASK_LIST_2_TASK(lh) \ ++ list_entry(lh, struct task_struct, ve_task_info.vetask_list) ++ ++#endif /* __VE_TASK_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/venet.h linux-2.6.8.1-ve022stab078/include/linux/venet.h +--- linux-2.6.8.1.orig/include/linux/venet.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/venet.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,68 @@ ++/* ++ * include/linux/venet.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _VENET_H ++#define _VENET_H ++ ++#include <linux/list.h> ++#include <linux/spinlock.h> ++#include <linux/vzcalluser.h> ++ ++#define VEIP_HASH_SZ 512 ++ ++struct ve_struct; ++struct venet_stat; ++struct ip_entry_struct ++{ ++ __u32 ip; ++ struct ve_struct *active_env; ++ struct venet_stat *stat; ++ struct veip_struct *veip; ++ struct list_head ip_hash; ++ struct list_head ve_list; ++}; ++ ++struct veip_struct ++{ ++ struct list_head src_lh; ++ struct list_head dst_lh; ++ struct list_head ip_lh; ++ struct list_head list; ++ envid_t veid; ++}; ++ ++/* veip_hash_lock should be taken for write by caller */ ++void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip); ++/* veip_hash_lock should be taken for write by caller */ ++void ip_entry_unhash(struct ip_entry_struct *entry); ++/* veip_hash_lock should be taken for read by caller */ ++struct ip_entry_struct *ip_entry_lookup(u32 addr); ++ ++/* veip_hash_lock should be taken for read by caller */ ++struct veip_struct *veip_find(envid_t veid); ++/* veip_hash_lock should be taken for write by caller */ ++struct veip_struct *veip_findcreate(envid_t veid); ++/* veip_hash_lock should be taken for write by caller */ ++void veip_put(struct veip_struct *veip); ++ ++int veip_start(struct ve_struct *ve); ++void veip_stop(struct ve_struct *ve); ++int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr); ++int veip_entry_del(envid_t veid, struct sockaddr_in *addr); ++int venet_change_skb_owner(struct sk_buff *skb); ++ ++extern struct list_head ip_entry_hash_table[]; ++extern rwlock_t veip_hash_lock; ++ ++#ifdef CONFIG_PROC_FS ++int veip_seq_show(struct seq_file *m, void *v); ++#endif ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/veprintk.h linux-2.6.8.1-ve022stab078/include/linux/veprintk.h +--- linux-2.6.8.1.orig/include/linux/veprintk.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/veprintk.h 2006-05-11 13:05:42.000000000 +0400 +@@ -0,0 +1,38 @@ ++/* ++ * include/linux/veprintk.h ++ * ++ * Copyright (C) 2006 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VE_PRINTK_H__ ++#define __VE_PRINTK_H__ ++ ++#ifdef CONFIG_VE ++ ++#define ve_log_wait (*(get_exec_env()->_log_wait)) ++#define ve_log_start (*(get_exec_env()->_log_start)) ++#define ve_log_end (*(get_exec_env()->_log_end)) ++#define ve_logged_chars (*(get_exec_env()->_logged_chars)) ++#define ve_log_buf (get_exec_env()->log_buf) ++#define ve_log_buf_len (ve_is_super(get_exec_env()) ? \ ++ log_buf_len : VE_DEFAULT_LOG_BUF_LEN) ++#define VE_LOG_BUF_MASK (ve_log_buf_len - 1) ++#define VE_LOG_BUF(idx) (ve_log_buf[(idx) & VE_LOG_BUF_MASK]) ++ ++#else ++ ++#define ve_log_wait log_wait ++#define ve_log_start log_start ++#define ve_log_end log_end ++#define ve_logged_chars logged_chars ++#define ve_log_buf log_buf ++#define ve_log_buf_len log_buf_len ++#define VE_LOG_BUF_MASK LOG_BUF_MASK ++#define VE_LOG_BUF(idx) LOG_BUF(idx) ++ ++#endif /* CONFIG_VE */ ++#endif /* __VE_PRINTK_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/virtinfo.h linux-2.6.8.1-ve022stab078/include/linux/virtinfo.h +--- linux-2.6.8.1.orig/include/linux/virtinfo.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/virtinfo.h 2006-05-11 13:05:49.000000000 +0400 +@@ -0,0 +1,86 @@ ++/* ++ * include/linux/virtinfo.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __LINUX_VIRTINFO_H ++#define __LINUX_VIRTINFO_H ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <linux/page-flags.h> ++#include <linux/notifier.h> ++ ++struct vnotifier_block ++{ ++ int (*notifier_call)(struct vnotifier_block *self, ++ unsigned long, void *, int); ++ struct vnotifier_block *next; ++ int priority; ++}; ++ ++extern struct semaphore virtinfo_sem; ++void __virtinfo_notifier_register(int type, struct vnotifier_block *nb); ++void virtinfo_notifier_register(int type, struct vnotifier_block *nb); ++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb); ++int virtinfo_notifier_call(int type, unsigned long n, void *data); ++ ++struct meminfo { ++ struct sysinfo si; ++ unsigned long active, inactive; ++ unsigned long cache, swapcache; ++ unsigned long committed_space; ++ struct page_state ps; ++ unsigned long vmalloc_total, vmalloc_used, vmalloc_largest; ++}; ++ ++#define VIRTINFO_DOFORK 0 ++#define VIRTINFO_DOEXIT 1 ++#define VIRTINFO_DOEXECVE 2 ++#define VIRTINFO_DOFORKRET 3 ++#define VIRTINFO_DOFORKPOST 4 ++#define VIRTINFO_EXIT 5 ++#define VIRTINFO_EXITMMAP 6 ++#define VIRTINFO_EXECMMAP 7 ++#define VIRTINFO_ENOUGHMEM 8 ++#define VIRTINFO_OUTOFMEM 9 ++#define VIRTINFO_PAGEIN 10 ++#define VIRTINFO_MEMINFO 11 ++#define VIRTINFO_SYSINFO 12 ++#define VIRTINFO_NEWUBC 13 ++ ++enum virt_info_types { ++ VITYPE_GENERAL, ++ VITYPE_FAUDIT, ++ VITYPE_QUOTA, ++ VITYPE_SCP, ++ ++ VIRT_TYPES ++}; ++ ++#ifdef CONFIG_VZ_GENCALLS ++ ++static inline int virtinfo_gencall(unsigned long n, void *data) ++{ ++ int r; ++ ++ r = virtinfo_notifier_call(VITYPE_GENERAL, n, data); ++ if (r & NOTIFY_FAIL) ++ return -ENOBUFS; ++ if (r & NOTIFY_OK) ++ return -ERESTARTNOINTR; ++ return 0; ++} ++ ++#else ++ ++#define virtinfo_gencall(n, data) 0 ++ ++#endif ++ ++#endif /* __LINUX_VIRTINFO_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/vmalloc.h linux-2.6.8.1-ve022stab078/include/linux/vmalloc.h +--- linux-2.6.8.1.orig/include/linux/vmalloc.h 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/vmalloc.h 2006-05-11 13:05:40.000000000 +0400 +@@ -9,6 +9,10 @@ + #define VM_ALLOC 0x00000002 /* vmalloc() */ + #define VM_MAP 0x00000004 /* vmap()ed pages */ + ++/* align size to 2^n page boundary */ ++#define POWER2_PAGE_ALIGN(size) \ ++ ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size)))) ++ + struct vm_struct { + void *addr; + unsigned long size; +@@ -26,6 +30,8 @@ extern void *vmalloc(unsigned long size) + extern void *vmalloc_exec(unsigned long size); + extern void *vmalloc_32(unsigned long size); + extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot); ++extern void *vmalloc_best(unsigned long size); ++extern void *ub_vmalloc_best(unsigned long size); + extern void vfree(void *addr); + + extern void *vmap(struct page **pages, unsigned int count, +@@ -38,6 +44,9 @@ extern void vunmap(void *addr); + extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); + extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end); ++extern struct vm_struct * get_vm_area_best(unsigned long size, ++ unsigned long flags); ++extern void vprintstat(void); + extern struct vm_struct *remove_vm_area(void *addr); + extern int map_vm_area(struct vm_struct *area, pgprot_t prot, + struct page ***pages); +diff -uprN linux-2.6.8.1.orig/include/linux/vsched.h linux-2.6.8.1-ve022stab078/include/linux/vsched.h +--- 
linux-2.6.8.1.orig/include/linux/vsched.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vsched.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,34 @@ ++/* ++ * include/linux/vsched.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VSCHED_H__ ++#define __VSCHED_H__ ++ ++#include <linux/config.h> ++#include <linux/cache.h> ++#include <linux/fairsched.h> ++#include <linux/sched.h> ++ ++extern int vsched_create(int id, struct fairsched_node *node); ++extern int vsched_destroy(struct vcpu_scheduler *vsched); ++ ++extern int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched); ++ ++extern int vcpu_online(int cpu); ++ ++#ifdef CONFIG_VE ++#ifdef CONFIG_FAIRSCHED ++extern unsigned long ve_scale_khz(unsigned long khz); ++#else ++#define ve_scale_khz(khz) (khz) ++#endif ++#endif ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/vzcalluser.h linux-2.6.8.1-ve022stab078/include/linux/vzcalluser.h +--- linux-2.6.8.1.orig/include/linux/vzcalluser.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzcalluser.h 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,220 @@ ++/* ++ * include/linux/vzcalluser.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_VZCALLUSER_H ++#define _LINUX_VZCALLUSER_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#define KERN_VZ_PRIV_RANGE 51 ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++/* ++ * VE management ioctls ++ */ ++ ++struct vzctl_old_env_create { ++ envid_t veid; ++ unsigned flags; ++#define VE_CREATE 1 /* Create VE, VE_ENTER added automatically */ ++#define VE_EXCLUSIVE 2 /* Fail if exists */ ++#define VE_ENTER 4 /* Enter existing VE */ ++#define VE_TEST 8 /* Test if VE exists */ ++#define VE_LOCK 16 /* Do not allow entering created VE */ ++#define VE_SKIPLOCK 32 /* Allow entering embrion VE */ ++ __u32 addr; ++}; ++ ++struct vzctl_mark_env_to_down { ++ envid_t veid; ++}; ++ ++struct vzctl_setdevperms { ++ envid_t veid; ++ unsigned type; ++#define VE_USE_MAJOR 010 /* Test MAJOR supplied in rule */ ++#define VE_USE_MINOR 030 /* Test MINOR supplied in rule */ ++#define VE_USE_MASK 030 /* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */ ++ unsigned dev; ++ unsigned mask; ++}; ++ ++struct vzctl_ve_netdev { ++ envid_t veid; ++ int op; ++#define VE_NETDEV_ADD 1 ++#define VE_NETDEV_DEL 2 ++ char *dev_name; ++}; ++ ++/* these masks represent modules */ ++#define VE_IP_IPTABLES_MOD (1U<<0) ++#define VE_IP_FILTER_MOD (1U<<1) ++#define VE_IP_MANGLE_MOD (1U<<2) ++#define VE_IP_MATCH_LIMIT_MOD (1U<<3) ++#define VE_IP_MATCH_MULTIPORT_MOD (1U<<4) ++#define VE_IP_MATCH_TOS_MOD (1U<<5) ++#define VE_IP_TARGET_TOS_MOD (1U<<6) ++#define VE_IP_TARGET_REJECT_MOD (1U<<7) ++#define VE_IP_TARGET_TCPMSS_MOD (1U<<8) ++#define VE_IP_MATCH_TCPMSS_MOD (1U<<9) ++#define VE_IP_MATCH_TTL_MOD (1U<<10) ++#define VE_IP_TARGET_LOG_MOD (1U<<11) ++#define VE_IP_MATCH_LENGTH_MOD (1U<<12) ++#define VE_IP_CONNTRACK_MOD (1U<<14) ++#define VE_IP_CONNTRACK_FTP_MOD (1U<<15) ++#define VE_IP_CONNTRACK_IRC_MOD (1U<<16) ++#define VE_IP_MATCH_CONNTRACK_MOD (1U<<17) ++#define VE_IP_MATCH_STATE_MOD (1U<<18) ++#define VE_IP_MATCH_HELPER_MOD (1U<<19) ++#define VE_IP_NAT_MOD (1U<<20) ++#define VE_IP_NAT_FTP_MOD (1U<<21) ++#define VE_IP_NAT_IRC_MOD (1U<<22) ++#define 
VE_IP_TARGET_REDIRECT_MOD (1U<<23) ++ ++/* these masks represent modules with their dependences */ ++#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD) ++#define VE_IP_FILTER (VE_IP_FILTER_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_LIMIT (VE_IP_MATCH_LIMIT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_MULTIPORT (VE_IP_MATCH_MULTIPORT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TOS (VE_IP_MATCH_TOS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_TOS (VE_IP_TARGET_TOS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_REJECT (VE_IP_TARGET_REJECT_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_TCPMSS (VE_IP_TARGET_TCPMSS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TCPMSS (VE_IP_MATCH_TCPMSS_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_TTL (VE_IP_MATCH_TTL_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_TARGET_LOG (VE_IP_TARGET_LOG_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_MATCH_LENGTH (VE_IP_MATCH_LENGTH_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \ ++ | VE_IP_IPTABLES) ++#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_CONNTRACK (VE_IP_MATCH_CONNTRACK_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_STATE (VE_IP_MATCH_STATE_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_MATCH_HELPER (VE_IP_MATCH_HELPER_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_NAT (VE_IP_NAT_MOD \ ++ | VE_IP_CONNTRACK) ++#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \ ++ | VE_IP_NAT | VE_IP_CONNTRACK_FTP) ++#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \ ++ | VE_IP_NAT | VE_IP_CONNTRACK_IRC) ++#define VE_IP_TARGET_REDIRECT (VE_IP_TARGET_REDIRECT_MOD \ ++ | VE_IP_NAT) ++ ++/* safe iptables mask to be used by default */ ++#define VE_IP_DEFAULT \ ++ (VE_IP_IPTABLES | \ ++ VE_IP_FILTER | VE_IP_MANGLE | \ ++ VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT | \ ++ VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | \ ++ VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS | \ ++ VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH) ++ ++#define VE_IPT_CMP(x,y) (((x) & (y)) == (y)) ++ ++struct vzctl_env_create_cid { ++ envid_t veid; ++ unsigned flags; ++ __u32 class_id; ++}; ++ ++struct vzctl_env_create { ++ envid_t veid; ++ unsigned flags; ++ __u32 class_id; ++}; ++ ++struct env_create_param { ++ __u64 iptables_mask; ++}; ++#define VZCTL_ENV_CREATE_DATA_MINLEN sizeof(struct env_create_param) ++ ++struct env_create_param2 { ++ __u64 iptables_mask; ++ __u64 feature_mask; ++#define VE_FEATURE_SYSFS (1ULL << 0) ++ __u32 total_vcpus; /* 0 - don't care, same as in host */ ++}; ++#define VZCTL_ENV_CREATE_DATA_MAXLEN sizeof(struct env_create_param2) ++ ++typedef struct env_create_param2 env_create_param_t; ++ ++struct vzctl_env_create_data { ++ envid_t veid; ++ unsigned flags; ++ __u32 class_id; ++ env_create_param_t *data; ++ int datalen; ++}; ++ ++struct vz_load_avg { ++ int val_int; ++ int val_frac; ++}; ++ ++struct vz_cpu_stat { ++ unsigned long user_jif; ++ unsigned long nice_jif; ++ unsigned long system_jif; ++ unsigned long uptime_jif; ++ __u64 idle_clk; ++ __u64 strv_clk; ++ __u64 uptime_clk; ++ struct vz_load_avg avenrun[3]; /* loadavg data */ ++}; ++ ++struct vzctl_cpustatctl { ++ envid_t veid; ++ struct vz_cpu_stat *cpustat; ++}; ++ ++#define VZCTLTYPE '.' 
++#define VZCTL_OLD_ENV_CREATE _IOW(VZCTLTYPE, 0, \ ++ struct vzctl_old_env_create) ++#define VZCTL_MARK_ENV_TO_DOWN _IOW(VZCTLTYPE, 1, \ ++ struct vzctl_mark_env_to_down) ++#define VZCTL_SETDEVPERMS _IOW(VZCTLTYPE, 2, \ ++ struct vzctl_setdevperms) ++#define VZCTL_ENV_CREATE_CID _IOW(VZCTLTYPE, 4, \ ++ struct vzctl_env_create_cid) ++#define VZCTL_ENV_CREATE _IOW(VZCTLTYPE, 5, \ ++ struct vzctl_env_create) ++#define VZCTL_GET_CPU_STAT _IOW(VZCTLTYPE, 6, \ ++ struct vzctl_cpustatctl) ++#define VZCTL_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10, \ ++ struct vzctl_env_create_data) ++#define VZCTL_VE_NETDEV _IOW(VZCTLTYPE, 11, \ ++ struct vzctl_ve_netdev) ++ ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/vzctl.h linux-2.6.8.1-ve022stab078/include/linux/vzctl.h +--- linux-2.6.8.1.orig/include/linux/vzctl.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzctl.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,30 @@ ++/* ++ * include/linux/vzctl.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _LINUX_VZCTL_H ++#define _LINUX_VZCTL_H ++ ++#include <linux/list.h> ++ ++struct module; ++struct inode; ++struct file; ++struct vzioctlinfo { ++ unsigned type; ++ int (*func)(struct inode *, struct file *, ++ unsigned int, unsigned long); ++ struct module *owner; ++ struct list_head list; ++}; ++ ++extern void vzioctl_register(struct vzioctlinfo *inf); ++extern void vzioctl_unregister(struct vzioctlinfo *inf); ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/vzctl_quota.h linux-2.6.8.1-ve022stab078/include/linux/vzctl_quota.h +--- linux-2.6.8.1.orig/include/linux/vzctl_quota.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzctl_quota.h 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,43 @@ ++/* ++ * include/linux/vzctl_quota.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __LINUX_VZCTL_QUOTA_H__ ++#define __LINUX_VZCTL_QUOTA_H__ ++ ++/* ++ * Quota management ioctl ++ */ ++ ++struct vz_quota_stat; ++struct vzctl_quotactl { ++ int cmd; ++ unsigned int quota_id; ++ struct vz_quota_stat *qstat; ++ char *ve_root; ++}; ++ ++struct vzctl_quotaugidctl { ++ int cmd; /* subcommand */ ++ unsigned int quota_id; /* quota id where it applies to */ ++ unsigned int ugid_index;/* for reading statistic. index of first ++ uid/gid record to read */ ++ unsigned int ugid_size; /* size of ugid_buf array */ ++ void *addr; /* user-level buffer */ ++}; ++ ++#define VZDQCTLTYPE '+' ++#define VZCTL_QUOTA_CTL _IOWR(VZDQCTLTYPE, 1, \ ++ struct vzctl_quotactl) ++#define VZCTL_QUOTA_NEW_CTL _IOWR(VZDQCTLTYPE, 2, \ ++ struct vzctl_quotactl) ++#define VZCTL_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3, \ ++ struct vzctl_quotaugidctl) ++ ++#endif /* __LINUX_VZCTL_QUOTA_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/vzctl_venet.h linux-2.6.8.1-ve022stab078/include/linux/vzctl_venet.h +--- linux-2.6.8.1.orig/include/linux/vzctl_venet.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzctl_venet.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,36 @@ ++/* ++ * include/linux/vzctl_venet.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _VZCTL_VENET_H ++#define _VZCTL_VENET_H ++ ++#include <linux/types.h> ++#include <linux/ioctl.h> ++ ++#ifndef __ENVID_T_DEFINED__ ++typedef unsigned envid_t; ++#define __ENVID_T_DEFINED__ ++#endif ++ ++struct vzctl_ve_ip_map { ++ envid_t veid; ++ int op; ++#define VE_IP_ADD 1 ++#define VE_IP_DEL 2 ++ struct sockaddr *addr; ++ int addrlen; ++}; ++ ++#define VENETCTLTYPE '(' ++ ++#define VENETCTL_VE_IP_MAP _IOW(VENETCTLTYPE, 3, \ ++ struct vzctl_ve_ip_map) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/linux/vzdq_tree.h linux-2.6.8.1-ve022stab078/include/linux/vzdq_tree.h +--- linux-2.6.8.1.orig/include/linux/vzdq_tree.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzdq_tree.h 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,99 @@ ++/* ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo disk quota tree definition ++ */ ++ ++#ifndef _VZDQ_TREE_H ++#define _VZDQ_TREE_H ++ ++#include <linux/list.h> ++#include <asm/string.h> ++ ++typedef unsigned int quotaid_t; ++#define QUOTAID_BITS 32 ++#define QUOTAID_BBITS 4 ++#define QUOTAID_EBITS 8 ++ ++#if QUOTAID_EBITS % QUOTAID_BBITS ++#error Quota bit assumption failure ++#endif ++ ++#define QUOTATREE_BSIZE (1 << QUOTAID_BBITS) ++#define QUOTATREE_BMASK (QUOTATREE_BSIZE - 1) ++#define QUOTATREE_DEPTH ((QUOTAID_BITS + QUOTAID_BBITS - 1) \ ++ / QUOTAID_BBITS) ++#define QUOTATREE_EDEPTH ((QUOTAID_BITS + QUOTAID_EBITS - 1) \ ++ / QUOTAID_EBITS) ++#define QUOTATREE_BSHIFT(lvl) ((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS) ++ ++/* ++ * Depth of keeping unused node (not inclusive). ++ * 0 means release all nodes including root, ++ * QUOTATREE_DEPTH means never release nodes. ++ * Current value: release all nodes strictly after QUOTATREE_EDEPTH ++ * (measured in external shift units). ++ */ ++#define QUOTATREE_CDEPTH (QUOTATREE_DEPTH \ ++ - 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \ ++ + 1) ++ ++/* ++ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes. ++ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS), ++ * and each node contains 2^QUOTAID_BBITS pointers. ++ * Level 0 is a (single) tree root node. ++ * ++ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data. ++ * Nodes of lower levels contain pointers to nodes. ++ * ++ * Double pointer in array of i-level node, pointing to a (i+1)-level node ++ * (such as inside quotatree_find_state) are marked by level (i+1), not i. ++ * Level 0 double pointer is a pointer to root inside tree struct. ++ * ++ * The tree is permanent, i.e. all index blocks allocated are keeped alive to ++ * preserve the blocks numbers in the quota file tree to keep its changes ++ * locally. 
++ */ ++struct quotatree_node { ++ struct list_head list; ++ quotaid_t num; ++ void *blocks[QUOTATREE_BSIZE]; ++}; ++ ++struct quotatree_level { ++ struct list_head usedlh, freelh; ++ quotaid_t freenum; ++}; ++ ++struct quotatree_tree { ++ struct quotatree_level levels[QUOTATREE_DEPTH]; ++ struct quotatree_node *root; ++ unsigned int leaf_num; ++}; ++ ++struct quotatree_find_state { ++ void **block; ++ int level; ++}; ++ ++/* number of leafs (objects) and leaf level of the tree */ ++#define QTREE_LEAFNUM(tree) ((tree)->leaf_num) ++#define QTREE_LEAFLVL(tree) (&(tree)->levels[QUOTATREE_DEPTH - 1]) ++ ++struct quotatree_tree *quotatree_alloc(void); ++void *quotatree_find(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st); ++int quotatree_insert(struct quotatree_tree *tree, quotaid_t id, ++ struct quotatree_find_state *st, void *data); ++void quotatree_remove(struct quotatree_tree *tree, quotaid_t id); ++void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *)); ++void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id); ++void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index); ++ ++#endif /* _VZDQ_TREE_H */ ++ +diff -uprN linux-2.6.8.1.orig/include/linux/vzquota.h linux-2.6.8.1-ve022stab078/include/linux/vzquota.h +--- linux-2.6.8.1.orig/include/linux/vzquota.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzquota.h 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,291 @@ ++/* ++ * ++ * Copyright (C) 2001-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * This file contains Virtuozzo disk quota implementation ++ */ ++ ++#ifndef _VZDQUOTA_H ++#define _VZDQUOTA_H ++ ++#include <linux/types.h> ++#include <linux/quota.h> ++ ++/* vzquotactl syscall commands */ ++#define VZ_DQ_CREATE 5 /* create quota master block */ ++#define VZ_DQ_DESTROY 6 /* destroy qmblk */ ++#define VZ_DQ_ON 7 /* mark dentry with already created qmblk */ ++#define VZ_DQ_OFF 8 /* remove mark, don't destroy qmblk */ ++#define VZ_DQ_SETLIMIT 9 /* set new limits */ ++#define VZ_DQ_GETSTAT 10 /* get usage statistic */ ++/* set of syscalls to maintain UGID quotas */ ++#define VZ_DQ_UGID_GETSTAT 1 /* get usage/limits for ugid(s) */ ++#define VZ_DQ_UGID_ADDSTAT 2 /* set usage/limits statistic for ugid(s) */ ++#define VZ_DQ_UGID_GETGRACE 3 /* get expire times */ ++#define VZ_DQ_UGID_SETGRACE 4 /* set expire times */ ++#define VZ_DQ_UGID_GETCONFIG 5 /* get ugid_max limit, cnt, flags of qmblk */ ++#define VZ_DQ_UGID_SETCONFIG 6 /* set ugid_max limit, flags of qmblk */ ++#define VZ_DQ_UGID_SETLIMIT 7 /* set ugid B/I limits */ ++#define VZ_DQ_UGID_SETINFO 8 /* set ugid info */ ++ ++/* common structure for vz and ugid quota */ ++struct dq_stat { ++ /* blocks limits */ ++ __u64 bhardlimit; /* absolute limit in bytes */ ++ __u64 bsoftlimit; /* preferred limit in bytes */ ++ time_t btime; /* time limit for excessive disk use */ ++ __u64 bcurrent; /* current bytes count */ ++ /* inodes limits */ ++ __u32 ihardlimit; /* absolute limit on allocated inodes */ ++ __u32 isoftlimit; /* preferred inode limit */ ++ time_t itime; /* time limit for excessive inode use */ ++ __u32 icurrent; /* current # allocated inodes */ ++}; ++ ++/* Values for dq_info->flags */ ++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */ ++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */ ++ ++struct dq_info { ++ time_t bexpire; /* expire timeout for excessive disk use */ ++ time_t 
iexpire; /* expire timeout for excessive inode use */ ++ unsigned flags; /* see previos defines */ ++}; ++ ++struct vz_quota_stat { ++ struct dq_stat dq_stat; ++ struct dq_info dq_info; ++}; ++ ++/* UID/GID interface record - for user-kernel level exchange */ ++struct vz_quota_iface { ++ unsigned int qi_id; /* UID/GID this applies to */ ++ unsigned int qi_type; /* USRQUOTA|GRPQUOTA */ ++ struct dq_stat qi_stat; /* limits, options, usage stats */ ++}; ++ ++/* values for flags and dq_flags */ ++/* this flag is set if the userspace has been unable to provide usage ++ * information about all ugids ++ * if the flag is set, we don't allocate new UG quota blocks (their ++ * current usage is unknown) or free existing UG quota blocks (not to ++ * lose information that this block is ok) */ ++#define VZDQUG_FIXED_SET 0x01 ++/* permit to use ugid quota */ ++#define VZDQUG_ON 0x02 ++#define VZDQ_USRQUOTA 0x10 ++#define VZDQ_GRPQUOTA 0x20 ++#define VZDQ_NOACT 0x1000 /* not actual */ ++#define VZDQ_NOQUOT 0x2000 /* not under quota tree */ ++ ++struct vz_quota_ugid_stat { ++ unsigned int limit; /* max amount of ugid records */ ++ unsigned int count; /* amount of ugid records */ ++ unsigned int flags; ++}; ++ ++struct vz_quota_ugid_setlimit { ++ unsigned int type; /* quota type (USR/GRP) */ ++ unsigned int id; /* ugid */ ++ struct if_dqblk dqb; /* limits info */ ++}; ++ ++struct vz_quota_ugid_setinfo { ++ unsigned int type; /* quota type (USR/GRP) */ ++ struct if_dqinfo dqi; /* grace info */ ++}; ++ ++#ifdef __KERNEL__ ++#include <linux/list.h> ++#include <asm/atomic.h> ++#include <asm/semaphore.h> ++#include <linux/time.h> ++#include <linux/vzquota_qlnk.h> ++#include <linux/vzdq_tree.h> ++ ++/* One-second resolution for grace times */ ++#define CURRENT_TIME_SECONDS (get_seconds()) ++ ++/* Values for dq_info flags */ ++#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */ ++#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */ ++ ++/* values for dq_state */ ++#define VZDQ_STARTING 0 /* created, not turned on yet */ ++#define VZDQ_WORKING 1 /* quota created, turned on */ ++#define VZDQ_STOPING 2 /* created, turned on and off */ ++ ++/* master quota record - one per veid */ ++struct vz_quota_master { ++ struct list_head dq_hash; /* next quota in hash list */ ++ atomic_t dq_count; /* inode reference count */ ++ unsigned int dq_flags; /* see VZDQUG_FIXED_SET */ ++ unsigned int dq_state; /* see values above */ ++ unsigned int dq_id; /* VEID this applies to */ ++ struct dq_stat dq_stat; /* limits, grace, usage stats */ ++ struct dq_info dq_info; /* grace times and flags */ ++ spinlock_t dq_data_lock; /* for dq_stat */ ++ ++ struct semaphore dq_sem; /* semaphore to protect ++ ugid tree */ ++ ++ struct list_head dq_ilink_list; /* list of vz_quota_ilink */ ++ struct quotatree_tree *dq_uid_tree; /* vz_quota_ugid tree for UIDs */ ++ struct quotatree_tree *dq_gid_tree; /* vz_quota_ugid tree for GIDs */ ++ unsigned int dq_ugid_count; /* amount of ugid records */ ++ unsigned int dq_ugid_max; /* max amount of ugid records */ ++ struct dq_info dq_ugid_info[MAXQUOTAS]; /* ugid grace times */ ++ ++ struct dentry *dq_root_dentry;/* dentry of fs tree */ ++ struct vfsmount *dq_root_mnt; /* vfsmnt of this dentry */ ++ struct super_block *dq_sb; /* superblock of our quota root */ ++}; ++ ++/* UID/GID quota record - one per pair (quota_master, uid or gid) */ ++struct vz_quota_ugid { ++ unsigned int qugid_id; /* UID/GID this applies to */ ++ struct dq_stat qugid_stat; /* limits, options, usage stats */ ++ int 
qugid_type; /* USRQUOTA|GRPQUOTA */ ++ atomic_t qugid_count; /* reference count */ ++}; ++ ++#define VZ_QUOTA_UGBAD ((struct vz_quota_ugid *)0xfeafea11) ++ ++struct vz_quota_datast { ++ struct vz_quota_ilink qlnk; ++}; ++ ++#define VIRTINFO_QUOTA_GETSTAT 0 ++#define VIRTINFO_QUOTA_ON 1 ++#define VIRTINFO_QUOTA_OFF 2 ++ ++struct virt_info_quota { ++ struct super_block *super; ++ struct dq_stat *qstat; ++}; ++ ++/* ++ * Interface to VZ quota core ++ */ ++#define INODE_QLNK(inode) (&(inode)->i_qlnk) ++#define QLNK_INODE(qlnk) container_of((qlnk), struct inode, i_qlnk) ++ ++#define VZ_QUOTA_BAD ((struct vz_quota_master *)0xefefefef) ++ ++#define VZ_QUOTAO_SETE 1 ++#define VZ_QUOTAO_INIT 2 ++#define VZ_QUOTAO_DESTR 3 ++#define VZ_QUOTAO_SWAP 4 ++#define VZ_QUOTAO_INICAL 5 ++#define VZ_QUOTAO_DRCAL 6 ++#define VZ_QUOTAO_QSET 7 ++#define VZ_QUOTAO_TRANS 8 ++#define VZ_QUOTAO_ACT 9 ++#define VZ_QUOTAO_DTREE 10 ++#define VZ_QUOTAO_DET 11 ++#define VZ_QUOTAO_ON 12 ++ ++extern struct semaphore vz_quota_sem; ++void inode_qmblk_lock(struct super_block *sb); ++void inode_qmblk_unlock(struct super_block *sb); ++void qmblk_data_read_lock(struct vz_quota_master *qmblk); ++void qmblk_data_read_unlock(struct vz_quota_master *qmblk); ++void qmblk_data_write_lock(struct vz_quota_master *qmblk); ++void qmblk_data_write_unlock(struct vz_quota_master *qmblk); ++ ++/* for quota operations */ ++void vzquota_inode_init_call(struct inode *inode); ++void vzquota_inode_drop_call(struct inode *inode); ++int vzquota_inode_transfer_call(struct inode *, struct iattr *); ++struct vz_quota_master *vzquota_inode_data(struct inode *inode, ++ struct vz_quota_datast *); ++void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *); ++int vzquota_rename_check(struct inode *inode, ++ struct inode *old_dir, struct inode *new_dir); ++struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode); ++/* for second-level quota */ ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *); ++/* for management operations */ ++struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id, ++ struct vz_quota_stat *qstat); ++void vzquota_free_master(struct vz_quota_master *); ++struct vz_quota_master *vzquota_find_master(unsigned int quota_id); ++int vzquota_on_qmblk(struct super_block *sb, struct inode *inode, ++ struct vz_quota_master *qmblk); ++int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk); ++int vzquota_get_super(struct super_block *sb); ++void vzquota_put_super(struct super_block *sb); ++ ++static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk) ++{ ++ if (!atomic_read(&qmblk->dq_count)) ++ BUG(); ++ atomic_inc(&qmblk->dq_count); ++ return qmblk; ++} ++ ++static inline void __qmblk_put(struct vz_quota_master *qmblk) ++{ ++ atomic_dec(&qmblk->dq_count); ++} ++ ++static inline void qmblk_put(struct vz_quota_master *qmblk) ++{ ++ if (!atomic_dec_and_test(&qmblk->dq_count)) ++ return; ++ vzquota_free_master(qmblk); ++} ++ ++extern struct list_head vzquota_hash_table[]; ++extern int vzquota_hash_size; ++ ++/* ++ * Interface to VZ UGID quota ++ */ ++extern struct quotactl_ops vz_quotactl_operations; ++extern struct dquot_operations vz_quota_operations2; ++extern struct quota_format_type vz_quota_empty_v2_format; ++ ++#define QUGID_TREE(qmblk, type) (((type) == USRQUOTA) ? 
\ ++ qmblk->dq_uid_tree : \ ++ qmblk->dq_gid_tree) ++ ++#define VZDQUG_FIND_DONT_ALLOC 1 ++#define VZDQUG_FIND_FAKE 2 ++struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags); ++struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk, ++ unsigned int quota_id, int type, int flags); ++struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid); ++void vzquota_put_ugid(struct vz_quota_master *qmblk, ++ struct vz_quota_ugid *qugid); ++void vzquota_kill_ugid(struct vz_quota_master *qmblk); ++int vzquota_ugid_init(void); ++void vzquota_ugid_release(void); ++int vzquota_transfer_usage(struct inode *inode, int mask, ++ struct vz_quota_ilink *qlnk); ++ ++struct vzctl_quotaugidctl; ++long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub); ++ ++/* ++ * Other VZ quota parts ++ */ ++extern struct dquot_operations vz_quota_operations; ++ ++long do_vzquotactl(int cmd, unsigned int quota_id, ++ struct vz_quota_stat *qstat, const char *ve_root); ++int vzquota_proc_init(void); ++void vzquota_proc_release(void); ++struct vz_quota_master *vzquota_find_qmblk(struct super_block *); ++extern struct semaphore vz_quota_sem; ++ ++void vzaquota_init(void); ++void vzaquota_fini(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _VZDQUOTA_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/vzquota_qlnk.h linux-2.6.8.1-ve022stab078/include/linux/vzquota_qlnk.h +--- linux-2.6.8.1.orig/include/linux/vzquota_qlnk.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzquota_qlnk.h 2006-05-11 13:05:43.000000000 +0400 +@@ -0,0 +1,25 @@ ++/* ++ * include/linux/vzquota_qlnk.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef _VZDQUOTA_QLNK_H ++#define _VZDQUOTA_QLNK_H ++ ++struct vz_quota_master; ++struct vz_quota_ugid; ++ ++/* inode link, used to track inodes using quota via dq_ilink_list */ ++struct vz_quota_ilink { ++ struct vz_quota_master *qmblk; ++ struct vz_quota_ugid *qugid[MAXQUOTAS]; ++ struct list_head list; ++ unsigned char origin; ++}; ++ ++#endif /* _VZDQUOTA_QLNK_H */ +diff -uprN linux-2.6.8.1.orig/include/linux/vzratelimit.h linux-2.6.8.1-ve022stab078/include/linux/vzratelimit.h +--- linux-2.6.8.1.orig/include/linux/vzratelimit.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzratelimit.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,28 @@ ++/* ++ * include/linux/vzratelimit.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __VZ_RATELIMIT_H__ ++#define __VZ_RATELIMIT_H__ ++ ++/* ++ * Generic ratelimiting stuff. ++ */ ++ ++struct vz_rate_info { ++ int burst; ++ int interval; /* jiffy_t per event */ ++ int bucket; /* kind of leaky bucket */ ++ unsigned long last; /* last event */ ++}; ++ ++/* Return true if rate limit permits. */ ++int vz_ratelimit(struct vz_rate_info *p); ++ ++#endif /* __VZ_RATELIMIT_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/vzstat.h linux-2.6.8.1-ve022stab078/include/linux/vzstat.h +--- linux-2.6.8.1.orig/include/linux/vzstat.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/linux/vzstat.h 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,176 @@ ++/* ++ * include/linux/vzstat.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __VZSTAT_H__ ++#define __VZSTAT_H__ ++ ++struct swap_cache_info_struct { ++ unsigned long add_total; ++ unsigned long del_total; ++ unsigned long find_success; ++ unsigned long find_total; ++ unsigned long noent_race; ++ unsigned long exist_race; ++ unsigned long remove_race; ++}; ++ ++struct kstat_lat_snap_struct { ++ cycles_t maxlat, totlat; ++ unsigned long count; ++}; ++struct kstat_lat_pcpu_snap_struct { ++ cycles_t maxlat, totlat; ++ unsigned long count; ++ seqcount_t lock; ++} ____cacheline_maxaligned_in_smp; ++ ++struct kstat_lat_struct { ++ struct kstat_lat_snap_struct cur, last; ++ cycles_t avg[3]; ++}; ++struct kstat_lat_pcpu_struct { ++ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS]; ++ cycles_t max_snap; ++ struct kstat_lat_snap_struct last; ++ cycles_t avg[3]; ++}; ++ ++struct kstat_perf_snap_struct { ++ cycles_t wall_tottime, cpu_tottime; ++ cycles_t wall_maxdur, cpu_maxdur; ++ unsigned long count; ++}; ++struct kstat_perf_struct { ++ struct kstat_perf_snap_struct cur, last; ++}; ++ ++struct kstat_zone_avg { ++ unsigned long free_pages_avg[3], ++ nr_active_avg[3], ++ nr_inactive_avg[3]; ++}; ++ ++#define KSTAT_ALLOCSTAT_NR 5 ++ ++struct kernel_stat_glob { ++ unsigned long nr_unint_avg[3]; ++ ++ unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR]; ++ struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR]; ++ struct kstat_lat_pcpu_struct sched_lat; ++ struct kstat_lat_struct swap_in; ++ ++ struct kstat_perf_struct ttfp, cache_reap, ++ refill_inact, shrink_icache, shrink_dcache; ++ ++ struct kstat_zone_avg zone_avg[3]; /* MAX_NR_ZONES */ ++} ____cacheline_aligned; ++ ++extern struct kernel_stat_glob kstat_glob ____cacheline_aligned; ++extern spinlock_t kstat_glb_lock; ++ ++#define KSTAT_PERF_ENTER(name) \ ++ unsigned long flags; \ ++ cycles_t start, sleep_time; \ ++ \ ++ start = get_cycles(); \ ++ sleep_time = VE_TASK_INFO(current)->sleep_time; \ ++ ++#define KSTAT_PERF_LEAVE(name) \ ++ spin_lock_irqsave(&kstat_glb_lock, flags); \ ++ kstat_glob.name.cur.count++; \ ++ start = get_cycles() - start; \ ++ if (kstat_glob.name.cur.wall_maxdur < start) \ ++ kstat_glob.name.cur.wall_maxdur = start;\ ++ kstat_glob.name.cur.wall_tottime += start; \ ++ start -= VE_TASK_INFO(current)->sleep_time - \ ++ sleep_time; \ ++ if (kstat_glob.name.cur.cpu_maxdur < start) \ ++ kstat_glob.name.cur.cpu_maxdur = start; \ ++ kstat_glob.name.cur.cpu_tottime += start; \ ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); \ ++ ++/* ++ * Add another statistics reading. ++ * Serialization is the caller's due. ++ */ ++static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p, ++ cycles_t dur) ++{ ++ p->cur.count++; ++ if (p->cur.maxlat < dur) ++ p->cur.maxlat = dur; ++ p->cur.totlat += dur; ++} ++ ++static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu, ++ cycles_t dur) ++{ ++ struct kstat_lat_pcpu_snap_struct *cur; ++ ++ cur = &p->cur[cpu]; ++ write_seqcount_begin(&cur->lock); ++ cur->count++; ++ if (cur->maxlat < dur) ++ cur->maxlat = dur; ++ cur->totlat += dur; ++ write_seqcount_end(&cur->lock); ++} ++ ++/* ++ * Move current statistics to last, clear last. ++ * Serialization is the caller's due. 
++ */ ++static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p) ++{ ++ cycles_t m; ++ memcpy(&p->last, &p->cur, sizeof(p->last)); ++ p->cur.maxlat = 0; ++ m = p->last.maxlat; ++ CALC_LOAD(p->avg[0], EXP_1, m) ++ CALC_LOAD(p->avg[1], EXP_5, m) ++ CALC_LOAD(p->avg[2], EXP_15, m) ++} ++ ++static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p) ++{ ++ unsigned i, cpu; ++ struct kstat_lat_pcpu_snap_struct snap, *cur; ++ cycles_t m; ++ ++ memset(&p->last, 0, sizeof(p->last)); ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ cur = &p->cur[cpu]; ++ do { ++ i = read_seqcount_begin(&cur->lock); ++ memcpy(&snap, cur, sizeof(snap)); ++ } while (read_seqcount_retry(&cur->lock, i)); ++ /* ++ * read above and this update of maxlat is not atomic, ++ * but this is OK, since it happens rarely and losing ++ * a couple of peaks is not essential. xemul ++ */ ++ cur->maxlat = 0; ++ ++ p->last.count += snap.count; ++ p->last.totlat += snap.totlat; ++ if (p->last.maxlat < snap.maxlat) ++ p->last.maxlat = snap.maxlat; ++ } ++ ++ m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap); ++ CALC_LOAD(p->avg[0], EXP_1, m); ++ CALC_LOAD(p->avg[1], EXP_5, m); ++ CALC_LOAD(p->avg[2], EXP_15, m); ++ /* reset max_snap to calculate it correctly next time */ ++ p->max_snap = 0; ++} ++ ++#endif /* __VZSTAT_H__ */ +diff -uprN linux-2.6.8.1.orig/include/linux/zlib.h linux-2.6.8.1-ve022stab078/include/linux/zlib.h +--- linux-2.6.8.1.orig/include/linux/zlib.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/linux/zlib.h 2006-05-11 13:05:34.000000000 +0400 +@@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp + stream state was inconsistent (such as zalloc or state being NULL). + */ + ++static inline unsigned long deflateBound(unsigned long s) ++{ ++ return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; ++} ++ + extern int zlib_deflateParams (z_streamp strm, int level, int strategy); + /* + Dynamically update the compression level and compression strategy. The +diff -uprN linux-2.6.8.1.orig/include/net/af_unix.h linux-2.6.8.1-ve022stab078/include/net/af_unix.h +--- linux-2.6.8.1.orig/include/net/af_unix.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/af_unix.h 2006-05-11 13:05:40.000000000 +0400 +@@ -13,23 +13,37 @@ extern atomic_t unix_tot_inflight; + + static inline struct sock *first_unix_socket(int *i) + { ++ struct sock *s; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); + for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { +- if (!hlist_empty(&unix_socket_table[*i])) +- return __sk_head(&unix_socket_table[*i]); ++ for (s = sk_head(&unix_socket_table[*i]); ++ s != NULL && !ve_accessible(VE_OWNER_SK(s), ve); ++ s = sk_next(s)); ++ if (s != NULL) ++ return s; + } + return NULL; + } + + static inline struct sock *next_unix_socket(int *i, struct sock *s) + { +- struct sock *next = sk_next(s); +- /* More in this chain? */ +- if (next) +- return next; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); ++ for (s = sk_next(s); s != NULL; s = sk_next(s)) { ++ if (!ve_accessible(VE_OWNER_SK(s), ve)) ++ continue; ++ return s; ++ } + /* Look for next non-empty chain. 
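
The KSTAT_LAT_* helpers above keep a per-interval count/total/maximum and, once per update interval, fold the interval's maximum latency into three exponentially decaying averages via CALC_LOAD. Below is a stand-alone model of that flow; the CALC_LOAD macro and the FSHIFT/EXP_1/EXP_5/EXP_15 constants are assumed to match the stock kernel definitions, and struct lat_stat is a simplified stand-in for kstat_lat_struct (serialization is left to the caller, as the original comments note).

#include <stdio.h>

#define FSHIFT		11
#define FIXED_1		(1 << FSHIFT)
#define EXP_1		1884		/* stock kernel loadavg constants */
#define EXP_5		2014
#define EXP_15		2037

#define CALC_LOAD(load, exp, n)				\
	(load) *= (exp);				\
	(load) += (n) * (FIXED_1 - (exp));		\
	(load) >>= FSHIFT;

struct lat_stat {
	unsigned long long maxlat, totlat;	/* current interval */
	unsigned long count;
	unsigned long long avg[3];		/* decayed interval maxima */
};

static void lat_add(struct lat_stat *s, unsigned long long dur)
{
	s->count++;
	s->totlat += dur;
	if (s->maxlat < dur)
		s->maxlat = dur;
}

/* Called once per update interval, like KSTAT_LAT_UPDATE(). */
static void lat_update(struct lat_stat *s)
{
	unsigned long long m = s->maxlat;

	CALC_LOAD(s->avg[0], EXP_1, m);
	CALC_LOAD(s->avg[1], EXP_5, m);
	CALC_LOAD(s->avg[2], EXP_15, m);
	s->maxlat = 0;				/* start a new interval */
}

int main(void)
{
	struct lat_stat s = { 0 };
	int i;

	for (i = 0; i < 12; i++) {		/* one minute of 5s intervals */
		lat_add(&s, 1000 + i * 10);
		lat_update(&s);
	}
	printf("decayed max latencies: %llu %llu %llu\n",
	       s.avg[0], s.avg[1], s.avg[2]);
	return 0;
}

The per-CPU variant (KSTAT_LAT_PCPU_ADD/KSTAT_LAT_PCPU_UPDATE) adds one refinement: each CPU's snapshot is written under a seqcount, so the aggregation pass can copy it with a read_seqcount_begin/retry loop instead of taking a lock, at the cost of occasionally losing a peak value, as the comment in the patch says.
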
*/ + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { +- if (!hlist_empty(&unix_socket_table[*i])) +- return __sk_head(&unix_socket_table[*i]); ++ for (s = sk_head(&unix_socket_table[*i]); ++ s != NULL && !ve_accessible(VE_OWNER_SK(s), ve); ++ s = sk_next(s)); ++ if (s != NULL) ++ return s; + } + return NULL; + } +diff -uprN linux-2.6.8.1.orig/include/net/compat.h linux-2.6.8.1-ve022stab078/include/net/compat.h +--- linux-2.6.8.1.orig/include/net/compat.h 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/compat.h 2006-05-11 13:05:49.000000000 +0400 +@@ -23,6 +23,12 @@ struct compat_cmsghdr { + compat_int_t cmsg_type; + }; + ++#if defined(CONFIG_X86_64) ++#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32) ++#else ++#define is_current_32bits() 0 ++#endif ++ + #else /* defined(CONFIG_COMPAT) */ + #define compat_msghdr msghdr /* to avoid compiler warnings */ + #endif /* defined(CONFIG_COMPAT) */ +@@ -33,7 +39,8 @@ extern asmlinkage long compat_sys_sendms + extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); + extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); + extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); +-extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, unsigned char *, +- int); ++ ++struct sock; ++extern int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); + + #endif /* NET_COMPAT_H */ +diff -uprN linux-2.6.8.1.orig/include/net/flow.h linux-2.6.8.1-ve022stab078/include/net/flow.h +--- linux-2.6.8.1.orig/include/net/flow.h 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/flow.h 2006-05-11 13:05:40.000000000 +0400 +@@ -10,6 +10,7 @@ + #include <linux/in6.h> + #include <asm/atomic.h> + ++struct ve_struct; + struct flowi { + int oif; + int iif; +@@ -77,6 +78,9 @@ struct flowi { + #define fl_icmp_type uli_u.icmpt.type + #define fl_icmp_code uli_u.icmpt.code + #define fl_ipsec_spi uli_u.spi ++#ifdef CONFIG_VE ++ struct ve_struct *owner_env; ++#endif + } __attribute__((__aligned__(BITS_PER_LONG/8))); + + #define FLOW_DIR_IN 0 +diff -uprN linux-2.6.8.1.orig/include/net/icmp.h linux-2.6.8.1-ve022stab078/include/net/icmp.h +--- linux-2.6.8.1.orig/include/net/icmp.h 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/icmp.h 2006-05-11 13:05:40.000000000 +0400 +@@ -34,9 +34,14 @@ struct icmp_err { + + extern struct icmp_err icmp_err_convert[]; + DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics); +-#define ICMP_INC_STATS(field) SNMP_INC_STATS(icmp_statistics, field) +-#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field) +-#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_icmp_statistics (get_exec_env()->_icmp_statistics) ++#else ++#define ve_icmp_statistics icmp_statistics ++#endif ++#define ICMP_INC_STATS(field) SNMP_INC_STATS(ve_icmp_statistics, field) ++#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_icmp_statistics, field) ++#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmp_statistics, field) + + extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info); + extern int icmp_rcv(struct sk_buff *skb); +diff -uprN linux-2.6.8.1.orig/include/net/ip.h linux-2.6.8.1-ve022stab078/include/net/ip.h +--- linux-2.6.8.1.orig/include/net/ip.h 2004-08-14 14:55:47.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/include/net/ip.h 2006-05-11 13:05:40.000000000 +0400 +@@ -151,15 +151,25 @@ struct ipv4_config + + extern struct ipv4_config ipv4_config; + DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); +-#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) +-#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) +-#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ip_statistics (get_exec_env()->_ip_statistics) ++#else ++#define ve_ip_statistics ip_statistics ++#endif ++#define IP_INC_STATS(field) SNMP_INC_STATS(ve_ip_statistics, field) ++#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ip_statistics, field) ++#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ip_statistics, field) + DECLARE_SNMP_STAT(struct linux_mib, net_statistics); +-#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field) +-#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field) +-#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(net_statistics, field) +-#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd) +-#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_net_statistics (get_exec_env()->_net_statistics) ++#else ++#define ve_net_statistics net_statistics ++#endif ++#define NET_INC_STATS(field) SNMP_INC_STATS(ve_net_statistics, field) ++#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_net_statistics, field) ++#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_net_statistics, field) ++#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd) ++#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd) + + extern int sysctl_local_port_range[2]; + extern int sysctl_ip_default_ttl; +@@ -253,8 +263,21 @@ extern int ip_call_ra_chain(struct sk_bu + /* + * Functions provided by ip_fragment.o + */ +- +-struct sk_buff *ip_defrag(struct sk_buff *skb); ++ ++enum ip_defrag_users ++{ ++ IP_DEFRAG_LOCAL_DELIVER, ++ IP_DEFRAG_CALL_RA_CHAIN, ++ IP_DEFRAG_CONNTRACK_IN, ++ IP_DEFRAG_CONNTRACK_OUT, ++ IP_DEFRAG_NAT_OUT, ++ IP_DEFRAG_FW_COMPAT, ++ IP_DEFRAG_VS_IN, ++ IP_DEFRAG_VS_OUT, ++ IP_DEFRAG_VS_FWD ++}; ++ ++struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); + extern int ip_frag_nqueues; + extern atomic_t ip_frag_mem; + +diff -uprN linux-2.6.8.1.orig/include/net/ip_fib.h linux-2.6.8.1-ve022stab078/include/net/ip_fib.h +--- linux-2.6.8.1.orig/include/net/ip_fib.h 2004-08-14 14:56:15.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/ip_fib.h 2006-05-11 13:05:40.000000000 +0400 +@@ -139,10 +139,22 @@ struct fib_table + unsigned char tb_data[0]; + }; + ++struct fn_zone; ++struct fn_hash ++{ ++ struct fn_zone *fn_zones[33]; ++ struct fn_zone *fn_zone_list; ++}; ++ + #ifndef CONFIG_IP_MULTIPLE_TABLES + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ip_fib_local_table get_exec_env()->_local_table ++#define ip_fib_main_table get_exec_env()->_main_table ++#else + extern struct fib_table *ip_fib_local_table; + extern struct fib_table *ip_fib_main_table; ++#endif + + static inline struct fib_table *fib_get_table(int id) + { +@@ -174,7 +186,12 @@ static inline void fib_select_default(co + #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) + #define ip_fib_main_table 
(fib_tables[RT_TABLE_MAIN]) + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_tables get_exec_env()->_fib_tables ++#else + extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; ++#endif ++ + extern int fib_lookup(const struct flowi *flp, struct fib_result *res); + extern struct fib_table *__fib_new_table(int id); + extern void fib_rule_put(struct fib_rule *r); +@@ -231,10 +248,19 @@ extern u32 __fib_res_prefsrc(struct fib + + /* Exported by fib_hash.c */ + extern struct fib_table *fib_hash_init(int id); ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++struct ve_struct; ++extern int init_ve_route(struct ve_struct *ve); ++extern void fini_ve_route(struct ve_struct *ve); ++#else ++#define init_ve_route(ve) (0) ++#define fini_ve_route(ve) do { } while (0) ++#endif + + #ifdef CONFIG_IP_MULTIPLE_TABLES + /* Exported by fib_rules.c */ +- ++extern int fib_rules_create(void); ++extern void fib_rules_destroy(void); + extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); + extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); + extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); +diff -uprN linux-2.6.8.1.orig/include/net/scm.h linux-2.6.8.1-ve022stab078/include/net/scm.h +--- linux-2.6.8.1.orig/include/net/scm.h 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/scm.h 2006-05-11 13:05:40.000000000 +0400 +@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so + memset(scm, 0, sizeof(*scm)); + scm->creds.uid = current->uid; + scm->creds.gid = current->gid; +- scm->creds.pid = current->tgid; ++ scm->creds.pid = virt_tgid(current); + if (msg->msg_controllen <= 0) + return 0; + return __scm_send(sock, msg, scm); +diff -uprN linux-2.6.8.1.orig/include/net/sock.h linux-2.6.8.1-ve022stab078/include/net/sock.h +--- linux-2.6.8.1.orig/include/net/sock.h 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/sock.h 2006-05-11 13:05:40.000000000 +0400 +@@ -55,6 +55,8 @@ + #include <net/dst.h> + #include <net/checksum.h> + ++#include <ub/ub_net.h> ++ + /* + * This structure really needs to be cleaned up. + * Most of it is for TCP, and not used by any of +@@ -266,8 +268,12 @@ struct sock { + int (*sk_backlog_rcv)(struct sock *sk, + struct sk_buff *skb); + void (*sk_destruct)(struct sock *sk); ++ struct sock_beancounter sk_bc; ++ struct ve_struct *sk_owner_env; + }; + ++DCL_VE_OWNER_PROTO(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1))) ++ + /* + * Hashed lists helper routines + */ +@@ -488,7 +494,8 @@ do { if (!(__sk)->sk_backlog.tail) { + }) + + extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p); +-extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); ++extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p, ++ unsigned long amount); + extern void sk_stream_wait_close(struct sock *sk, long timeo_p); + extern int sk_stream_error(struct sock *sk, int flags, int err); + extern void sk_stream_kill_queues(struct sock *sk); +@@ -672,8 +679,11 @@ static inline void sk_stream_writequeue_ + + static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) + { +- return (int)skb->truesize <= sk->sk_forward_alloc || +- sk_stream_mem_schedule(sk, skb->truesize, 1); ++ if ((int)skb->truesize > sk->sk_forward_alloc && ++ !sk_stream_mem_schedule(sk, skb->truesize, 1)) ++ /* The situation is bad according to mainstream. 
Den */ ++ return 0; ++ return ub_tcprcvbuf_charge(sk, skb) == 0; + } + + /* Used by processes to "lock" a socket state, so that +@@ -724,6 +734,11 @@ extern struct sk_buff *sock_alloc_send + unsigned long size, + int noblock, + int *errcode); ++extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk, ++ unsigned long size, ++ unsigned long size2, ++ int noblock, ++ int *errcode); + extern struct sk_buff *sock_alloc_send_pskb(struct sock *sk, + unsigned long header_len, + unsigned long data_len, +@@ -1073,6 +1088,10 @@ static inline int sock_queue_rcv_skb(str + goto out; + } + ++ err = ub_sockrcvbuf_charge(sk, skb); ++ if (err < 0) ++ goto out; ++ + /* It would be deadlock, if sock_queue_rcv_skb is used + with socket lock! We assume that users of this + function are lock free. +diff -uprN linux-2.6.8.1.orig/include/net/tcp.h linux-2.6.8.1-ve022stab078/include/net/tcp.h +--- linux-2.6.8.1.orig/include/net/tcp.h 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/tcp.h 2006-05-11 13:05:45.000000000 +0400 +@@ -30,6 +30,7 @@ + #include <linux/slab.h> + #include <linux/cache.h> + #include <linux/percpu.h> ++#include <linux/ve_owner.h> + #include <net/checksum.h> + #include <net/sock.h> + #include <net/snmp.h> +@@ -39,6 +40,10 @@ + #endif + #include <linux/seq_file.h> + ++ ++#define TCP_PAGE(sk) (sk->sk_sndmsg_page) ++#define TCP_OFF(sk) (sk->sk_sndmsg_off) ++ + /* This is for all connections with a full identity, no wildcards. + * New scheme, half the table is for TIME_WAIT, the other half is + * for the rest. I'll experiment with dynamic table growth later. +@@ -83,12 +88,16 @@ struct tcp_ehash_bucket { + * ports are created in O(1) time? I thought so. ;-) -DaveM + */ + struct tcp_bind_bucket { ++ struct ve_struct *owner_env; + unsigned short port; + signed short fastreuse; + struct hlist_node node; + struct hlist_head owners; + }; + ++DCL_VE_OWNER_PROTO(TB, GENERIC, struct tcp_bind_bucket, owner_env, ++ inline, (always_inline)); ++ + #define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) + + struct tcp_bind_hashbucket { +@@ -158,16 +167,17 @@ extern kmem_cache_t *tcp_sk_cachep; + + extern kmem_cache_t *tcp_bucket_cachep; + extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, +- unsigned short snum); ++ unsigned short snum, ++ struct ve_struct *env); + extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); + extern void tcp_bucket_unlock(struct sock *sk); + extern int tcp_port_rover; + extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif); + + /* These are AF independent. 
*/ +-static __inline__ int tcp_bhashfn(__u16 lport) ++static __inline__ int tcp_bhashfn(__u16 lport, unsigned veid) + { +- return (lport & (tcp_bhash_size - 1)); ++ return ((lport + (veid ^ (veid >> 16))) & (tcp_bhash_size - 1)); + } + + extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, +@@ -217,13 +227,19 @@ struct tcp_tw_bucket { + unsigned long tw_ttd; + struct tcp_bind_bucket *tw_tb; + struct hlist_node tw_death_node; ++ spinlock_t tw_lock; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; + int tw_v6_ipv6only; + #endif ++ envid_t tw_owner_env; + }; + ++#define TW_VEID(tw) ((tw)->tw_owner_env) ++#define SET_TW_VEID(tw, veid) ((tw)->tw_owner_env) = (veid) ++ ++ + static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, + struct hlist_head *list) + { +@@ -304,7 +320,11 @@ static inline int tcp_v6_ipv6only(const + # define tcp_v6_ipv6only(__sk) 0 + #endif + ++#define TW_WSCALE_MASK 0x0f ++#define TW_WSCALE_SPEC 0x10 ++ + extern kmem_cache_t *tcp_timewait_cachep; ++#include <ub/ub_net.h> + + static inline void tcp_tw_put(struct tcp_tw_bucket *tw) + { +@@ -340,28 +360,38 @@ extern void tcp_tw_deschedule(struct tcp + #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ + __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); + #endif /* __BIG_ENDIAN */ +-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + #else /* 32-bit arch */ + #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) +-#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define TCP_IPV4_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define TCP_IPV4_TW_MATCH_ALLVE(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ + (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + #endif /* 64-bit arch */ + ++#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\ ++ (TCP_IPV4_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr), \ ++ (__ports), (__dif)) \ ++ && ve_accessible_strict(VE_OWNER_SK((__sk)), (__ve))) ++ ++#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif, __ve)\ ++ (TCP_IPV4_TW_MATCH_ALLVE((__sk), (__cookie), (__saddr), (__daddr), \ ++ (__ports), (__dif)) \ ++ && ve_accessible_strict(TW_VEID(tcptw_sk(__sk)), VEID(__ve))) ++ + #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 
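
tcp_bhashfn() now folds the VE id into the bucket index, so the same port number bound inside different VEs tends to land in different hash chains (tcp_lhashfn and udp_hashfn receive the same treatment further down). A minimal sketch of just that arithmetic; BHASH_SIZE here is an arbitrary power of two for the example, not the kernel's tcp_bhash_size.

#include <stdio.h>

#define BHASH_SIZE 512			/* must be a power of two */

static unsigned int ve_bhashfn(unsigned short lport, unsigned int veid)
{
	return (lport + (veid ^ (veid >> 16))) & (BHASH_SIZE - 1);
}

int main(void)
{
	/* The same port bound in three different VEs spreads over buckets. */
	unsigned int veids[] = { 0, 101, 70000 };
	int i;

	for (i = 0; i < 3; i++)
		printf("port 80, veid %-6u -> bucket %u\n",
		       veids[i], ve_bhashfn(80, veids[i]));
	return 0;
}

The xor-fold (veid ^ (veid >> 16)) keeps the high VE-id bits contributing to the bucket choice even though the final mask only keeps the low bits of the sum.
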
*)&(inet_sk(__sk)->dport)))== (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ +@@ -370,16 +400,16 @@ extern void tcp_tw_deschedule(struct tcp + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + + /* These can have wildcards, don't try too hard. */ +-static __inline__ int tcp_lhashfn(unsigned short num) ++static __inline__ int tcp_lhashfn(unsigned short num, unsigned veid) + { +- return num & (TCP_LHTABLE_SIZE - 1); ++ return ((num + (veid ^ (veid >> 16))) & (TCP_LHTABLE_SIZE - 1)); + } + + static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) + { +- return tcp_lhashfn(inet_sk(sk)->num); ++ return tcp_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk))); + } +- ++ + #define MAX_TCP_HEADER (128 + MAX_HEADER) + + /* +@@ -598,7 +628,9 @@ extern int sysctl_tcp_mem[3]; + extern int sysctl_tcp_wmem[3]; + extern int sysctl_tcp_rmem[3]; + extern int sysctl_tcp_app_win; ++#ifndef sysctl_tcp_adv_win_scale + extern int sysctl_tcp_adv_win_scale; ++#endif + extern int sysctl_tcp_tw_reuse; + extern int sysctl_tcp_frto; + extern int sysctl_tcp_low_latency; +@@ -613,6 +645,7 @@ extern int sysctl_tcp_bic_fast_convergen + extern int sysctl_tcp_bic_low_window; + extern int sysctl_tcp_default_win_scale; + extern int sysctl_tcp_moderate_rcvbuf; ++extern int sysctl_tcp_use_sg; + + extern atomic_t tcp_memory_allocated; + extern atomic_t tcp_sockets_allocated; +@@ -765,12 +798,17 @@ static inline int between(__u32 seq1, __ + extern struct proto tcp_prot; + + DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); +-#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field) +-#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field) +-#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field) +-#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field) +-#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) +-#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_tcp_statistics (get_exec_env()->_tcp_statistics) ++#else ++#define ve_tcp_statistics tcp_statistics ++#endif ++#define TCP_INC_STATS(field) SNMP_INC_STATS(ve_tcp_statistics, field) ++#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_tcp_statistics, field) ++#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_tcp_statistics, field) ++#define TCP_DEC_STATS(field) SNMP_DEC_STATS(ve_tcp_statistics, field) ++#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val) ++#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val) + + extern void tcp_put_port(struct sock *sk); + extern void tcp_inherit_port(struct sock *sk, struct sock *child); +@@ -837,9 +875,9 @@ static __inline__ void tcp_delack_init(s + memset(&tp->ack, 0, sizeof(tp->ack)); + } + +-static inline void tcp_clear_options(struct tcp_opt *tp) ++static inline void tcp_clear_options(struct tcp_options_received *rx_opt) + { +- tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0; ++ rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; + } + + enum tcp_tw_status +@@ -888,7 +926,7 @@ extern int tcp_recvmsg(struct kiocb *i + extern int tcp_listen_start(struct sock *sk); + + extern void tcp_parse_options(struct sk_buff *skb, +- struct tcp_opt *tp, ++ struct tcp_options_received *opt_rx, + int estab); + + /* +@@ -1062,9 +1100,9 @@ static __inline__ unsigned int tcp_curre + 
tp->ext2_header_len != dst->header_len) + mss_now = tcp_sync_mss(sk, mtu); + } +- if (tp->eff_sacks) ++ if (tp->rx_opt.eff_sacks) + mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + +- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); ++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); + return mss_now; + } + +@@ -1097,7 +1135,7 @@ static __inline__ void __tcp_fast_path_o + + static __inline__ void tcp_fast_path_on(struct tcp_opt *tp) + { +- __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale); ++ __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); + } + + static inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp) +@@ -1134,7 +1172,7 @@ extern u32 __tcp_select_window(struct so + * only use of the low 32-bits of jiffies and hide the ugly + * casts with the following macro. + */ +-#define tcp_time_stamp ((__u32)(jiffies)) ++#define tcp_time_stamp ((__u32)(jiffies + get_exec_env()->jiffies_fixup)) + + /* This is what the send packet queueing engine uses to pass + * TCP per-packet control information to the transmission +@@ -1305,7 +1343,8 @@ static inline __u32 tcp_current_ssthresh + + static inline void tcp_sync_left_out(struct tcp_opt *tp) + { +- if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) ++ if (tp->rx_opt.sack_ok && ++ tp->sacked_out >= tp->packets_out - tp->lost_out) + tp->sacked_out = tp->packets_out - tp->lost_out; + tp->left_out = tp->sacked_out + tp->lost_out; + } +@@ -1615,39 +1654,39 @@ static __inline__ void tcp_done(struct s + tcp_destroy_sock(sk); + } + +-static __inline__ void tcp_sack_reset(struct tcp_opt *tp) ++static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) + { +- tp->dsack = 0; +- tp->eff_sacks = 0; +- tp->num_sacks = 0; ++ rx_opt->dsack = 0; ++ rx_opt->eff_sacks = 0; ++ rx_opt->num_sacks = 0; + } + + static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp) + { +- if (tp->tstamp_ok) { ++ if (tp->rx_opt.tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); +- *ptr++ = htonl(tp->ts_recent); ++ *ptr++ = htonl(tp->rx_opt.ts_recent); + } +- if (tp->eff_sacks) { +- struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks; ++ if (tp->rx_opt.eff_sacks) { ++ struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + int this_sack; + + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + +- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK))); +- for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) { ++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK))); ++ for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + *ptr++ = htonl(sp[this_sack].start_seq); + *ptr++ = htonl(sp[this_sack].end_seq); + } +- if (tp->dsack) { +- tp->dsack = 0; +- tp->eff_sacks--; ++ if (tp->rx_opt.dsack) { ++ tp->rx_opt.dsack = 0; ++ tp->rx_opt.eff_sacks--; + } + } + } +@@ -1851,17 +1890,17 @@ static inline void tcp_synq_drop(struct + } + + static __inline__ void tcp_openreq_init(struct open_request *req, +- struct tcp_opt *tp, ++ struct tcp_options_received *rx_opt, + struct sk_buff *skb) + { + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->rcv_isn = TCP_SKB_CB(skb)->seq; +- req->mss = tp->mss_clamp; +- req->ts_recent = tp->saw_tstamp ? 
tp->rcv_tsval : 0; +- req->tstamp_ok = tp->tstamp_ok; +- req->sack_ok = tp->sack_ok; +- req->snd_wscale = tp->snd_wscale; +- req->wscale_ok = tp->wscale_ok; ++ req->mss = rx_opt->mss_clamp; ++ req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ++ req->tstamp_ok = rx_opt->tstamp_ok; ++ req->sack_ok = rx_opt->sack_ok; ++ req->snd_wscale = rx_opt->snd_wscale; ++ req->wscale_ok = rx_opt->wscale_ok; + req->acked = 0; + req->ecn_ok = 0; + req->rmt_port = skb->h.th->source; +@@ -1910,11 +1949,11 @@ static inline int tcp_fin_time(struct tc + return fin_timeout; + } + +-static inline int tcp_paws_check(struct tcp_opt *tp, int rst) ++static inline int tcp_paws_check(struct tcp_options_received *rx_opt, int rst) + { +- if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) ++ if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0) + return 0; +- if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) ++ if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS) + return 0; + + /* RST segments are not recommended to carry timestamp, +@@ -1929,7 +1968,7 @@ static inline int tcp_paws_check(struct + + However, we can relax time bounds for RST segments to MSL. + */ +- if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL) ++ if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) + return 0; + return 1; + } +@@ -1941,6 +1980,8 @@ static inline void tcp_v4_setup_caps(str + if (sk->sk_no_largesend || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; + } ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + } + + #define TCP_CHECK_TIMER(sk) do { } while (0) +diff -uprN linux-2.6.8.1.orig/include/net/udp.h linux-2.6.8.1-ve022stab078/include/net/udp.h +--- linux-2.6.8.1.orig/include/net/udp.h 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/include/net/udp.h 2006-05-11 13:05:40.000000000 +0400 +@@ -40,13 +40,19 @@ extern rwlock_t udp_hash_lock; + + extern int udp_port_rover; + +-static inline int udp_lport_inuse(u16 num) ++static inline int udp_hashfn(u16 num, unsigned veid) ++{ ++ return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1)); ++} ++ ++static inline int udp_lport_inuse(u16 num, struct ve_struct *env) + { + struct sock *sk; + struct hlist_node *node; + +- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) +- if (inet_sk(sk)->num == num) ++ sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))]) ++ if (inet_sk(sk)->num == num && ++ ve_accessible_strict(VE_OWNER_SK(sk), env)) + return 1; + return 0; + } +@@ -73,9 +79,14 @@ extern int udp_ioctl(struct sock *sk, in + extern int udp_disconnect(struct sock *sk, int flags); + + DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); +-#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field) +-#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field) +-#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field) ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_udp_statistics (get_exec_env()->_udp_statistics) ++#else ++#define ve_udp_statistics udp_statistics ++#endif ++#define UDP_INC_STATS(field) SNMP_INC_STATS(ve_udp_statistics, field) ++#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_statistics, field) ++#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_statistics, field) + + /* /proc */ + struct udp_seq_afinfo { +diff -uprN linux-2.6.8.1.orig/include/ub/beancounter.h linux-2.6.8.1-ve022stab078/include/ub/beancounter.h +--- linux-2.6.8.1.orig/include/ub/beancounter.h 1970-01-01 
03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/beancounter.h 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,321 @@ ++/* ++ * include/ub/beancounter.h ++ * ++ * Copyright (C) 1999-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * Andrey Savochkin saw@sw-soft.com ++ * ++ */ ++ ++#ifndef _LINUX_BEANCOUNTER_H ++#define _LINUX_BEANCOUNTER_H ++ ++#include <linux/config.h> ++ ++/* ++ * Generic ratelimiting stuff. ++ */ ++ ++struct ub_rate_info { ++ int burst; ++ int interval; /* jiffy_t per event */ ++ int bucket; /* kind of leaky bucket */ ++ unsigned long last; /* last event */ ++}; ++ ++/* Return true if rate limit permits. */ ++int ub_ratelimit(struct ub_rate_info *); ++ ++ ++/* ++ * This magic is used to distinuish user beancounter and pages beancounter ++ * in struct page. page_ub and page_bc are placed in union and MAGIC ++ * ensures us that we don't use pbc as ubc in ub_page_uncharge(). ++ */ ++#define UB_MAGIC 0x62756275 ++ ++/* ++ * Resource list. ++ */ ++ ++#define UB_KMEMSIZE 0 /* Unswappable kernel memory size including ++ * struct task, page directories, etc. ++ */ ++#define UB_LOCKEDPAGES 1 /* Mlock()ed pages. */ ++#define UB_PRIVVMPAGES 2 /* Total number of pages, counting potentially ++ * private pages as private and used. ++ */ ++#define UB_SHMPAGES 3 /* IPC SHM segment size. */ ++#define UB_ZSHMPAGES 4 /* Anonymous shared memory. */ ++#define UB_NUMPROC 5 /* Number of processes. */ ++#define UB_PHYSPAGES 6 /* All resident pages, for swapout guarantee. */ ++#define UB_VMGUARPAGES 7 /* Guarantee for memory allocation, ++ * checked against PRIVVMPAGES. ++ */ ++#define UB_OOMGUARPAGES 8 /* Guarantees against OOM kill. ++ * Only limit is used, no accounting. ++ */ ++#define UB_NUMTCPSOCK 9 /* Number of TCP sockets. */ ++#define UB_NUMFLOCK 10 /* Number of file locks. */ ++#define UB_NUMPTY 11 /* Number of PTYs. */ ++#define UB_NUMSIGINFO 12 /* Number of siginfos. */ ++#define UB_TCPSNDBUF 13 /* Total size of tcp send buffers. */ ++#define UB_TCPRCVBUF 14 /* Total size of tcp receive buffers. */ ++#define UB_OTHERSOCKBUF 15 /* Total size of other socket ++ * send buffers (all buffers for PF_UNIX). ++ */ ++#define UB_DGRAMRCVBUF 16 /* Total size of other socket ++ * receive buffers. ++ */ ++#define UB_NUMOTHERSOCK 17 /* Number of other sockets. */ ++#define UB_DCACHESIZE 18 /* Size of busy dentry/inode cache. */ ++#define UB_NUMFILE 19 /* Number of open files. */ ++ ++#define UB_RESOURCES 24 ++ ++#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0) ++#define UB_TMPFSPAGES (UB_RESOURCES + 1) ++#define UB_SWAPPAGES (UB_RESOURCES + 2) ++#define UB_HELDPAGES (UB_RESOURCES + 3) ++ ++struct ubparm { ++ /* ++ * A barrier over which resource allocations are failed gracefully. ++ * If the amount of consumed memory is over the barrier further sbrk() ++ * or mmap() calls fail, the existing processes are not killed. ++ */ ++ unsigned long barrier; ++ /* hard resource limit */ ++ unsigned long limit; ++ /* consumed resources */ ++ unsigned long held; ++ /* maximum amount of consumed resources through the last period */ ++ unsigned long maxheld; ++ /* minimum amount of consumed resources through the last period */ ++ unsigned long minheld; ++ /* count of failed charges */ ++ unsigned long failcnt; ++}; ++ ++/* ++ * Kernel internal part. 
++ */ ++ ++#ifdef __KERNEL__ ++ ++#include <ub/ub_debug.h> ++#include <linux/interrupt.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <linux/cache.h> ++#include <linux/threads.h> ++ ++/* ++ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form. ++ */ ++#define UB_MAXVALUE ( (1UL << (sizeof(unsigned long)*8-1)) - 1) ++ ++ ++/* ++ * Resource management structures ++ * Serialization issues: ++ * beancounter list management is protected via ub_hash_lock ++ * task pointers are set only for current task and only once ++ * refcount is managed atomically ++ * value and limit comparison and change are protected by per-ub spinlock ++ */ ++ ++struct page_beancounter; ++struct task_beancounter; ++struct sock_beancounter; ++ ++struct page_private { ++ unsigned long ubp_unused_privvmpages; ++ unsigned long ubp_tmpfs_respages; ++ unsigned long ubp_swap_pages; ++ unsigned long long ubp_held_pages; ++}; ++ ++struct sock_private { ++ unsigned long ubp_rmem_thres; ++ unsigned long ubp_wmem_pressure; ++ unsigned long ubp_maxadvmss; ++ unsigned long ubp_rmem_pressure; ++#define UB_RMEM_EXPAND 0 ++#define UB_RMEM_KEEP 1 ++#define UB_RMEM_SHRINK 2 ++ struct list_head ubp_other_socks; ++ struct list_head ubp_tcp_socks; ++ atomic_t ubp_orphan_count; ++}; ++ ++struct ub_perfstat { ++ unsigned long unmap; ++ unsigned long swapin; ++} ____cacheline_aligned_in_smp; ++ ++struct user_beancounter ++{ ++ unsigned long ub_magic; ++ atomic_t ub_refcount; ++ struct user_beancounter *ub_next; ++ spinlock_t ub_lock; ++ uid_t ub_uid; ++ ++ struct ub_rate_info ub_limit_rl; ++ int ub_oom_noproc; ++ ++ struct page_private ppriv; ++#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages ++#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages ++#define ub_swap_pages ppriv.ubp_swap_pages ++#define ub_held_pages ppriv.ubp_held_pages ++ struct sock_private spriv; ++#define ub_rmem_thres spriv.ubp_rmem_thres ++#define ub_maxadvmss spriv.ubp_maxadvmss ++#define ub_rmem_pressure spriv.ubp_rmem_pressure ++#define ub_wmem_pressure spriv.ubp_wmem_pressure ++#define ub_tcp_sk_list spriv.ubp_tcp_socks ++#define ub_other_sk_list spriv.ubp_other_socks ++#define ub_orphan_count spriv.ubp_orphan_count ++ ++ struct user_beancounter *parent; ++ void *private_data; ++ unsigned long ub_aflags; ++ ++ /* resources statistic and settings */ ++ struct ubparm ub_parms[UB_RESOURCES]; ++ /* resources statistic for last interval */ ++ struct ubparm ub_store[UB_RESOURCES]; ++ ++ struct ub_perfstat ub_perfstat[NR_CPUS]; ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ struct list_head ub_cclist; ++ long ub_pages_charged[NR_CPUS]; ++ long ub_vmalloc_charged[NR_CPUS]; ++#endif ++}; ++ ++enum severity { UB_HARD, UB_SOFT, UB_FORCE }; ++ ++#define UB_AFLAG_NOTIF_PAGEIN 0 ++ ++static inline int ub_barrier_hit(struct user_beancounter *ub, int resource) ++{ ++ return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier; ++} ++ ++static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource) ++{ ++ return (ub->ub_parms[resource].held > ++ ((ub->ub_parms[resource].barrier) >> 1)); ++} ++ ++#ifndef CONFIG_USER_RESOURCE ++ ++extern inline struct user_beancounter *get_beancounter_byuid ++ (uid_t uid, int create) { return NULL; } ++extern inline struct user_beancounter *get_beancounter ++ (struct user_beancounter *ub) { return NULL; } ++extern inline void put_beancounter(struct user_beancounter *ub) {;} ++ ++static inline void page_ubc_init(void) { }; ++static inline void beancounter_init(unsigned long mempages) { }; 
++static inline void ub0_init(void) { }; ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++/* ++ * Charge/uncharge operations ++ */ ++ ++extern int __charge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict); ++ ++extern void __uncharge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val); ++ ++extern void __put_beancounter(struct user_beancounter *ub); ++ ++extern void uncharge_warn(struct user_beancounter *ub, int resource, ++ unsigned long val, unsigned long held); ++ ++extern const char *ub_rnames[]; ++/* ++ * Put a beancounter reference ++ */ ++ ++static inline void put_beancounter(struct user_beancounter *ub) ++{ ++ if (unlikely(ub == NULL)) ++ return; ++ ++ __put_beancounter(ub); ++} ++ ++/* ++ * Create a new beancounter reference ++ */ ++extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create); ++ ++static inline ++struct user_beancounter *get_beancounter(struct user_beancounter *ub) ++{ ++ if (unlikely(ub == NULL)) ++ return NULL; ++ ++ atomic_inc(&ub->ub_refcount); ++ return ub; ++} ++ ++extern struct user_beancounter *get_subbeancounter_byid( ++ struct user_beancounter *, ++ int id, int create); ++extern struct user_beancounter *subbeancounter_findcreate( ++ struct user_beancounter *p, int id); ++ ++extern void beancounter_init(unsigned long); ++extern void page_ubc_init(void); ++extern struct user_beancounter ub0; ++extern void ub0_init(void); ++#define get_ub0() (&ub0) ++ ++extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size); ++ ++/* ++ * Resource charging ++ * Change user's account and compare against limits ++ */ ++ ++static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource) ++{ ++ if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held) ++ ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held; ++ if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held) ++ ub->ub_parms[resource].minheld = ub->ub_parms[resource].held; ++} ++ ++#endif /* CONFIG_USER_RESOURCE */ ++ ++#include <ub/ub_decl.h> ++UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict)); ++UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val)); ++ ++UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val)); ++UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val)); ++ ++#ifndef CONFIG_USER_RESOURCE_PROC ++static inline void beancounter_proc_init(void) { }; ++#else ++extern void beancounter_proc_init(void); ++#endif ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_BEANCOUNTER_H */ +diff -uprN linux-2.6.8.1.orig/include/ub/ub_dcache.h linux-2.6.8.1-ve022stab078/include/ub/ub_dcache.h +--- linux-2.6.8.1.orig/include/ub/ub_dcache.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_dcache.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,56 @@ ++/* ++ * include/ub/ub_dcache.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
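
The ubparm fields and helpers above (held, maxheld, failcnt, ub_adjust_maxheld(), the UB_HARD/UB_SOFT/UB_FORCE severities) fit together roughly as in the sketch below. The severity handling shown -- UB_HARD also checking the barrier, UB_SOFT only the limit, UB_FORCE never failing -- is an assumption about the out-of-line __charge_beancounter_locked(); everything with a _model suffix is invented for the example, and locking and the per-resource array are omitted.

#include <stdio.h>

struct ubparm_model {
	unsigned long barrier, limit;
	unsigned long held, maxheld, failcnt;
};

enum severity_model { MODEL_HARD, MODEL_SOFT, MODEL_FORCE };

static int charge_model(struct ubparm_model *p, unsigned long val,
			enum severity_model strict)
{
	p->held += val;
	if ((strict == MODEL_HARD && p->held > p->barrier) ||
	    (strict != MODEL_FORCE && p->held > p->limit)) {
		p->held -= val;		/* roll back and record the failure */
		p->failcnt++;
		return -1;
	}
	if (p->maxheld < p->held)
		p->maxheld = p->held;	/* like ub_adjust_maxheld() */
	return 0;
}

static void uncharge_model(struct ubparm_model *p, unsigned long val)
{
	p->held -= val;			/* the real code warns on underflow */
}

int main(void)
{
	struct ubparm_model numfile = { .barrier = 100, .limit = 128 };

	charge_model(&numfile, 90, MODEL_HARD);	/* ok */
	charge_model(&numfile, 20, MODEL_HARD);	/* over barrier: fails */
	charge_model(&numfile, 20, MODEL_SOFT);	/* still under limit: ok */
	uncharge_model(&numfile, 10);
	printf("held=%lu maxheld=%lu failcnt=%lu\n",
	       numfile.held, numfile.maxheld, numfile.failcnt);
	return 0;
}
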
++ * ++ */ ++ ++#ifndef __UB_DCACHE_H_ ++#define __UB_DCACHE_H_ ++ ++#include <ub/ub_decl.h> ++ ++/* ++ * UB_DCACHESIZE accounting ++ */ ++ ++struct dentry_beancounter ++{ ++ /* ++ * d_inuse = ++ * <number of external refs> + ++ * <number of 'used' childs> ++ * ++ * d_inuse == -1 means that dentry is unused ++ * state change -1 => 0 causes charge ++ * state change 0 => -1 causes uncharge ++ */ ++ atomic_t d_inuse; ++ /* charged size, including name length if name is not inline */ ++ unsigned long d_ubsize; ++ struct user_beancounter *d_ub; ++}; ++ ++extern unsigned int inode_memusage(void); ++extern unsigned int dentry_memusage(void); ++ ++struct dentry; ++ ++UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d)) ++UB_DECLARE_VOID_FUNC(ub_dentry_free(struct dentry *d)) ++UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d)) ++UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d)) ++ ++#ifdef CONFIG_USER_RESOURCE ++UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d)) ++#else ++#define ub_dentry_charge(d) ({ \ ++ spin_unlock(&d->d_lock); \ ++ rcu_read_unlock(); \ ++ 0; \ ++ }) ++#endif ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_debug.h linux-2.6.8.1-ve022stab078/include/ub/ub_debug.h +--- linux-2.6.8.1.orig/include/ub/ub_debug.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_debug.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,95 @@ ++/* ++ * include/ub/ub_debug.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_DEBUG_H_ ++#define __UB_DEBUG_H_ ++ ++/* ++ * general debugging ++ */ ++ ++#define UBD_ALLOC 0x1 ++#define UBD_CHARGE 0x2 ++#define UBD_LIMIT 0x4 ++#define UBD_TRACE 0x8 ++ ++/* ++ * ub_net debugging ++ */ ++ ++#define UBD_NET_SOCKET 0x10 ++#define UBD_NET_SLEEP 0x20 ++#define UBD_NET_SEND 0x40 ++#define UBD_NET_RECV 0x80 ++ ++/* ++ * Main routines ++ */ ++ ++#define UB_DEBUG (0) ++#define DEBUG_RESOURCE (0ULL) ++ ++#define ub_dbg_cond(__cond, __str, args...) \ ++ do { \ ++ if ((__cond) != 0) \ ++ printk(__str, ##args); \ ++ } while(0) ++ ++#define ub_debug(__section, __str, args...) \ ++ ub_dbg_cond(UB_DEBUG & (__section), __str, ##args) ++ ++#define ub_debug_resource(__resource, __str, args...) 
\ ++ ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && \ ++ (DEBUG_RESOURCE & (1 << (__resource))), \ ++ __str, ##args) ++ ++#if UB_DEBUG & UBD_TRACE ++#define ub_debug_trace(__cond, __b, __r) \ ++ do { \ ++ static struct ub_rate_info ri = { __b, __r }; \ ++ if ((__cond) != 0 && ub_ratelimit(&ri)) \ ++ dump_stack(); \ ++ } while(0) ++#else ++#define ub_debug_trace(__cond, __burst, __rate) ++#endif ++ ++#include <linux/config.h> ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++#include <linux/list.h> ++#include <linux/kmem_cache.h> ++ ++struct user_beancounter; ++struct ub_cache_counter { ++ struct list_head ulist; ++ struct ub_cache_counter *next; ++ struct user_beancounter *ub; ++ kmem_cache_t *cachep; ++ unsigned long counter; ++}; ++ ++extern spinlock_t cc_lock; ++extern void init_cache_counters(void); ++extern void ub_free_counters(struct user_beancounter *); ++extern void ub_kmemcache_free(kmem_cache_t *cachep); ++ ++struct vm_struct; ++extern void inc_vmalloc_charged(struct vm_struct *, int); ++extern void dec_vmalloc_charged(struct vm_struct *); ++#else ++#define init_cache_counters() do { } while (0) ++#define inc_vmalloc_charged(vm, f) do { } while (0) ++#define dec_vmalloc_charged(vm) do { } while (0) ++#define ub_free_counters(ub) do { } while (0) ++#define ub_kmemcache_free(cachep) do { } while (0) ++#endif ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_decl.h linux-2.6.8.1-ve022stab078/include/ub/ub_decl.h +--- linux-2.6.8.1.orig/include/ub/ub_decl.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_decl.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,40 @@ ++/* ++ * include/ub/ub_decl.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_DECL_H_ ++#define __UB_DECL_H_ ++ ++#include <linux/config.h> ++ ++/* ++ * Naming convension: ++ * ub_<section|object>_<operation> ++ */ ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl; ++#define UB_DECLARE_VOID_FUNC(decl) extern void decl; ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++#define UB_DECLARE_FUNC(ret_type, decl) \ ++ static inline ret_type decl \ ++ { \ ++ return (ret_type)0; \ ++ } ++#define UB_DECLARE_VOID_FUNC(decl) \ ++ static inline void decl \ ++ { \ ++ } ++ ++#endif /* CONFIG_USER_RESOURCE */ ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_hash.h linux-2.6.8.1-ve022stab078/include/ub/ub_hash.h +--- linux-2.6.8.1.orig/include/ub/ub_hash.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_hash.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,41 @@ ++/* ++ * include/ub/ub_hash.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef _LINUX_UBHASH_H ++#define _LINUX_UBHASH_H ++ ++#ifdef __KERNEL__ ++ ++#define UB_HASH_SIZE 256 ++ ++struct ub_hash_slot { ++ struct user_beancounter *ubh_beans; ++}; ++ ++extern struct ub_hash_slot ub_hash[]; ++extern spinlock_t ub_hash_lock; ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++/* ++ * Iterate over beancounters ++ * @__slot - hash slot ++ * @__ubp - beancounter ptr ++ * Can use break :) ++ */ ++#define for_each_beancounter(__slot, __ubp) \ ++ for (__slot = 0, __ubp = NULL; \ ++ __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \ ++ for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \ ++ __ubp = __ubp->ub_next) ++ ++#endif /* CONFIG_USER_RESOURCE */ ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_UBHASH_H */ +diff -uprN linux-2.6.8.1.orig/include/ub/ub_mem.h linux-2.6.8.1-ve022stab078/include/ub/ub_mem.h +--- linux-2.6.8.1.orig/include/ub/ub_mem.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_mem.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,90 @@ ++/* ++ * include/ub/ub_mem.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_SLAB_H_ ++#define __UB_SLAB_H_ ++ ++#include <linux/config.h> ++#include <linux/kmem_slab.h> ++#include <linux/vmalloc.h> ++#include <linux/gfp.h> ++#include <asm/pgtable.h> ++#include <ub/beancounter.h> ++#include <ub/ub_decl.h> ++ ++/* ++ * UB_KMEMSIZE accounting ++ * oom_killer related ++ */ ++ ++/* ++ * Memory freeing statistics to make correct OOM decision ++ */ ++ ++struct oom_freeing_stat ++{ ++ unsigned long oom_generation; /* current OOM gen */ ++ unsigned long freed; ++ unsigned long swapped; /* page referrence counters removed */ ++ unsigned long written; /* IO started */ ++ unsigned long slabs; /* slabs shrinked */ ++}; ++ ++extern int oom_generation; ++extern int oom_kill_counter; ++extern spinlock_t oom_generation_lock; ++ ++#ifdef CONFIG_UBC_DEBUG_ITEMS ++#define CHARGE_ORDER(__o) (1 << __o) ++#define CHARGE_SIZE(__s) 1 ++#else ++#define CHARGE_ORDER(__o) (PAGE_SIZE << (__o)) ++#define CHARGE_SIZE(__s) (__s) ++#endif ++ ++#define page_ub(__page) ((__page)->bc.page_ub) ++ ++struct mm_struct; ++struct page; ++ ++UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj)) ++UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj)) ++UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj)) ++ ++UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask)) ++UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order)) ++ ++UB_DECLARE_VOID_FUNC(ub_clear_oom(void)) ++UB_DECLARE_VOID_FUNC(ub_oomkill_task(struct mm_struct *mm, ++ struct user_beancounter *ub, long overdraft)) ++UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags)) ++UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj)) ++ ++#ifdef CONFIG_USER_RESOURCE ++/* Flags without __GFP_UBC must comply with vmalloc */ ++#define ub_vmalloc(size) __vmalloc(size, \ ++ GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL) ++#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC)) ++extern struct user_beancounter *ub_select_worst(long *); ++#else ++#define ub_vmalloc(size) vmalloc(size) ++#define ub_kmalloc(size, flags) kmalloc(size, flags) ++static inline struct user_beancounter *ub_select_worst(long *over) ++{ ++ *over = 0; ++ return NULL; ++} ++#endif ++ ++#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\ ++ (ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\ ++ sizeof(void 
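
for_each_beancounter() above is a nested pair of for loops: the outer loop walks the hash slots only while the cursor is still NULL, so a break inside the body (which leaves the inner loop with a non-NULL cursor) terminates the whole walk -- that is what the "Can use break" comment refers to. A userspace rendition of the same idiom, with placeholder types and a tiny table:

#include <stdio.h>
#include <stddef.h>

#define HASH_SIZE 4

struct bean {
	unsigned int uid;
	struct bean *next;
};

static struct bean *hash[HASH_SIZE];

#define for_each_bean(slot, bp)					\
	for ((slot) = 0, (bp) = NULL;				\
	     (slot) < HASH_SIZE && (bp) == NULL; (slot)++)	\
		for ((bp) = hash[(slot)]; (bp); (bp) = (bp)->next)

int main(void)
{
	struct bean b1 = { 101, NULL }, b2 = { 105, NULL }, b3 = { 102, NULL };
	struct bean *bp;
	int slot;

	hash[1] = &b1;
	b1.next = &b2;			/* two beans chained in one slot */
	hash[2] = &b3;

	/* Full walk over every slot and chain. */
	for_each_bean(slot, bp)
		printf("slot %d: uid %u\n", slot, bp->uid);

	/* Early exit: break stops both loops, bp keeps the match. */
	for_each_bean(slot, bp)
		if (bp->uid == 102)
			break;
	printf("found: %s\n", bp ? "uid 102" : "nothing");
	return 0;
}
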
*)))) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_misc.h linux-2.6.8.1-ve022stab078/include/ub/ub_misc.h +--- linux-2.6.8.1.orig/include/ub/ub_misc.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_misc.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,33 @@ ++/* ++ * include/ub/ub_misc.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_MISC_H_ ++#define __UB_MISC_H_ ++ ++#include <ub/ub_decl.h> ++ ++struct tty_struct; ++struct file; ++struct file_lock; ++ ++UB_DECLARE_FUNC(int, ub_file_charge(struct file *f)) ++UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f)) ++UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard)) ++UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl)) ++UB_DECLARE_FUNC(int, ub_siginfo_charge(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent, ++ struct task_struct *task)) ++UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task)) ++UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty)) ++UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty)) ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_net.h linux-2.6.8.1-ve022stab078/include/ub/ub_net.h +--- linux-2.6.8.1.orig/include/ub/ub_net.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_net.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,141 @@ ++/* ++ * include/ub/ub_net.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_NET_H_ ++#define __UB_NET_H_ ++ ++/* ++ * UB_NUMXXXSOCK, UB_XXXBUF accounting ++ */ ++ ++#include <ub/ub_decl.h> ++#include <ub/ub_sk.h> ++ ++#define bid2sid(__bufid) \ ++ ((__bufid) == UB_TCPSNDBUF ? 
UB_NUMTCPSOCK : UB_NUMOTHERSOCK) ++ ++#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \ ++ ~(SMP_CACHE_BYTES-1))) ++#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE) ++ ++ ++#define IS_TCP_SOCK(__family, __type) \ ++ ((__family) == PF_INET && (__type) == SOCK_STREAM) ++ ++UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type)) ++UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk)) ++UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)) ++UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)) ++UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource, ++ unsigned long size)) ++UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo, ++ unsigned long size)) ++ ++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk, ++ struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)) ++UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk, ++ struct sk_buff *skb)) ++ ++/* Charge size */ ++static inline unsigned long skb_charge_datalen(unsigned long chargesize) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned long slabsize; ++ ++ chargesize -= sizeof(struct sk_buff); ++ slabsize = 64; ++ do { ++ slabsize <<= 1; ++ } while (slabsize <= chargesize); ++ ++ slabsize >>= 1; ++ return (slabsize - sizeof(struct skb_shared_info)) & ++ ~(SMP_CACHE_BYTES-1); ++#else ++ return 0; ++#endif ++} ++ ++static inline unsigned long skb_charge_size_gen(unsigned long size) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int slabsize; ++ ++ size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); ++ slabsize = 32; /* min size is 64 because of skb_shared_info */ ++ do { ++ slabsize <<= 1; ++ } while (slabsize < size); ++ ++ return slabsize + sizeof(struct sk_buff); ++#else ++ return 0; ++#endif ++ ++} ++ ++static inline unsigned long skb_charge_size_const(unsigned long size) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ unsigned int ret; ++ if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 64) ++ ret = 64 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 128) ++ ret = 128 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 256) ++ ret = 256 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 512) ++ ret = 512 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 1024) ++ ret = 1024 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 2048) ++ ret = 2048 + sizeof(struct sk_buff); ++ else if (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info) <= 4096) ++ ret = 4096 + sizeof(struct sk_buff); ++ else ++ ret = skb_charge_size_gen(size); ++ return ret; ++#else ++ return 0; ++#endif ++} ++ ++ ++#define skb_charge_size(__size) \ ++ (__builtin_constant_p(__size) ? 
\ ++ skb_charge_size_const(__size) : \ ++ skb_charge_size_gen(__size)) ++ ++UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb)) ++UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb, ++ struct sock *sk, unsigned long size, int res)) ++ ++/* Poll reserv */ ++UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz)) ++UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size)) ++UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size)) ++UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size, ++ unsigned long ressize)) ++UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size, ++ unsigned long ressize)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)) ++UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk)) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_orphan.h linux-2.6.8.1-ve022stab078/include/ub/ub_orphan.h +--- linux-2.6.8.1.orig/include/ub/ub_orphan.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_orphan.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,54 @@ ++/* ++ * include/ub/ub_orphan.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_ORPHAN_H_ ++#define __UB_ORPHAN_H_ ++ ++#include "ub/beancounter.h" ++#include "ub/ub_net.h" ++ ++ ++extern int ub_too_many_orphans(struct sock *sk, int count); ++static inline int tcp_too_many_orphans(struct sock *sk, int count) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (ub_too_many_orphans(sk, count)) ++ return 1; ++#endif ++ return (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || ++ (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && ++ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])); ++} ++ ++static inline atomic_t *tcp_get_orphan_count_ptr(struct sock *sk) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return &sock_bc(sk)->ub->ub_orphan_count; ++#endif ++ return &tcp_orphan_count; ++} ++ ++static inline void tcp_inc_orphan_count(struct sock *sk) ++{ ++ atomic_inc(tcp_get_orphan_count_ptr(sk)); ++} ++ ++static inline void tcp_dec_orphan_count(struct sock *sk) ++{ ++ atomic_dec(tcp_get_orphan_count_ptr(sk)); ++} ++ ++static inline int tcp_get_orphan_count(struct sock *sk) ++{ ++ return atomic_read(tcp_get_orphan_count_ptr(sk)); ++} ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_page.h linux-2.6.8.1-ve022stab078/include/ub/ub_page.h +--- linux-2.6.8.1.orig/include/ub/ub_page.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_page.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,48 @@ ++/* ++ * include/ub/ub_page.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_PAGE_H_ ++#define __UB_PAGE_H_ ++ ++#include <linux/config.h> ++ ++/* ++ * Page_beancounters ++ */ ++ ++struct page; ++struct user_beancounter; ++ ++#define PB_MAGIC 0x62700001UL ++ ++struct page_beancounter { ++ unsigned long pb_magic; ++ struct page *page; ++ struct user_beancounter *ub; ++ struct page_beancounter *next_hash; ++ unsigned refcount; ++ struct list_head page_list; ++}; ++ ++#define PB_REFCOUNT_BITS 24 ++#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS) ++#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS)) ++#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS)) ++#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1)) ++#define PB_COUNT_INC(c) ((c)++) ++#define PB_COUNT_DEC(c) ((c)--) ++#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c)) ++ ++#define page_pbc(__page) ((__page)->bc.page_pbc) ++ ++struct address_space; ++extern int is_shmem_mapping(struct address_space *); ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_sk.h linux-2.6.8.1-ve022stab078/include/ub/ub_sk.h +--- linux-2.6.8.1.orig/include/ub/ub_sk.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_sk.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,45 @@ ++/* ++ * include/ub/ub_sk.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_SK_H_ ++#define __UB_SK_H_ ++ ++#include <linux/config.h> ++#include <ub/ub_task.h> ++ ++struct sock; ++struct sk_buff; ++ ++struct skb_beancounter { ++ struct user_beancounter *ub; ++ unsigned long charged:27, resource:5; ++}; ++ ++struct sock_beancounter { ++ /* ++ * already charged for future sends, to make poll work; ++ * changes are protected by bc spinlock, read is under socket ++ * semaphore for sends and unprotected in poll ++ */ ++ unsigned long poll_reserv; ++ unsigned long ub_waitspc; /* space waiting for */ ++ unsigned long ub_wcharged; ++ struct list_head ub_sock_list; ++ struct user_beancounter *ub; ++}; ++ ++#define sock_bc(__sk) (&(__sk)->sk_bc) ++#define skb_bc(__skb) (&(__skb)->skb_bc) ++#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc)) ++#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL) ++ ++#define set_sk_exec_ub(__sk) (set_exec_ub(sock_bc(sk)->ub)) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_stat.h linux-2.6.8.1-ve022stab078/include/ub/ub_stat.h +--- linux-2.6.8.1.orig/include/ub/ub_stat.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_stat.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,70 @@ ++/* ++ * include/ub/ub_stat.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_STAT_H_ ++#define __UB_STAT_H_ ++ ++/* sys_ubstat commands list */ ++#define UBSTAT_READ_ONE 0x010000 ++#define UBSTAT_READ_ALL 0x020000 ++#define UBSTAT_READ_FULL 0x030000 ++#define UBSTAT_UBLIST 0x040000 ++#define UBSTAT_UBPARMNUM 0x050000 ++#define UBSTAT_GETTIME 0x060000 ++ ++#define UBSTAT_CMD(func) ((func) & 0xF0000) ++#define UBSTAT_PARMID(func) ((func) & 0x0FFFF) ++ ++#define TIME_MAX_SEC (LONG_MAX / HZ) ++#define TIME_MAX_JIF (TIME_MAX_SEC * HZ) ++ ++typedef unsigned long ubstattime_t; ++ ++typedef struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstattime_t cur_time; ++} ubgettime_t; ++ ++typedef struct { ++ long maxinterval; ++ int signum; ++} ubnotifrq_t; ++ ++typedef struct { ++ unsigned long maxheld; ++ unsigned long failcnt; ++} ubstatparm_t; ++ ++typedef struct { ++ unsigned long barrier; ++ unsigned long limit; ++ unsigned long held; ++ unsigned long maxheld; ++ unsigned long minheld; ++ unsigned long failcnt; ++ unsigned long __unused1; ++ unsigned long __unused2; ++} ubstatparmf_t; ++ ++typedef struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparmf_t param[0]; ++} ubstatfull_t; ++ ++#ifdef __KERNEL__ ++struct ub_stat_notify { ++ struct list_head list; ++ struct task_struct *task; ++ int signum; ++}; ++#endif ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_task.h linux-2.6.8.1-ve022stab078/include/ub/ub_task.h +--- linux-2.6.8.1.orig/include/ub/ub_task.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_task.h 2006-05-11 13:05:49.000000000 +0400 +@@ -0,0 +1,50 @@ ++/* ++ * include/ub/ub_task.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_TASK_H_ ++#define __UB_TASK_H_ ++ ++#include <linux/config.h> ++ ++struct user_beancounter; ++ ++ ++#ifdef CONFIG_USER_RESOURCE ++ ++struct task_beancounter { ++ struct user_beancounter *exec_ub; ++ struct user_beancounter *task_ub; ++ struct user_beancounter *fork_sub; ++ void *task_fnode, *task_freserv; ++ unsigned long task_data[4]; ++}; ++ ++#define task_bc(__tsk) (&((__tsk)->task_bc)) ++ ++#define get_exec_ub() (task_bc(current)->exec_ub) ++#define get_task_ub(__task) (task_bc(__task)->task_ub) ++#define set_exec_ub(__newub) \ ++({ \ ++ struct user_beancounter *old; \ ++ struct task_beancounter *tbc; \ ++ tbc = task_bc(current); \ ++ old = tbc->exec_ub; \ ++ tbc->exec_ub = __newub; \ ++ old; \ ++}) ++ ++#else /* CONFIG_USER_RESOURCE */ ++ ++#define get_exec_ub() (NULL) ++#define get_task_ub(task) (NULL) ++#define set_exec_ub(__ub) (NULL) ++ ++#endif /* CONFIG_USER_RESOURCE */ ++#endif /* __UB_TASK_H_ */ +diff -uprN linux-2.6.8.1.orig/include/ub/ub_tcp.h linux-2.6.8.1-ve022stab078/include/ub/ub_tcp.h +--- linux-2.6.8.1.orig/include/ub/ub_tcp.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_tcp.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,79 @@ ++/* ++ * include/ub/ub_tcp.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#ifndef __UB_TCP_H_ ++#define __UB_TCP_H_ ++ ++/* ++ * UB_NUMXXXSOCK, UB_XXXBUF accounting ++ */ ++ ++#include <ub/ub_sk.h> ++#include <ub/beancounter.h> ++ ++static inline void ub_tcp_update_maxadvmss(struct sock *sk) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ if (!sock_has_ubc(sk)) ++ return; ++ if (sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss) ++ return; ++ ++ sock_bc(sk)->ub->ub_maxadvmss = ++ skb_charge_size(MAX_HEADER + sizeof(struct iphdr) ++ + sizeof(struct tcphdr) + tcp_sk(sk)->advmss); ++#endif ++} ++ ++static inline int ub_tcp_rmem_allows_expand(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 0; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) { ++ struct user_beancounter *ub; ++ ++ ub = sock_bc(sk)->ub; ++ if (ub->ub_rmem_pressure == UB_RMEM_EXPAND) ++ return 1; ++ if (ub->ub_rmem_pressure == UB_RMEM_SHRINK) ++ return 0; ++ return sk->sk_rcvbuf <= ub->ub_rmem_thres; ++ } ++#endif ++ return 1; ++} ++ ++static inline int ub_tcp_memory_pressure(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND; ++#endif ++ return 0; ++} ++ ++static inline int ub_tcp_shrink_rcvbuf(struct sock *sk) ++{ ++ if (tcp_memory_pressure) ++ return 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (sock_has_ubc(sk)) ++ return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK; ++#endif ++ return 0; ++} ++ ++UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk)) ++UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk)) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/include/ub/ub_vmpages.h linux-2.6.8.1-ve022stab078/include/ub/ub_vmpages.h +--- linux-2.6.8.1.orig/include/ub/ub_vmpages.h 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/include/ub/ub_vmpages.h 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,121 @@ ++/* ++ * include/ub/ub_vmpages.h ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#ifndef __UB_PAGES_H_ ++#define __UB_PAGES_H_ ++ ++#include <linux/linkage.h> ++#include <linux/config.h> ++#include <ub/beancounter.h> ++#include <ub/ub_decl.h> ++ ++/* ++ * UB_XXXPAGES ++ */ ++ ++/* ++ * Check whether vma has private or copy-on-write mapping. ++ * Should match checks in ub_protected_charge(). ++ */ ++#define VM_UB_PRIVATE(__flags, __file) \ ++ ( ((__flags) & VM_WRITE) ? 
\ ++ (__file) == NULL || !((__flags) & VM_SHARED) : \ ++ 0 \ ++ ) ++ ++#define UB_PAGE_WEIGHT_SHIFT 24 ++#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT) ++ ++struct page_beancounter; ++ ++/* Mprotect charging result */ ++#define PRIVVM_ERROR -1 ++#define PRIVVM_NO_CHARGE 0 ++#define PRIVVM_TO_PRIVATE 1 ++#define PRIVVM_TO_SHARED 2 ++ ++#ifdef CONFIG_USER_RESOURCE ++extern int ub_protected_charge(struct user_beancounter *ub, unsigned long size, ++ unsigned long newflags, struct vm_area_struct *vma); ++#else ++static inline int ub_protected_charge(struct user_beancounter *ub, ++ unsigned long size, unsigned long flags, ++ struct vm_area_struct *vma) ++{ ++ return PRIVVM_NO_CHARGE; ++} ++#endif ++ ++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_dec(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_FUNC(int, ub_shmpages_charge(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct user_beancounter *ub, ++ unsigned long size)) ++UB_DECLARE_FUNC(int, ub_locked_mem_charge(struct user_beancounter *ub, long sz)) ++UB_DECLARE_VOID_FUNC(ub_locked_mem_uncharge(struct user_beancounter *ub, ++ long size)) ++UB_DECLARE_FUNC(int, ub_privvm_charge(struct user_beancounter *ub, ++ unsigned long flags, struct file *file, ++ unsigned long size)) ++UB_DECLARE_VOID_FUNC(ub_privvm_uncharge(struct user_beancounter *ub, ++ unsigned long flags, struct file *file, ++ unsigned long size)) ++UB_DECLARE_FUNC(int, ub_unused_privvm_inc(struct user_beancounter * ub, ++ long size, struct vm_area_struct *vma)) ++UB_DECLARE_VOID_FUNC(ub_unused_privvm_dec(struct user_beancounter *ub, long sz, ++ struct vm_area_struct *vma)) ++UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct user_beancounter *ub, long sz)) ++UB_DECLARE_FUNC(int, ub_memory_charge(struct user_beancounter * ub, ++ unsigned long size, unsigned vm_flags, ++ struct file *vm_file, int strict)) ++UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct user_beancounter * ub, ++ unsigned long size, unsigned vm_flags, ++ struct file *vm_file)) ++UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end)) ++#define pages_in_vma(vma) \ ++ (pages_in_vma_range((vma), (vma)->vm_start, (vma)->vm_end)) ++ ++extern void fastcall __ub_update_physpages(struct user_beancounter *ub); ++extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub); ++extern void fastcall __ub_update_privvm(struct user_beancounter *ub); ++ ++#ifdef CONFIG_USER_SWAP_ACCOUNTING ++extern void ub_swapentry_inc(struct user_beancounter *ub); ++extern void ub_swapentry_dec(struct user_beancounter *ub); ++#endif ++ ++#ifdef CONFIG_USER_RSS_ACCOUNTING ++#define PB_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl) ++#define PB_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl) ++#else ++#define PB_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;} ++#define PB_DECLARE_VOID_FUNC(decl) static inline void decl { } ++#endif ++ ++PB_DECLARE_FUNC(int, pb_reserve_all(struct page_beancounter **pbc)) ++PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc)) ++PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num, ++ struct mm_struct *mm)) ++PB_DECLARE_FUNC(int, pb_add_ref(struct page *page, struct user_beancounter *ub, ++ struct page_beancounter **pbc)) ++PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb)) ++PB_DECLARE_VOID_FUNC(pb_free(struct 
page_beancounter **pb)) ++PB_DECLARE_VOID_FUNC(pb_add_list_ref(struct page *page, ++ struct user_beancounter *ub, ++ struct page_beancounter **pbc)) ++PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page, ++ struct user_beancounter *ub)) ++PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page)) ++ ++#endif +diff -uprN linux-2.6.8.1.orig/init/do_mounts_initrd.c linux-2.6.8.1-ve022stab078/init/do_mounts_initrd.c +--- linux-2.6.8.1.orig/init/do_mounts_initrd.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/init/do_mounts_initrd.c 2006-05-11 13:05:37.000000000 +0400 +@@ -10,7 +10,7 @@ + + #include "do_mounts.h" + +-unsigned long initrd_start, initrd_end; ++unsigned long initrd_start, initrd_end, initrd_copy; + int initrd_below_start_ok; + unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ + static int __initdata old_fd, root_fd; +diff -uprN linux-2.6.8.1.orig/init/main.c linux-2.6.8.1-ve022stab078/init/main.c +--- linux-2.6.8.1.orig/init/main.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/init/main.c 2006-05-11 13:05:40.000000000 +0400 +@@ -49,6 +49,8 @@ + #include <asm/bugs.h> + #include <asm/setup.h> + ++#include <ub/beancounter.h> ++ + /* + * This is one of the first .c files built. Error out early + * if we have compiler trouble.. +@@ -85,6 +87,7 @@ extern void sbus_init(void); + extern void sysctl_init(void); + extern void signals_init(void); + extern void buffer_init(void); ++extern void fairsched_init_late(void); + extern void pidhash_init(void); + extern void pidmap_init(void); + extern void prio_tree_init(void); +@@ -101,6 +104,16 @@ extern void tc_init(void); + enum system_states system_state; + EXPORT_SYMBOL(system_state); + ++#ifdef CONFIG_VE ++extern void init_ve_system(void); ++#endif ++ ++void prepare_ve0_process(struct task_struct *tsk); ++void prepare_ve0_proc_root(void); ++void prepare_ve0_sysctl(void); ++void prepare_ve0_loopback(void); ++void prepare_virtual_fs(void); ++ + /* + * Boot command-line arguments + */ +@@ -184,6 +197,52 @@ unsigned long loops_per_jiffy = (1<<12); + + EXPORT_SYMBOL(loops_per_jiffy); + ++unsigned long cycles_per_jiffy, cycles_per_clock; ++ ++void calibrate_cycles(void) ++{ ++ unsigned long ticks; ++ cycles_t time; ++ ++ ticks = jiffies; ++ while (ticks == jiffies) ++ /* nothing */; ++ time = get_cycles(); ++ ticks = jiffies; ++ while (ticks == jiffies) ++ /* nothing */; ++ ++ time = get_cycles() - time; ++ cycles_per_jiffy = time; ++ if ((time >> 32) != 0) { ++ printk("CPU too fast! timings are incorrect\n"); ++ cycles_per_jiffy = -1; ++ } ++} ++ ++EXPORT_SYMBOL(cycles_per_jiffy); ++ ++void calc_cycles_per_jiffy(void) ++{ ++#if defined(__i386__) ++ extern unsigned long fast_gettimeoffset_quotient; ++ unsigned long low, high; ++ ++ if (fast_gettimeoffset_quotient != 0) { ++ __asm__("divl %2" ++ :"=a" (low), "=d" (high) ++ :"r" (fast_gettimeoffset_quotient), ++ "0" (0), "1" (1000000/HZ)); ++ ++ cycles_per_jiffy = low; ++ } ++#endif ++ if (cycles_per_jiffy == 0) ++ calibrate_cycles(); ++ ++ cycles_per_clock = cycles_per_jiffy * (HZ / CLOCKS_PER_SEC); ++} ++ + /* This is the number of bits of precision for the loops_per_jiffy. Each + bit takes on average 1.5/HZ seconds. 
This (like the original) is a little + better than 1% */ +@@ -228,6 +287,8 @@ void __devinit calibrate_delay(void) + printk("%lu.%02lu BogoMIPS\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); ++ ++ calc_cycles_per_jiffy(); + } + + static int __init debug_kernel(char *str) +@@ -397,7 +458,8 @@ static void __init smp_init(void) + + static void noinline rest_init(void) + { +- kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); ++ kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND | CLONE_STOPPED); ++ wake_up_init(); + numa_default_policy(); + unlock_kernel(); + cpu_idle(); +@@ -438,7 +500,6 @@ void __init parse_early_param(void) + /* + * Activate the first processor. + */ +- + asmlinkage void __init start_kernel(void) + { + char * command_line; +@@ -448,6 +509,7 @@ asmlinkage void __init start_kernel(void + * enable them + */ + lock_kernel(); ++ ub0_init(); + page_address_init(); + printk(linux_banner); + setup_arch(&command_line); +@@ -459,6 +521,8 @@ asmlinkage void __init start_kernel(void + */ + smp_prepare_boot_cpu(); + ++ prepare_ve0_process(&init_task); ++ + /* + * Set up the scheduler prior starting any interrupts (such as the + * timer interrupt). Full topology setup happens at smp_init() +@@ -517,6 +581,7 @@ asmlinkage void __init start_kernel(void + #endif + fork_init(num_physpages); + proc_caches_init(); ++ beancounter_init(num_physpages); + buffer_init(); + unnamed_dev_init(); + security_scaffolding_startup(); +@@ -526,7 +591,10 @@ asmlinkage void __init start_kernel(void + /* rootfs populating might need page-writeback */ + page_writeback_init(); + #ifdef CONFIG_PROC_FS ++ prepare_ve0_proc_root(); ++ prepare_ve0_sysctl(); + proc_root_init(); ++ beancounter_proc_init(); + #endif + check_bugs(); + +@@ -538,6 +606,7 @@ asmlinkage void __init start_kernel(void + init_idle(current, smp_processor_id()); + + /* Do the rest non-__init'ed, we're now alive */ ++ page_ubc_init(); + rest_init(); + } + +@@ -598,6 +667,9 @@ static void __init do_initcalls(void) + */ + static void __init do_basic_setup(void) + { ++ prepare_ve0_loopback(); ++ init_ve_system(); ++ + driver_init(); + + #ifdef CONFIG_SYSCTL +@@ -614,7 +686,7 @@ static void __init do_basic_setup(void) + static void do_pre_smp_initcalls(void) + { + extern int spawn_ksoftirqd(void); +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU) + extern int migration_init(void); + + migration_init(); +@@ -666,6 +738,12 @@ static int init(void * unused) + + fixup_cpu_present_map(); + smp_init(); ++ ++ /* ++ * This should be done after all cpus are known to ++ * be online. smp_init gives us confidence in it. 
++ */ ++ fairsched_init_late(); + sched_init_smp(); + + /* +diff -uprN linux-2.6.8.1.orig/init/version.c linux-2.6.8.1-ve022stab078/init/version.c +--- linux-2.6.8.1.orig/init/version.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/init/version.c 2006-05-11 13:05:42.000000000 +0400 +@@ -28,6 +28,12 @@ struct new_utsname system_utsname = { + + EXPORT_SYMBOL(system_utsname); + ++struct new_utsname virt_utsname = { ++ /* we need only this field */ ++ .release = UTS_RELEASE, ++}; ++EXPORT_SYMBOL(virt_utsname); ++ + const char *linux_banner = + "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; +diff -uprN linux-2.6.8.1.orig/ipc/compat.c linux-2.6.8.1-ve022stab078/ipc/compat.c +--- linux-2.6.8.1.orig/ipc/compat.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/compat.c 2006-05-11 13:05:40.000000000 +0400 +@@ -33,6 +33,8 @@ + #include <asm/semaphore.h> + #include <asm/uaccess.h> + ++#include <linux/ve_owner.h> ++ + #include "util.h" + + struct compat_msgbuf { +diff -uprN linux-2.6.8.1.orig/ipc/mqueue.c linux-2.6.8.1-ve022stab078/ipc/mqueue.c +--- linux-2.6.8.1.orig/ipc/mqueue.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/mqueue.c 2006-05-11 13:05:38.000000000 +0400 +@@ -631,7 +631,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_ + if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) + return ERR_PTR(-EINVAL); + +- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) ++ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], ++ NULL, NULL)) + return ERR_PTR(-EACCES); + + filp = dentry_open(dentry, mqueue_mnt, oflag); +@@ -1008,7 +1009,7 @@ retry: + goto out; + } + +- ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT); ++ ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT, NULL); + if (ret == 1) + goto retry; + if (ret) { +diff -uprN linux-2.6.8.1.orig/ipc/msg.c linux-2.6.8.1-ve022stab078/ipc/msg.c +--- linux-2.6.8.1.orig/ipc/msg.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/msg.c 2006-05-11 13:05:45.000000000 +0400 +@@ -75,6 +75,16 @@ static int newque (key_t key, int msgflg + static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); + #endif + ++void prepare_msg(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_msg_ids = &msg_ids; ++ get_ve0()->_msg_ctlmax = msg_ctlmax; ++ get_ve0()->_msg_ctlmnb = msg_ctlmnb; ++ get_ve0()->_msg_ctlmni = msg_ctlmni; ++#endif ++} ++ + void __init msg_init (void) + { + ipc_init_ids(&msg_ids,msg_ctlmni); +@@ -84,6 +94,23 @@ void __init msg_init (void) + #endif + } + ++#ifdef CONFIG_VE ++# define msg_ids (*(get_exec_env()->_msg_ids)) ++# define msg_ctlmax (get_exec_env()->_msg_ctlmax) ++# define msg_ctlmnb (get_exec_env()->_msg_ctlmnb) ++# define msg_ctlmni (get_exec_env()->_msg_ctlmni) ++#endif ++ ++#ifdef CONFIG_VE ++void ve_msg_ipc_init (void) ++{ ++ msg_ctlmax = MSGMAX; ++ msg_ctlmnb = MSGMNB; ++ msg_ctlmni = MSGMNI; ++ ve_ipc_init_ids(&msg_ids, MSGMNI); ++} ++#endif ++ + static int newque (key_t key, int msgflg) + { + int id; +@@ -104,7 +131,7 @@ static int newque (key_t key, int msgflg + return retval; + } + +- id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni); ++ id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1); + if(id == -1) { + security_msg_queue_free(msq); + ipc_rcu_free(msq, sizeof(*msq)); +@@ -441,7 +468,7 @@ asmlinkage long sys_msgctl (int msqid, i + ipcp = &msq->q_perm; + err = -EPERM; + if 
(current->euid != ipcp->cuid && +- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) ++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) + /* We _could_ check for CAP_CHOWN above, but we don't */ + goto out_unlock_up; + +@@ -529,7 +556,7 @@ static inline int pipelined_send(struct + wake_up_process(msr->r_tsk); + } else { + msr->r_msg = msg; +- msq->q_lrpid = msr->r_tsk->pid; ++ msq->q_lrpid = virt_pid(msr->r_tsk); + msq->q_rtime = get_seconds(); + wake_up_process(msr->r_tsk); + return 1; +@@ -603,7 +630,7 @@ retry: + goto retry; + } + +- msq->q_lspid = current->tgid; ++ msq->q_lspid = virt_tgid(current); + msq->q_stime = get_seconds(); + + if(!pipelined_send(msq,msg)) { +@@ -697,7 +724,7 @@ retry: + list_del(&msg->m_list); + msq->q_qnum--; + msq->q_rtime = get_seconds(); +- msq->q_lrpid = current->tgid; ++ msq->q_lrpid = virt_tgid(current); + msq->q_cbytes -= msg->m_ts; + atomic_sub(msg->m_ts,&msg_bytes); + atomic_dec(&msg_hdrs); +@@ -828,3 +855,39 @@ done: + return len; + } + #endif ++ ++#ifdef CONFIG_VE ++void ve_msg_ipc_cleanup(void) ++{ ++ int i; ++ struct msg_queue *msq; ++ ++ down(&msg_ids.sem); ++ for (i = 0; i <= msg_ids.max_id; i++) { ++ msq = msg_lock(i); ++ if (msq == NULL) ++ continue; ++ freeque(msq, i); ++ } ++ up(&msg_ids.sem); ++} ++ ++int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct msg_queue * msq; ++ ++ down(&msg_ids.sem); ++ for(i = 0; i <= msg_ids.max_id; i++) { ++ if ((msq = msg_lock(i)) == NULL) ++ continue; ++ err = func(msg_buildid(i,msq->q_perm.seq), msq, arg); ++ msg_unlock(msq); ++ if (err) ++ break; ++ } ++ up(&msg_ids.sem); ++ return err; ++} ++#endif +diff -uprN linux-2.6.8.1.orig/ipc/msgutil.c linux-2.6.8.1-ve022stab078/ipc/msgutil.c +--- linux-2.6.8.1.orig/ipc/msgutil.c 2004-08-14 14:55:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/msgutil.c 2006-05-11 13:05:39.000000000 +0400 +@@ -17,6 +17,8 @@ + + #include "util.h" + ++#include <ub/ub_mem.h> ++ + struct msg_msgseg { + struct msg_msgseg* next; + /* the next part of the message follows immediately */ +@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us + if (alen > DATALEN_MSG) + alen = DATALEN_MSG; + +- msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL); ++ msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL); + if (msg == NULL) + return ERR_PTR(-ENOMEM); + +@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us + alen = len; + if (alen > DATALEN_SEG) + alen = DATALEN_SEG; +- seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen, ++ seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen, + GFP_KERNEL); + if (seg == NULL) { + err = -ENOMEM; +diff -uprN linux-2.6.8.1.orig/ipc/sem.c linux-2.6.8.1-ve022stab078/ipc/sem.c +--- linux-2.6.8.1.orig/ipc/sem.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/sem.c 2006-05-11 13:05:45.000000000 +0400 +@@ -74,6 +74,7 @@ + #include <asm/uaccess.h> + #include "util.h" + ++#include <ub/ub_mem.h> + + #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id)) + #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) +@@ -82,9 +83,13 @@ + ipc_checkid(&sem_ids,&sma->sem_perm,semid) + #define sem_buildid(id, seq) \ + ipc_buildid(&sem_ids, id, seq) ++ ++int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI}; ++ + static struct ipc_ids sem_ids; ++static int used_sems; + +-static int newary (key_t, int, int); ++static int newary (key_t, int, int, int); + static void freeary (struct sem_array *sma, int id); + #ifdef 
CONFIG_PROC_FS + static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); +@@ -102,24 +107,51 @@ static int sysvipc_sem_read_proc(char *b + * + */ + +-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI}; + #define sc_semmsl (sem_ctls[0]) + #define sc_semmns (sem_ctls[1]) + #define sc_semopm (sem_ctls[2]) + #define sc_semmni (sem_ctls[3]) + +-static int used_sems; ++void prepare_sem(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_sem_ids = &sem_ids; ++ get_ve0()->_used_sems = used_sems; ++ get_ve0()->_sem_ctls[0] = sem_ctls[0]; ++ get_ve0()->_sem_ctls[1] = sem_ctls[1]; ++ get_ve0()->_sem_ctls[2] = sem_ctls[2]; ++ get_ve0()->_sem_ctls[3] = sem_ctls[3]; ++#endif ++} + + void __init sem_init (void) + { + used_sems = 0; +- ipc_init_ids(&sem_ids,sc_semmni); ++ ipc_init_ids(&sem_ids, SEMMNI); + + #ifdef CONFIG_PROC_FS + create_proc_read_entry("sysvipc/sem", 0, NULL, sysvipc_sem_read_proc, NULL); + #endif + } + ++#ifdef CONFIG_VE ++# define sem_ids (*(get_exec_env()->_sem_ids)) ++# define used_sems (get_exec_env()->_used_sems) ++# define sem_ctls (get_exec_env()->_sem_ctls) ++#endif ++ ++#ifdef CONFIG_VE ++void ve_sem_ipc_init (void) ++{ ++ used_sems = 0; ++ sem_ctls[0] = SEMMSL; ++ sem_ctls[1] = SEMMNS; ++ sem_ctls[2] = SEMOPM; ++ sem_ctls[3] = SEMMNI; ++ ve_ipc_init_ids(&sem_ids, SEMMNI); ++} ++#endif ++ + /* + * Lockless wakeup algorithm: + * Without the check/retry algorithm a lockless wakeup is possible: +@@ -154,7 +186,7 @@ void __init sem_init (void) + */ + #define IN_WAKEUP 1 + +-static int newary (key_t key, int nsems, int semflg) ++static int newary (key_t key, int semid, int nsems, int semflg) + { + int id; + int retval; +@@ -183,7 +215,7 @@ static int newary (key_t key, int nsems, + return retval; + } + +- id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni); ++ id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid); + if(id == -1) { + security_sem_free(sma); + ipc_rcu_free(sma, size); +@@ -212,12 +244,12 @@ asmlinkage long sys_semget (key_t key, i + down(&sem_ids.sem); + + if (key == IPC_PRIVATE) { +- err = newary(key, nsems, semflg); ++ err = newary(key, -1, nsems, semflg); + } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */ + if (!(semflg & IPC_CREAT)) + err = -ENOENT; + else +- err = newary(key, nsems, semflg); ++ err = newary(key, -1, nsems, semflg); + } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) { + err = -EEXIST; + } else { +@@ -715,7 +747,7 @@ static int semctl_main(int semid, int se + for (un = sma->undo; un; un = un->id_next) + un->semadj[semnum] = 0; + curr->semval = val; +- curr->sempid = current->tgid; ++ curr->sempid = virt_tgid(current); + sma->sem_ctime = get_seconds(); + /* maybe some queued-up processes were waiting for this */ + update_queue(sma); +@@ -793,7 +825,7 @@ static int semctl_down(int semid, int se + ipcp = &sma->sem_perm; + + if (current->euid != ipcp->cuid && +- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) { ++ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) { + err=-EPERM; + goto out_unlock; + } +@@ -914,7 +946,8 @@ static inline int get_undo_list(struct s + undo_list = current->sysvsem.undo_list; + if (!undo_list) { + size = sizeof(struct sem_undo_list); +- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL); ++ undo_list = (struct sem_undo_list *) ub_kmalloc(size, ++ GFP_KERNEL); + if (undo_list == NULL) + return -ENOMEM; + memset(undo_list, 0, size); +@@ -979,7 +1012,8 @@ static struct sem_undo *find_undo(int se + nsems = sma->sem_nsems; + 
sem_unlock(sma); + +- new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); ++ new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) + ++ sizeof(short)*nsems, GFP_KERNEL); + if (!new) + return ERR_PTR(-ENOMEM); + memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems); +@@ -1028,7 +1062,7 @@ asmlinkage long sys_semtimedop(int semid + if (nsops > sc_semopm) + return -E2BIG; + if(nsops > SEMOPM_FAST) { +- sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); ++ sops = ub_kmalloc(sizeof(*sops)*nsops, GFP_KERNEL); + if(sops==NULL) + return -ENOMEM; + } +@@ -1100,7 +1134,7 @@ retry_undos: + if (error) + goto out_unlock_free; + +- error = try_atomic_semop (sma, sops, nsops, un, current->tgid); ++ error = try_atomic_semop (sma, sops, nsops, un, virt_tgid(current)); + if (error <= 0) + goto update; + +@@ -1112,7 +1146,7 @@ retry_undos: + queue.sops = sops; + queue.nsops = nsops; + queue.undo = un; +- queue.pid = current->tgid; ++ queue.pid = virt_tgid(current); + queue.id = semid; + if (alter) + append_to_queue(sma ,&queue); +@@ -1271,7 +1305,7 @@ found: + sem->semval += u->semadj[i]; + if (sem->semval < 0) + sem->semval = 0; /* shouldn't happen */ +- sem->sempid = current->tgid; ++ sem->sempid = virt_tgid(current); + } + } + sma->sem_otime = get_seconds(); +@@ -1331,3 +1365,58 @@ done: + return len; + } + #endif ++ ++#ifdef CONFIG_VE ++void ve_sem_ipc_cleanup(void) ++{ ++ int i; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ for (i = 0; i <= sem_ids.max_id; i++) { ++ sma = sem_lock(i); ++ if (sma == NULL) ++ continue; ++ freeary(sma, i); ++ } ++ up(&sem_ids.sem); ++} ++ ++int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg) ++{ ++ int err = 0; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ sma = sem_lock(semid); ++ if (!sma) { ++ err = newary(key, semid, size, semflg); ++ if (err >= 0) ++ sma = sem_lock(semid); ++ } ++ if (sma) ++ sem_unlock(sma); ++ up(&sem_ids.sem); ++ ++ return err > 0 ? 
0 : err; ++} ++ ++int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct sem_array *sma; ++ ++ down(&sem_ids.sem); ++ for (i = 0; i <= sem_ids.max_id; i++) { ++ if ((sma = sem_lock(i)) == NULL) ++ continue; ++ err = func(sem_buildid(i,sma->sem_perm.seq), sma, arg); ++ sem_unlock(sma); ++ if (err) ++ break; ++ } ++ up(&sem_ids.sem); ++ return err; ++} ++#endif +diff -uprN linux-2.6.8.1.orig/ipc/shm.c linux-2.6.8.1-ve022stab078/ipc/shm.c +--- linux-2.6.8.1.orig/ipc/shm.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/shm.c 2006-05-11 13:05:45.000000000 +0400 +@@ -28,6 +28,9 @@ + #include <linux/security.h> + #include <asm/uaccess.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ + #include "util.h" + + #define shm_flags shm_perm.mode +@@ -43,7 +46,7 @@ static struct ipc_ids shm_ids; + #define shm_buildid(id, seq) \ + ipc_buildid(&shm_ids, id, seq) + +-static int newseg (key_t key, int shmflg, size_t size); ++static int newseg (key_t key, int shmid, int shmflg, size_t size); + static void shm_open (struct vm_area_struct *shmd); + static void shm_close (struct vm_area_struct *shmd); + #ifdef CONFIG_PROC_FS +@@ -55,6 +58,28 @@ size_t shm_ctlall = SHMALL; + int shm_ctlmni = SHMMNI; + + static int shm_tot; /* total number of shared memory pages */ ++ ++void prepare_shm(void) ++{ ++#ifdef CONFIG_VE ++ int i; ++ struct shmid_kernel* shp; ++ ++ get_ve0()->_shm_ids = &shm_ids; ++ for (i = 0; i <= shm_ids.max_id; i++) { ++ shp = (struct shmid_kernel *)ipc_lock(&shm_ids, i); ++ if (shp != NULL) { ++ shp->_shm_ids = &shm_ids; ++ ipc_unlock(&shp->shm_perm); ++ } ++ } ++ ++ get_ve0()->_shm_ctlmax = shm_ctlmax; ++ get_ve0()->_shm_ctlall = shm_ctlall; ++ get_ve0()->_shm_ctlmni = shm_ctlmni; ++ get_ve0()->_shm_tot = shm_tot; ++#endif ++} + + void __init shm_init (void) + { +@@ -64,6 +89,42 @@ void __init shm_init (void) + #endif + } + ++#ifdef CONFIG_VE ++# define shm_ids (*(get_exec_env()->_shm_ids)) ++# define shm_ctlmax (get_exec_env()->_shm_ctlmax) ++# define shm_ctlall (get_exec_env()->_shm_ctlall) ++# define shm_ctlmni (get_exec_env()->_shm_ctlmni) ++/* renamed since there is a struct field named shm_tot */ ++# define shm_total (get_exec_env()->_shm_tot) ++#else ++# define shm_total shm_tot ++#endif ++ ++#ifdef CONFIG_VE ++void ve_shm_ipc_init (void) ++{ ++ shm_ctlmax = SHMMAX; ++ shm_ctlall = SHMALL; ++ shm_ctlmni = SHMMNI; ++ shm_total = 0; ++ ve_ipc_init_ids(&shm_ids, 1); ++} ++#endif ++ ++static struct shmid_kernel* shm_lock_by_sb(int id, struct super_block* sb) ++{ ++ struct ve_struct *fs_envid; ++ fs_envid = VE_OWNER_FSTYPE(sb->s_type); ++ return (struct shmid_kernel *)ipc_lock(fs_envid->_shm_ids, id); ++} ++ ++static inline int *shm_total_sb(struct super_block *sb) ++{ ++ struct ve_struct *fs_envid; ++ fs_envid = VE_OWNER_FSTYPE(sb->s_type); ++ return &fs_envid->_shm_tot; ++} ++ + static inline int shm_checkid(struct shmid_kernel *s, int id) + { + if (ipc_checkid(&shm_ids,&s->shm_perm,id)) +@@ -71,25 +132,25 @@ static inline int shm_checkid(struct shm + return 0; + } + +-static inline struct shmid_kernel *shm_rmid(int id) ++static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id) + { +- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); ++ return (struct shmid_kernel *)ipc_rmid(ids, id); + } + +-static inline int shm_addid(struct shmid_kernel *shp) ++static inline int shm_addid(struct shmid_kernel *shp, int reqid) + { +- return ipc_addid(&shm_ids, &shp->shm_perm, 
shm_ctlmni+1); ++ return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni+1, reqid); + } + + + +-static inline void shm_inc (int id) { ++static inline void shm_inc (int id, struct super_block * sb) { + struct shmid_kernel *shp; + +- if(!(shp = shm_lock(id))) ++ if(!(shp = shm_lock_by_sb(id, sb))) + BUG(); + shp->shm_atim = get_seconds(); +- shp->shm_lprid = current->tgid; ++ shp->shm_lprid = virt_tgid(current); + shp->shm_nattch++; + shm_unlock(shp); + } +@@ -97,7 +158,40 @@ static inline void shm_inc (int id) { + /* This is called by fork, once for every shm attach. */ + static void shm_open (struct vm_area_struct *shmd) + { +- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); ++ shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino, ++ shmd->vm_file->f_dentry->d_inode->i_sb); ++} ++ ++static int shmem_lock(struct shmid_kernel *shp, int lock) ++{ ++ struct inode *inode = shp->shm_file->f_dentry->d_inode; ++ struct shmem_inode_info *info = SHMEM_I(inode); ++ unsigned long size; ++ ++ if (!is_file_hugepages(shp->shm_file)) ++ return 0; ++ ++ spin_lock(&info->lock); ++ if (!!lock == !!(info->flags & VM_LOCKED)) ++ goto out; ++ ++ /* size will be re-calculated in pages inside (un)charge */ ++ size = shp->shm_segsz + PAGE_SIZE - 1; ++ ++ if (!lock) { ++ ub_locked_mem_uncharge(shmid_ub(shp), size); ++ info->flags &= ~VM_LOCKED; ++ } else if (ub_locked_mem_charge(shmid_ub(shp), size) < 0) ++ goto out_err; ++ else ++ info->flags |= VM_LOCKED; ++out: ++ spin_unlock(&info->lock); ++ return 0; ++ ++out_err: ++ spin_unlock(&info->lock); ++ return -ENOMEM; + } + + /* +@@ -110,13 +204,23 @@ static void shm_open (struct vm_area_str + */ + static void shm_destroy (struct shmid_kernel *shp) + { +- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; +- shm_rmid (shp->id); ++ int numpages; ++ struct super_block *sb; ++ int *shm_totalp; ++ struct file *file; ++ ++ file = shp->shm_file; ++ numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ sb = file->f_dentry->d_inode->i_sb; ++ shm_totalp = shm_total_sb(sb); ++ *shm_totalp -= numpages; ++ shm_rmid(shp->_shm_ids, shp->id); + shm_unlock(shp); +- if (!is_file_hugepages(shp->shm_file)) +- shmem_lock(shp->shm_file, 0); +- fput (shp->shm_file); ++ shmem_lock(shp, 0); ++ fput (file); + security_shm_free(shp); ++ put_beancounter(shmid_ub(shp)); ++ shmid_ub(shp) = NULL; + ipc_rcu_free(shp, sizeof(struct shmid_kernel)); + } + +@@ -130,13 +234,25 @@ static void shm_close (struct vm_area_st + { + struct file * file = shmd->vm_file; + int id = file->f_dentry->d_inode->i_ino; ++ struct super_block *sb; + struct shmid_kernel *shp; ++ struct ipc_ids* ids; ++#ifdef CONFIG_VE ++ struct ve_struct *fs_envid; ++#endif + +- down (&shm_ids.sem); ++ sb = file->f_dentry->d_inode->i_sb; ++#ifdef CONFIG_VE ++ fs_envid = get_ve(VE_OWNER_FSTYPE(sb->s_type)); ++ ids = fs_envid->_shm_ids; ++#else ++ ids = &shm_ids; ++#endif ++ down (&ids->sem); + /* remove from the list of attaches of the shm segment */ +- if(!(shp = shm_lock(id))) ++ if(!(shp = shm_lock_by_sb(id, sb))) + BUG(); +- shp->shm_lprid = current->tgid; ++ shp->shm_lprid = virt_tgid(current); + shp->shm_dtim = get_seconds(); + shp->shm_nattch--; + if(shp->shm_nattch == 0 && +@@ -144,14 +260,20 @@ static void shm_close (struct vm_area_st + shm_destroy (shp); + else + shm_unlock(shp); +- up (&shm_ids.sem); ++ up (&ids->sem); ++#ifdef CONFIG_VE ++ put_ve(fs_envid); ++#endif + } + + static int shm_mmap(struct file * file, struct vm_area_struct * vma) + { + file_accessed(file); + vma->vm_ops = &shm_vm_ops; +- 
shm_inc(file->f_dentry->d_inode->i_ino); ++ if (!(vma->vm_flags & VM_WRITE)) ++ vma->vm_flags &= ~VM_MAYWRITE; ++ shm_inc(file->f_dentry->d_inode->i_ino, ++ file->f_dentry->d_inode->i_sb); + return 0; + } + +@@ -169,19 +291,19 @@ static struct vm_operations_struct shm_v + #endif + }; + +-static int newseg (key_t key, int shmflg, size_t size) ++static int newseg (key_t key, int shmid, int shmflg, size_t size) + { + int error; + struct shmid_kernel *shp; + int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; + struct file * file; +- char name[13]; ++ char name[26]; + int id; + + if (size < SHMMIN || size > shm_ctlmax) + return -EINVAL; + +- if (shm_tot + numpages >= shm_ctlall) ++ if (shm_total + numpages >= shm_ctlall) + return -ENOSPC; + + shp = ipc_rcu_alloc(sizeof(*shp)); +@@ -201,7 +323,11 @@ static int newseg (key_t key, int shmflg + if (shmflg & SHM_HUGETLB) + file = hugetlb_zero_setup(size); + else { ++#ifdef CONFIG_VE ++ sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key); ++#else + sprintf (name, "SYSV%08x", key); ++#endif + file = shmem_file_setup(name, size, VM_ACCOUNT); + } + error = PTR_ERR(file); +@@ -209,24 +335,26 @@ static int newseg (key_t key, int shmflg + goto no_file; + + error = -ENOSPC; +- id = shm_addid(shp); ++ id = shm_addid(shp, shmid); + if(id == -1) + goto no_id; + +- shp->shm_cprid = current->tgid; ++ shp->shm_cprid = virt_tgid(current); + shp->shm_lprid = 0; + shp->shm_atim = shp->shm_dtim = 0; + shp->shm_ctim = get_seconds(); + shp->shm_segsz = size; + shp->shm_nattch = 0; + shp->id = shm_buildid(id,shp->shm_perm.seq); ++ shp->_shm_ids = &shm_ids; + shp->shm_file = file; ++ shmid_ub(shp) = get_beancounter(get_exec_ub()); + file->f_dentry->d_inode->i_ino = shp->id; + if (shmflg & SHM_HUGETLB) + set_file_hugepages(file); + else + file->f_op = &shm_file_operations; +- shm_tot += numpages; ++ shm_total += numpages; + shm_unlock(shp); + return shp->id; + +@@ -245,12 +373,12 @@ asmlinkage long sys_shmget (key_t key, s + + down(&shm_ids.sem); + if (key == IPC_PRIVATE) { +- err = newseg(key, shmflg, size); ++ err = newseg(key, -1, shmflg, size); + } else if ((id = ipc_findkey(&shm_ids, key)) == -1) { + if (!(shmflg & IPC_CREAT)) + err = -ENOENT; + else +- err = newseg(key, shmflg, size); ++ err = newseg(key, -1, shmflg, size); + } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { + err = -EEXIST; + } else { +@@ -443,7 +571,7 @@ asmlinkage long sys_shmctl (int shmid, i + down(&shm_ids.sem); + shm_info.used_ids = shm_ids.in_use; + shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp); +- shm_info.shm_tot = shm_tot; ++ shm_info.shm_tot = shm_total; + shm_info.swap_attempts = 0; + shm_info.swap_successes = 0; + err = shm_ids.max_id; +@@ -526,12 +654,10 @@ asmlinkage long sys_shmctl (int shmid, i + goto out_unlock; + + if(cmd==SHM_LOCK) { +- if (!is_file_hugepages(shp->shm_file)) +- shmem_lock(shp->shm_file, 1); +- shp->shm_flags |= SHM_LOCKED; ++ if ((err = shmem_lock(shp, 1)) == 0) ++ shp->shm_flags |= SHM_LOCKED; + } else { +- if (!is_file_hugepages(shp->shm_file)) +- shmem_lock(shp->shm_file, 0); ++ shmem_lock(shp, 0); + shp->shm_flags &= ~SHM_LOCKED; + } + shm_unlock(shp); +@@ -560,7 +686,7 @@ asmlinkage long sys_shmctl (int shmid, i + + if (current->euid != shp->shm_perm.uid && + current->euid != shp->shm_perm.cuid && +- !capable(CAP_SYS_ADMIN)) { ++ !capable(CAP_VE_SYS_ADMIN)) { + err=-EPERM; + goto out_unlock_up; + } +@@ -597,7 +723,7 @@ asmlinkage long sys_shmctl (int shmid, i + err=-EPERM; + if (current->euid != shp->shm_perm.uid && + current->euid != 
shp->shm_perm.cuid && +- !capable(CAP_SYS_ADMIN)) { ++ !capable(CAP_VE_SYS_ADMIN)) { + goto out_unlock_up; + } + +@@ -818,6 +944,7 @@ asmlinkage long sys_shmdt(char __user *s + * could possibly have landed at. Also cast things to loff_t to + * prevent overflows and make comparisions vs. equal-width types. + */ ++ size = PAGE_ALIGN(size); + while (vma && (loff_t)(vma->vm_end - addr) <= size) { + next = vma->vm_next; + +@@ -894,3 +1021,72 @@ done: + return len; + } + #endif ++ ++#ifdef CONFIG_VE ++void ve_shm_ipc_cleanup(void) ++{ ++ int i; ++ ++ down(&shm_ids.sem); ++ for (i = 0; i <= shm_ids.max_id; i++) { ++ struct shmid_kernel *shp; ++ ++ if (!(shp = shm_lock(i))) ++ continue; ++ if (shp->shm_nattch) { ++ shp->shm_flags |= SHM_DEST; ++ shp->shm_perm.key = IPC_PRIVATE; ++ shm_unlock(shp); ++ } else ++ shm_destroy(shp); ++ } ++ up(&shm_ids.sem); ++} ++#endif ++ ++struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg) ++{ ++ struct shmid_kernel *shp; ++ struct file *file; ++ ++ down(&shm_ids.sem); ++ shp = shm_lock(shmid); ++ if (!shp) { ++ int err; ++ ++ err = newseg(key, shmid, shmflg, size); ++ file = ERR_PTR(err); ++ if (err < 0) ++ goto out; ++ shp = shm_lock(shmid); ++ } ++ file = ERR_PTR(-EINVAL); ++ if (shp) { ++ file = shp->shm_file; ++ get_file(file); ++ shm_unlock(shp); ++ } ++out: ++ up(&shm_ids.sem); ++ ++ return file; ++} ++ ++int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg) ++{ ++ int i; ++ int err = 0; ++ struct shmid_kernel* shp; ++ ++ down(&shm_ids.sem); ++ for(i = 0; i <= shm_ids.max_id; i++) { ++ if ((shp = shm_lock(i)) == NULL) ++ continue; ++ err = func(shp, arg); ++ shm_unlock(shp); ++ if (err) ++ break; ++ } ++ up(&shm_ids.sem); ++ return err; ++} +diff -uprN linux-2.6.8.1.orig/ipc/util.c linux-2.6.8.1-ve022stab078/ipc/util.c +--- linux-2.6.8.1.orig/ipc/util.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/util.c 2006-05-11 13:05:48.000000000 +0400 +@@ -13,6 +13,7 @@ + */ + + #include <linux/config.h> ++#include <linux/module.h> + #include <linux/mm.h> + #include <linux/shm.h> + #include <linux/init.h> +@@ -27,8 +28,12 @@ + + #include <asm/unistd.h> + ++#include <ub/ub_mem.h> ++ + #include "util.h" + ++DCL_VE_OWNER(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ()) ++ + /** + * ipc_init - initialise IPC subsystem + * +@@ -55,7 +60,7 @@ __initcall(ipc_init); + * array itself. + */ + +-void __init ipc_init_ids(struct ipc_ids* ids, int size) ++void ve_ipc_init_ids(struct ipc_ids* ids, int size) + { + int i; + sema_init(&ids->sem,1); +@@ -82,7 +87,25 @@ void __init ipc_init_ids(struct ipc_ids* + } + for(i=0;i<ids->size;i++) + ids->entries[i].p = NULL; ++#ifdef CONFIG_VE ++ SET_VE_OWNER_IPCIDS(ids, get_exec_env()); ++#endif ++} ++ ++void __init ipc_init_ids(struct ipc_ids* ids, int size) ++{ ++ ve_ipc_init_ids(ids, size); ++} ++ ++#ifdef CONFIG_VE ++static void ipc_free_ids(struct ipc_ids* ids) ++{ ++ if (ids == NULL) ++ return; ++ ipc_rcu_free(ids->entries, sizeof(struct ipc_id)*ids->size); ++ kfree(ids); + } ++#endif + + /** + * ipc_findkey - find a key in an ipc identifier set +@@ -165,10 +188,20 @@ static int grow_ary(struct ipc_ids* ids, + * Called with ipc_ids.sem held. 
+ */ + +-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) ++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid) + { + int id; + ++ if (reqid >= 0) { ++ id = reqid%SEQ_MULTIPLIER; ++ size = grow_ary(ids,id+1); ++ if (id >= size) ++ return -1; ++ if (ids->entries[id].p == NULL) ++ goto found; ++ return -1; ++ } ++ + size = grow_ary(ids,size); + + /* +@@ -181,6 +214,10 @@ int ipc_addid(struct ipc_ids* ids, struc + } + return -1; + found: ++#ifdef CONFIG_VE ++ if (ids->in_use == 0) ++ (void)get_ve(VE_OWNER_IPCIDS(ids)); ++#endif + ids->in_use++; + if (id > ids->max_id) + ids->max_id = id; +@@ -188,9 +225,13 @@ found: + new->cuid = new->uid = current->euid; + new->gid = new->cgid = current->egid; + +- new->seq = ids->seq++; +- if(ids->seq > ids->seq_max) +- ids->seq = 0; ++ if (reqid >= 0) { ++ new->seq = reqid/SEQ_MULTIPLIER; ++ } else { ++ new->seq = ids->seq++; ++ if(ids->seq > ids->seq_max) ++ ids->seq = 0; ++ } + + new->lock = SPIN_LOCK_UNLOCKED; + new->deleted = 0; +@@ -238,6 +279,10 @@ struct kern_ipc_perm* ipc_rmid(struct ip + } while (ids->entries[lid].p == NULL); + ids->max_id = lid; + } ++#ifdef CONFIG_VE ++ if (ids->in_use == 0) ++ put_ve(VE_OWNER_IPCIDS(ids)); ++#endif + p->deleted = 1; + return p; + } +@@ -254,9 +299,9 @@ void* ipc_alloc(int size) + { + void* out; + if(size > PAGE_SIZE) +- out = vmalloc(size); ++ out = ub_vmalloc(size); + else +- out = kmalloc(size, GFP_KERNEL); ++ out = ub_kmalloc(size, GFP_KERNEL); + return out; + } + +@@ -317,7 +362,7 @@ void* ipc_rcu_alloc(int size) + * workqueue if necessary (for vmalloc). + */ + if (rcu_use_vmalloc(size)) { +- out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size); ++ out = ub_vmalloc(sizeof(struct ipc_rcu_vmalloc) + size); + if (out) out += sizeof(struct ipc_rcu_vmalloc); + } else { + out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL); +@@ -524,6 +569,85 @@ int ipc_checkid(struct ipc_ids* ids, str + return 0; + } + ++#ifdef CONFIG_VE ++ ++void prepare_ipc(void) ++{ ++ /* ++ * Note: we don't need to call SET_VE_OWNER_IPCIDS inside, ++ * since we use static variables for ve0 (see STATIC_SOFT decl). ++ */ ++ prepare_msg(); ++ prepare_sem(); ++ prepare_shm(); ++} ++ ++int init_ve_ipc(struct ve_struct * envid) ++{ ++ struct ve_struct * saved_envid; ++ ++ envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_msg_ids == NULL) ++ goto out_nomem; ++ envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_sem_ids == NULL) ++ goto out_free_msg; ++ envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), ++ GFP_KERNEL); ++ if (envid->_shm_ids == NULL) ++ goto out_free_sem; ++ ++ /* ++ * Bad style, but save a lot of code (charging to proper VE) ++ * Here we temporary change VEID of the process involved in VE init. ++ * The same is effect for ve_ipc_cleanup in real_do_env_cleanup(). 
++ */ ++ saved_envid = set_exec_env(envid); ++ ++ ve_msg_ipc_init(); ++ ve_sem_ipc_init(); ++ ve_shm_ipc_init(); ++ ++ (void)set_exec_env(saved_envid); ++ return 0; ++ ++out_free_sem: ++ kfree(envid->_sem_ids); ++out_free_msg: ++ kfree(envid->_msg_ids); ++out_nomem: ++ return -ENOMEM; ++} ++ ++void ve_ipc_cleanup(void) ++{ ++ ve_msg_ipc_cleanup(); ++ ve_sem_ipc_cleanup(); ++ ve_shm_ipc_cleanup(); ++} ++ ++void ve_ipc_free(struct ve_struct *envid) ++{ ++ ipc_free_ids(envid->_msg_ids); ++ ipc_free_ids(envid->_sem_ids); ++ ipc_free_ids(envid->_shm_ids); ++ envid->_msg_ids = envid->_sem_ids = envid->_shm_ids = NULL; ++} ++ ++void fini_ve_ipc(struct ve_struct *ptr) ++{ ++ ve_ipc_cleanup(); ++ ve_ipc_free(ptr); ++} ++ ++EXPORT_SYMBOL(init_ve_ipc); ++EXPORT_SYMBOL(ve_ipc_cleanup); ++EXPORT_SYMBOL(ve_ipc_free); ++EXPORT_SYMBOL(fini_ve_ipc); ++#endif /* CONFIG_VE */ ++ + #ifdef __ARCH_WANT_IPC_PARSE_VERSION + + +diff -uprN linux-2.6.8.1.orig/ipc/util.h linux-2.6.8.1-ve022stab078/ipc/util.h +--- linux-2.6.8.1.orig/ipc/util.h 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/ipc/util.h 2006-05-11 13:05:45.000000000 +0400 +@@ -15,6 +15,20 @@ void sem_init (void); + void msg_init (void); + void shm_init (void); + ++#ifdef CONFIG_VE ++ ++void ve_msg_ipc_init(void); ++void ve_sem_ipc_init(void); ++void ve_shm_ipc_init(void); ++void prepare_msg(void); ++void prepare_sem(void); ++void prepare_shm(void); ++void ve_msg_ipc_cleanup(void); ++void ve_sem_ipc_cleanup(void); ++void ve_shm_ipc_cleanup(void); ++ ++#endif ++ + struct ipc_ids { + int size; + int in_use; +@@ -23,17 +37,21 @@ struct ipc_ids { + unsigned short seq_max; + struct semaphore sem; + struct ipc_id* entries; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(IPCIDS, STATIC_SOFT, struct ipc_ids, owner_env, inline, ()) ++ + struct ipc_id { + struct kern_ipc_perm* p; + }; + +-void __init ipc_init_ids(struct ipc_ids* ids, int size); ++void ipc_init_ids(struct ipc_ids* ids, int size); ++void ve_ipc_init_ids(struct ipc_ids* ids, int size); + + /* must be called with ids->sem acquired.*/ + int ipc_findkey(struct ipc_ids* ids, key_t key); +-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size); ++int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid); + + /* must be called with both locks acquired. */ + struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id); +diff -uprN linux-2.6.8.1.orig/kernel/Kconfig.openvz linux-2.6.8.1-ve022stab078/kernel/Kconfig.openvz +--- linux-2.6.8.1.orig/kernel/Kconfig.openvz 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/Kconfig.openvz 2006-05-11 13:05:49.000000000 +0400 +@@ -0,0 +1,46 @@ ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# Licensing governed by "linux/COPYING.SWsoft" file. ++ ++config VE ++ bool "Virtual Environment support" ++ depends on !SECURITY ++ default y ++ help ++ This option adds support of virtual Linux running on the original box ++ with fully supported virtual network driver, tty subsystem and ++ configurable access for hardware and other resources. ++ ++config VE_CALLS ++ tristate "VE calls interface" ++ depends on VE ++ default m ++ help ++ This option controls how to build vzmon code containing VE calls. ++ By default it's build in module vzmon.o ++ ++config VZ_GENCALLS ++ bool ++ default y ++ ++config VE_NETDEV ++ tristate "VE networking" ++ depends on VE ++ default m ++ help ++ This option controls whether to build VE networking code. 
++ ++config VE_IPTABLES ++ bool "VE netfiltering" ++ depends on VE && VE_NETDEV && INET && NETFILTER ++ default y ++ help ++ This option controls whether to build VE netfiltering code. ++ ++config VZ_WDOG ++ tristate "VE watchdog module" ++ depends on VE ++ default m ++ help ++ This option controls building of vzwdog module, which dumps ++ a lot of useful system info on console periodically. +diff -uprN linux-2.6.8.1.orig/kernel/capability.c linux-2.6.8.1-ve022stab078/kernel/capability.c +--- linux-2.6.8.1.orig/kernel/capability.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/capability.c 2006-05-11 13:05:40.000000000 +0400 +@@ -23,6 +23,7 @@ EXPORT_SYMBOL(cap_bset); + * Locking rule: acquire this prior to tasklist_lock. + */ + spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED; ++EXPORT_SYMBOL(task_capability_lock); + + /* + * For sys_getproccap() and sys_setproccap(), any of the three +@@ -59,8 +60,8 @@ asmlinkage long sys_capget(cap_user_head + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + +- if (pid && pid != current->pid) { +- target = find_task_by_pid(pid); ++ if (pid && pid != virt_pid(current)) { ++ target = find_task_by_pid_ve(pid); + if (!target) { + ret = -ESRCH; + goto out; +@@ -89,14 +90,16 @@ static inline void cap_set_pg(int pgrp, + kernel_cap_t *permitted) + { + task_t *g, *target; +- struct list_head *l; +- struct pid *pid; + +- for_each_task_pid(pgrp, PIDTYPE_PGID, g, l, pid) { ++ pgrp = vpid_to_pid(pgrp); ++ if (pgrp < 0) ++ return; ++ ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) { + target = g; +- while_each_thread(g, target) ++ while_each_thread_ve(g, target) + security_capset_set(target, effective, inheritable, permitted); +- } ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g); + } + + /* +@@ -109,11 +112,11 @@ static inline void cap_set_all(kernel_ca + { + task_t *g, *target; + +- do_each_thread(g, target) { ++ do_each_thread_ve(g, target) { + if (target == current || target->pid == 1) + continue; + security_capset_set(target, effective, inheritable, permitted); +- } while_each_thread(g, target); ++ } while_each_thread_ve(g, target); + } + + /* +@@ -159,8 +162,8 @@ asmlinkage long sys_capset(cap_user_head + spin_lock(&task_capability_lock); + read_lock(&tasklist_lock); + +- if (pid > 0 && pid != current->pid) { +- target = find_task_by_pid(pid); ++ if (pid > 0 && pid != virt_pid(current)) { ++ target = find_task_by_pid_ve(pid); + if (!target) { + ret = -ESRCH; + goto out; +diff -uprN linux-2.6.8.1.orig/kernel/compat.c linux-2.6.8.1-ve022stab078/kernel/compat.c +--- linux-2.6.8.1.orig/kernel/compat.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/compat.c 2006-05-11 13:05:27.000000000 +0400 +@@ -559,5 +559,84 @@ long compat_clock_nanosleep(clockid_t wh + return err; + } + ++void ++sigset_from_compat (sigset_t *set, compat_sigset_t *compat) ++{ ++ switch (_NSIG_WORDS) { ++ case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 ); ++ case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 ); ++ case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 ); ++ case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 ); ++ } ++} ++ ++asmlinkage long ++compat_rt_sigtimedwait (compat_sigset_t __user *uthese, ++ struct compat_siginfo __user *uinfo, ++ struct compat_timespec __user *uts, compat_size_t sigsetsize) ++{ ++ compat_sigset_t s32; ++ sigset_t s; ++ int sig; ++ struct timespec t; ++ siginfo_t info; ++ long ret, timeout = 0; ++ ++ if 
(sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) ++ return -EFAULT; ++ sigset_from_compat(&s, &s32); ++ sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP)); ++ signotset(&s); ++ ++ if (uts) { ++ if (get_compat_timespec (&t, uts)) ++ return -EFAULT; ++ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 ++ || t.tv_sec < 0) ++ return -EINVAL; ++ } ++ ++ spin_lock_irq(&current->sighand->siglock); ++ sig = dequeue_signal(current, &s, &info); ++ if (!sig) { ++ timeout = MAX_SCHEDULE_TIMEOUT; ++ if (uts) ++ timeout = timespec_to_jiffies(&t) ++ +(t.tv_sec || t.tv_nsec); ++ if (timeout) { ++ current->real_blocked = current->blocked; ++ sigandsets(&current->blocked, &current->blocked, &s); ++ ++ recalc_sigpending(); ++ spin_unlock_irq(&current->sighand->siglock); ++ ++ current->state = TASK_INTERRUPTIBLE; ++ timeout = schedule_timeout(timeout); ++ ++ spin_lock_irq(&current->sighand->siglock); ++ sig = dequeue_signal(current, &s, &info); ++ current->blocked = current->real_blocked; ++ siginitset(&current->real_blocked, 0); ++ recalc_sigpending(); ++ } ++ } ++ spin_unlock_irq(&current->sighand->siglock); ++ ++ if (sig) { ++ ret = sig; ++ if (uinfo) { ++ if (copy_siginfo_to_user32(uinfo, &info)) ++ ret = -EFAULT; ++ } ++ }else { ++ ret = timeout?-EINTR:-EAGAIN; ++ } ++ return ret; ++ ++} ++ + /* timer_create is architecture specific because it needs sigevent conversion */ + +diff -uprN linux-2.6.8.1.orig/kernel/configs.c linux-2.6.8.1-ve022stab078/kernel/configs.c +--- linux-2.6.8.1.orig/kernel/configs.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/configs.c 2006-05-11 13:05:42.000000000 +0400 +@@ -89,8 +89,7 @@ static int __init ikconfig_init(void) + struct proc_dir_entry *entry; + + /* create the current config file */ +- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, +- &proc_root); ++ entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL); + if (!entry) + return -ENOMEM; + +diff -uprN linux-2.6.8.1.orig/kernel/cpu.c linux-2.6.8.1-ve022stab078/kernel/cpu.c +--- linux-2.6.8.1.orig/kernel/cpu.c 2004-08-14 14:56:13.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/cpu.c 2006-05-11 13:05:40.000000000 +0400 +@@ -43,13 +43,18 @@ void unregister_cpu_notifier(struct noti + EXPORT_SYMBOL(unregister_cpu_notifier); + + #ifdef CONFIG_HOTPLUG_CPU ++ ++#ifdef CONFIG_SCHED_VCPU ++#error "CONFIG_HOTPLUG_CPU isn't supported with CONFIG_SCHED_VCPU" ++#endif ++ + static inline void check_for_tasks(int cpu) + { + struct task_struct *p; + + write_lock_irq(&tasklist_lock); +- for_each_process(p) { +- if (task_cpu(p) == cpu && (p->utime != 0 || p->stime != 0)) ++ for_each_process_all(p) { ++ if (task_pcpu(p) == cpu && (p->utime != 0 || p->stime != 0)) + printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ + (state = %ld, flags = %lx) \n", + p->comm, p->pid, cpu, p->state, p->flags); +@@ -104,6 +109,13 @@ static int take_cpu_down(void *unused) + return err; + } + ++#ifdef CONFIG_SCHED_VCPU ++#error VCPU vs.
HOTPLUG: fix hotplug code below ++/* ++ * What should be fixed: ++ * - check for if (idle_cpu()) yield() ++ */ ++#endif + int cpu_down(unsigned int cpu) + { + int err; +diff -uprN linux-2.6.8.1.orig/kernel/exit.c linux-2.6.8.1-ve022stab078/kernel/exit.c +--- linux-2.6.8.1.orig/kernel/exit.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/exit.c 2006-05-11 13:05:49.000000000 +0400 +@@ -23,12 +23,17 @@ + #include <linux/mount.h> + #include <linux/proc_fs.h> + #include <linux/mempolicy.h> ++#include <linux/swap.h> ++#include <linux/fairsched.h> ++#include <linux/faudit.h> + + #include <asm/uaccess.h> + #include <asm/unistd.h> + #include <asm/pgtable.h> + #include <asm/mmu_context.h> + ++#include <ub/ub_mem.h> ++ + extern void sem_exit (void); + extern struct task_struct *child_reaper; + +@@ -47,20 +52,19 @@ static void __unhash_process(struct task + } + + REMOVE_LINKS(p); ++ REMOVE_VE_LINKS(p); + } + + void release_task(struct task_struct * p) + { + int zap_leader; + task_t *leader; +- struct dentry *proc_dentry; ++ struct dentry *proc_dentry[2]; + + repeat: +- BUG_ON(p->state < TASK_ZOMBIE); +- + atomic_dec(&p->user->processes); + spin_lock(&p->proc_lock); +- proc_dentry = proc_pid_unhash(p); ++ proc_pid_unhash(p, proc_dentry); + write_lock_irq(&tasklist_lock); + if (unlikely(p->ptrace)) + __ptrace_unlink(p); +@@ -68,6 +72,8 @@ repeat: + __exit_signal(p); + __exit_sighand(p); + __unhash_process(p); ++ nr_zombie--; ++ nr_dead++; + + /* + * If we are the last non-leader member of the thread +@@ -76,7 +82,7 @@ repeat: + */ + zap_leader = 0; + leader = p->group_leader; +- if (leader != p && thread_group_empty(leader) && leader->state == TASK_ZOMBIE) { ++ if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { + BUG_ON(leader->exit_signal == -1); + do_notify_parent(leader, leader->exit_signal); + /* +@@ -101,6 +107,8 @@ repeat: + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); + release_thread(p); ++ if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter)) ++ do_env_cleanup(VE_TASK_INFO(p)->owner_env); + put_task_struct(p); + + p = leader; +@@ -112,10 +120,10 @@ repeat: + + void unhash_process(struct task_struct *p) + { +- struct dentry *proc_dentry; ++ struct dentry *proc_dentry[2]; + + spin_lock(&p->proc_lock); +- proc_dentry = proc_pid_unhash(p); ++ proc_pid_unhash(p, proc_dentry); + write_lock_irq(&tasklist_lock); + __unhash_process(p); + write_unlock_irq(&tasklist_lock); +@@ -131,17 +139,18 @@ void unhash_process(struct task_struct * + int session_of_pgrp(int pgrp) + { + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + int sid = -1; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ + read_lock(&tasklist_lock); +- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if (p->signal->session > 0) { + sid = p->signal->session; + goto out; + } +- p = find_task_by_pid(pgrp); ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); ++ p = find_task_by_pid_ve(pgrp); + if (p) + sid = p->signal->session; + out: +@@ -161,21 +170,21 @@ out: + static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) + { + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + int ret = 1; + +- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { ++ WARN_ON(is_virtual_pid(pgrp)); ++ ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if (p == ignored_task +- || p->state >= TASK_ZOMBIE +- || p->real_parent->pid == 1) ++ || p->exit_state ++ || virt_pid(p->real_parent) == 1) + 
continue; + if (process_group(p->real_parent) != pgrp + && p->real_parent->signal->session == p->signal->session) { + ret = 0; + break; + } +- } ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return ret; /* (sighing) "Often!" */ + } + +@@ -183,6 +192,8 @@ int is_orphaned_pgrp(int pgrp) + { + int retval; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ + read_lock(&tasklist_lock); + retval = will_become_orphaned_pgrp(pgrp, NULL); + read_unlock(&tasklist_lock); +@@ -194,10 +205,10 @@ static inline int has_stopped_jobs(int p + { + int retval = 0; + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + +- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { ++ WARN_ON(is_virtual_pid(pgrp)); ++ ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + if (p->state != TASK_STOPPED) + continue; + +@@ -213,7 +224,7 @@ static inline int has_stopped_jobs(int p + + retval = 1; + break; +- } ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return retval; + } + +@@ -260,6 +271,9 @@ void __set_special_pids(pid_t session, p + { + struct task_struct *curr = current; + ++ WARN_ON(is_virtual_pid(pgrp)); ++ WARN_ON(is_virtual_pid(session)); ++ + if (curr->signal->session != session) { + detach_pid(curr, PIDTYPE_SID); + curr->signal->session = session; +@@ -278,6 +292,7 @@ void set_special_pids(pid_t session, pid + __set_special_pids(session, pgrp); + write_unlock_irq(&tasklist_lock); + } ++EXPORT_SYMBOL(set_special_pids); + + /* + * Let kernel threads use this to say that they +@@ -342,7 +357,9 @@ void daemonize(const char *name, ...) + exit_mm(current); + + set_special_pids(1, 1); ++ down(&tty_sem); + current->signal->tty = NULL; ++ up(&tty_sem); + + /* Block and flush all signals */ + sigfillset(&blocked); +@@ -529,12 +546,8 @@ static inline void choose_new_parent(tas + * Make sure we're not reparenting to ourselves and that + * the parent is not a zombie. + */ +- if (p == reaper || reaper->state >= TASK_ZOMBIE) +- p->real_parent = child_reaper; +- else +- p->real_parent = reaper; +- if (p->parent == p->real_parent) +- BUG(); ++ BUG_ON(p == reaper || reaper->exit_state); ++ p->real_parent = reaper; + } + + static inline void reparent_thread(task_t *p, task_t *father, int traced) +@@ -566,7 +579,7 @@ static inline void reparent_thread(task_ + /* If we'd notified the old parent about this child's death, + * also notify the new parent. 
+ */ +- if (p->state == TASK_ZOMBIE && p->exit_signal != -1 && ++ if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && + thread_group_empty(p)) + do_notify_parent(p, p->exit_signal); + } +@@ -597,12 +610,15 @@ static inline void reparent_thread(task_ + static inline void forget_original_parent(struct task_struct * father, + struct list_head *to_release) + { +- struct task_struct *p, *reaper = father; ++ struct task_struct *p, *tsk_reaper, *reaper = father; + struct list_head *_p, *_n; + +- reaper = father->group_leader; +- if (reaper == father) +- reaper = child_reaper; ++ do { ++ reaper = next_thread(reaper); ++ if (reaper == father) { ++ break; ++ } ++ } while (reaper->exit_state); + + /* + * There are only two places where our children can be: +@@ -621,14 +637,21 @@ static inline void forget_original_paren + /* if father isn't the real parent, then ptrace must be enabled */ + BUG_ON(father != p->real_parent && !ptrace); + ++ tsk_reaper = reaper; ++ if (tsk_reaper == father) ++#ifdef CONFIG_VE ++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry; ++ if (tsk_reaper == p) ++#endif ++ tsk_reaper = child_reaper; + if (father == p->real_parent) { +- /* reparent with a reaper, real father it's us */ +- choose_new_parent(p, reaper, child_reaper); ++ /* reparent with a tsk_reaper, real father it's us */ ++ choose_new_parent(p, tsk_reaper, child_reaper); + reparent_thread(p, father, 0); + } else { + /* reparent ptraced task to its real parent */ + __ptrace_unlink (p); +- if (p->state == TASK_ZOMBIE && p->exit_signal != -1 && ++ if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && + thread_group_empty(p)) + do_notify_parent(p, p->exit_signal); + } +@@ -639,12 +662,20 @@ static inline void forget_original_paren + * zombie forever since we prevented it from self-reap itself + * while it was being traced by us, to be able to see it in wait4. + */ +- if (unlikely(ptrace && p->state == TASK_ZOMBIE && p->exit_signal == -1)) ++ if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) + list_add(&p->ptrace_list, to_release); + } + list_for_each_safe(_p, _n, &father->ptrace_children) { + p = list_entry(_p,struct task_struct,ptrace_list); +- choose_new_parent(p, reaper, child_reaper); ++ ++ tsk_reaper = reaper; ++ if (tsk_reaper == father) ++#ifdef CONFIG_VE ++ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry; ++ if (tsk_reaper == p) ++#endif ++ tsk_reaper = child_reaper; ++ choose_new_parent(p, tsk_reaper, child_reaper); + reparent_thread(p, father, 1); + } + } +@@ -740,6 +771,9 @@ static void exit_notify(struct task_stru + && !capable(CAP_KILL)) + tsk->exit_signal = SIGCHLD; + ++ if (tsk->exit_signal != -1 && t == child_reaper) ++ /* We dont want people slaying init. */ ++ tsk->exit_signal = SIGCHLD; + + /* If something other than our normal parent is ptracing us, then + * send it a SIGCHLD instead of honoring exit_signal. 
exit_signal +@@ -752,11 +786,11 @@ static void exit_notify(struct task_stru + do_notify_parent(tsk, SIGCHLD); + } + +- state = TASK_ZOMBIE; ++ state = EXIT_ZOMBIE; + if (tsk->exit_signal == -1 && tsk->ptrace == 0) +- state = TASK_DEAD; +- tsk->state = state; +- tsk->flags |= PF_DEAD; ++ state = EXIT_DEAD; ++ tsk->exit_state = state; ++ nr_zombie++; + + /* + * Clear these here so that update_process_times() won't try to deliver +@@ -766,20 +800,7 @@ static void exit_notify(struct task_stru + tsk->it_prof_value = 0; + tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY; + +- /* +- * In the preemption case it must be impossible for the task +- * to get runnable again, so use "_raw_" unlock to keep +- * preempt_count elevated until we schedule(). +- * +- * To avoid deadlock on SMP, interrupts must be unmasked. If we +- * don't, subsequently called functions (e.g, wait_task_inactive() +- * via release_task()) will spin, with interrupt flags +- * unwittingly blocked, until the other task sleeps. That task +- * may itself be waiting for smp_call_function() to answer and +- * complete, and with interrupts blocked that will never happen. +- */ +- _raw_write_unlock(&tasklist_lock); +- local_irq_enable(); ++ write_unlock_irq(&tasklist_lock); + + list_for_each_safe(_p, _n, &ptrace_dead) { + list_del_init(_p); +@@ -788,21 +809,110 @@ static void exit_notify(struct task_stru + } + + /* If the process is dead, release it - nobody will wait for it */ +- if (state == TASK_DEAD) ++ if (state == EXIT_DEAD) + release_task(tsk); + ++ /* PF_DEAD causes final put_task_struct after we schedule. */ ++ preempt_disable(); ++ tsk->flags |= PF_DEAD; + } + ++asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); ++ ++#ifdef CONFIG_VE ++/* ++ * Handle exitting of init process, it's a special case for VE. ++ */ ++static void do_initproc_exit(void) ++{ ++ struct task_struct *tsk; ++ struct ve_struct *env; ++ struct siginfo info; ++ struct task_struct *g, *p; ++ long delay = 1L; ++ ++ tsk = current; ++ env = VE_TASK_INFO(current)->owner_env; ++ if (env->init_entry != tsk) ++ return; ++ ++ if (ve_is_super(env) && tsk->pid == 1) ++ panic("Attempted to kill init!"); ++ ++ memset(&info, 0, sizeof(info)); ++ info.si_errno = 0; ++ info.si_code = SI_KERNEL; ++ info.si_pid = virt_pid(tsk); ++ info.si_uid = current->uid; ++ info.si_signo = SIGKILL; ++ ++ /* ++ * Here the VE changes its state into "not running". ++ * op_sem taken for write is a barrier to all VE manipulations from ++ * ioctl: it waits for operations currently in progress and blocks all ++ * subsequent operations until is_running is set to 0 and op_sem is ++ * released. ++ */ ++ down_write(&env->op_sem); ++ env->is_running = 0; ++ up_write(&env->op_sem); ++ ++ /* send kill to all processes of VE */ ++ read_lock(&tasklist_lock); ++ do_each_thread_ve(g, p) { ++ force_sig_info(SIGKILL, &info, p); ++ } while_each_thread_ve(g, p); ++ read_unlock(&tasklist_lock); ++ ++ /* wait for all init childs exit */ ++ while (atomic_read(&env->pcounter) > 1) { ++ if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0) ++ continue; ++ /* it was ENOCHLD or no more children somehow */ ++ if (atomic_read(&env->pcounter) == 1) ++ break; ++ ++ /* clear all signals to avoid wakeups */ ++ if (signal_pending(tsk)) ++ flush_signals(tsk); ++ /* we have child without signal sent */ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(delay); ++ delay = (delay < HZ) ? 
(delay << 1) : HZ; ++ read_lock(&tasklist_lock); ++ do_each_thread_ve(g, p) { ++ if (p != tsk) ++ force_sig_info(SIGKILL, &info, p); ++ } while_each_thread_ve(g, p); ++ read_unlock(&tasklist_lock); ++ } ++ env->init_entry = child_reaper; ++ write_lock_irq(&tasklist_lock); ++ REMOVE_LINKS(tsk); ++ tsk->parent = tsk->real_parent = child_reaper; ++ SET_LINKS(tsk); ++ write_unlock_irq(&tasklist_lock); ++} ++#endif ++ + asmlinkage NORET_TYPE void do_exit(long code) + { + struct task_struct *tsk = current; ++ struct mm_struct *mm; + ++ mm = tsk->mm; + if (unlikely(in_interrupt())) + panic("Aiee, killing interrupt handler!"); + if (unlikely(!tsk->pid)) + panic("Attempted to kill the idle task!"); ++#ifndef CONFIG_VE + if (unlikely(tsk->pid == 1)) + panic("Attempted to kill init!"); ++#else ++ do_initproc_exit(); ++#endif ++ virtinfo_gencall(VIRTINFO_DOEXIT, NULL); ++ + if (tsk->io_context) + exit_io_context(); + tsk->flags |= PF_EXITING; +@@ -817,7 +927,9 @@ asmlinkage NORET_TYPE void do_exit(long + + if (unlikely(current->ptrace & PT_TRACE_EXIT)) { + current->ptrace_message = code; ++ set_pn_state(current, PN_STOP_EXIT); + ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); ++ clear_pn_state(current); + } + + acct_process(code); +@@ -838,10 +950,25 @@ asmlinkage NORET_TYPE void do_exit(long + + tsk->exit_code = code; + exit_notify(tsk); ++ ++ /* In order to allow OOM to happen from now on */ ++ spin_lock(&oom_generation_lock); ++ if (tsk->flags & PF_MEMDIE) { ++ if (!oom_kill_counter || !--oom_kill_counter) ++ oom_generation++; ++ printk("OOM killed process %s (pid=%d, ve=%d) (mm=%p) exited, free=%u.\n", ++ tsk->comm, tsk->pid, ++ VEID(VE_TASK_INFO(current)->owner_env), ++ mm, nr_free_pages()); ++ } ++ spin_unlock(&oom_generation_lock); ++ + #ifdef CONFIG_NUMA + mpol_free(tsk->mempolicy); + tsk->mempolicy = NULL; + #endif ++ ++ BUG_ON(!(current->flags & PF_DEAD)); + schedule(); + BUG(); + /* Avoid "noreturn function does return". */ +@@ -860,26 +987,22 @@ EXPORT_SYMBOL(complete_and_exit); + + asmlinkage long sys_exit(int error_code) + { ++ virtinfo_notifier_call(VITYPE_FAUDIT, ++ VIRTINFO_FAUDIT_EXIT, &error_code); + do_exit((error_code&0xff)<<8); + } + + task_t fastcall *next_thread(const task_t *p) + { +- const struct pid_link *link = p->pids + PIDTYPE_TGID; +- const struct list_head *tmp, *head = &link->pidptr->task_list; +- ++ task_t *tsk; + #ifdef CONFIG_SMP +- if (!p->sighand) +- BUG(); +- if (!spin_is_locked(&p->sighand->siglock) && +- !rwlock_is_locked(&tasklist_lock)) ++ if (!rwlock_is_locked(&tasklist_lock) || p->pids[PIDTYPE_TGID].nr == 0) + BUG(); + #endif +- tmp = link->pid_chain.next; +- if (tmp == head) +- tmp = head->next; +- +- return pid_task(tmp, PIDTYPE_TGID); ++ tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); ++ /* all threads should belong to ONE ve! */ ++ BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env); ++ return tsk; + } + + EXPORT_SYMBOL(next_thread); +@@ -929,21 +1052,26 @@ asmlinkage void sys_exit_group(int error + static int eligible_child(pid_t pid, int options, task_t *p) + { + if (pid > 0) { +- if (p->pid != pid) ++ if ((is_virtual_pid(pid) ? 
virt_pid(p) : p->pid) != pid) + return 0; + } else if (!pid) { + if (process_group(p) != process_group(current)) + return 0; + } else if (pid != -1) { +- if (process_group(p) != -pid) +- return 0; ++ if (__is_virtual_pid(-pid)) { ++ if (virt_pgid(p) != -pid) ++ return 0; ++ } else { ++ if (process_group(p) != -pid) ++ return 0; ++ } + } + + /* + * Do not consider detached threads that are + * not ptraced: + */ +- if (p->exit_signal == -1 && !p->ptrace) ++ if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) + return 0; + + /* Wait for all children (clone and not) if __WALL is set; +@@ -968,7 +1096,7 @@ static int eligible_child(pid_t pid, int + } + + /* +- * Handle sys_wait4 work for one task in state TASK_ZOMBIE. We hold ++ * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold + * read_lock(&tasklist_lock) on entry. If we return zero, we still hold + * the lock and this task is uninteresting. If we return nonzero, we have + * released the lock and the system call should return. +@@ -982,9 +1110,9 @@ static int wait_task_zombie(task_t *p, u + * Try to move the task's state to DEAD + * only one thread is allowed to do this: + */ +- state = xchg(&p->state, TASK_DEAD); +- if (state != TASK_ZOMBIE) { +- BUG_ON(state != TASK_DEAD); ++ state = xchg(&p->exit_state, EXIT_DEAD); ++ if (state != EXIT_ZOMBIE) { ++ BUG_ON(state != EXIT_DEAD); + return 0; + } + if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) +@@ -996,7 +1124,7 @@ static int wait_task_zombie(task_t *p, u + + /* + * Now we are sure this task is interesting, and no other +- * thread can reap it because we set its state to TASK_DEAD. ++ * thread can reap it because we set its state to EXIT_DEAD. + */ + read_unlock(&tasklist_lock); + +@@ -1008,16 +1136,18 @@ static int wait_task_zombie(task_t *p, u + retval = put_user(p->exit_code, stat_addr); + } + if (retval) { +- p->state = TASK_ZOMBIE; ++ // TODO: is this safe? ++ p->exit_state = EXIT_ZOMBIE; + return retval; + } +- retval = p->pid; ++ retval = get_task_pid(p); + if (p->real_parent != p->parent) { + write_lock_irq(&tasklist_lock); + /* Double-check with lock held. */ + if (p->real_parent != p->parent) { + __ptrace_unlink(p); +- p->state = TASK_ZOMBIE; ++ // TODO: is this safe? ++ p->exit_state = EXIT_ZOMBIE; + /* + * If this is not a detached task, notify the parent. If it's + * still not detached after that, don't release it now. +@@ -1072,13 +1202,13 @@ static int wait_task_stopped(task_t *p, + /* + * This uses xchg to be atomic with the thread resuming and setting + * it. It must also be done with the write lock held to prevent a +- * race with the TASK_ZOMBIE case. ++ * race with the EXIT_ZOMBIE case. + */ + exit_code = xchg(&p->exit_code, 0); + if (unlikely(p->state > TASK_STOPPED)) { + /* + * The task resumed and then died. Let the next iteration +- * catch it in TASK_ZOMBIE. Note that exit_code might ++ * catch it in EXIT_ZOMBIE. Note that exit_code might + * already be zero here if it resumed and did _exit(0). + * The task itself is dead and won't touch exit_code again; + * other processors in this function are locked out. +@@ -1107,7 +1237,7 @@ static int wait_task_stopped(task_t *p, + if (!retval && stat_addr) + retval = put_user((exit_code << 8) | 0x7f, stat_addr); + if (!retval) +- retval = p->pid; ++ retval = get_task_pid(p); + put_task_struct(p); + + BUG_ON(!retval); +@@ -1152,16 +1282,25 @@ repeat: + if (retval != 0) /* He released the lock. 
*/ + goto end_wait4; + break; +- case TASK_ZOMBIE: +- /* +- * Eligible but we cannot release it yet: +- */ +- if (ret == 2) +- continue; +- retval = wait_task_zombie(p, stat_addr, ru); +- if (retval != 0) /* He released the lock. */ +- goto end_wait4; +- break; ++ default: ++ // case EXIT_DEAD: ++ if (p->exit_state == EXIT_DEAD) ++ continue; ++ // case EXIT_ZOMBIE: ++ if (p->exit_state == EXIT_ZOMBIE) { ++ /* ++ * Eligible but we cannot release ++ * it yet: ++ */ ++ if (ret == 2) ++ continue; ++ retval = wait_task_zombie( ++ p, stat_addr, ru); ++ /* He released the lock. */ ++ if (retval != 0) ++ goto end_wait4; ++ break; ++ } + } + } + if (!flag) { +diff -uprN linux-2.6.8.1.orig/kernel/extable.c linux-2.6.8.1-ve022stab078/kernel/extable.c +--- linux-2.6.8.1.orig/kernel/extable.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/extable.c 2006-05-11 13:05:40.000000000 +0400 +@@ -49,6 +49,7 @@ static int core_kernel_text(unsigned lon + if (addr >= (unsigned long)_sinittext && + addr <= (unsigned long)_einittext) + return 1; ++ + return 0; + } + +diff -uprN linux-2.6.8.1.orig/kernel/fairsched.c linux-2.6.8.1-ve022stab078/kernel/fairsched.c +--- linux-2.6.8.1.orig/kernel/fairsched.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/fairsched.c 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,1286 @@ ++/* ++ * Fair Scheduler ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * Start-tag scheduling follows the theory presented in ++ * http://www.cs.utexas.edu/users/dmcl/papers/ps/SIGCOMM96.ps ++ */ ++ ++#include <linux/config.h> ++#include <linux/kernel.h> ++#include <asm/timex.h> ++#include <asm/atomic.h> ++#include <linux/spinlock.h> ++#include <asm/semaphore.h> ++#include <linux/init.h> ++#include <linux/slab.h> ++#include <ub/ub_mem.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++#include <linux/fs.h> ++#include <linux/dcache.h> ++#include <linux/sysctl.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/fairsched.h> ++#include <linux/vsched.h> ++ ++/* we need it for vsched routines in sched.c */ ++spinlock_t fairsched_lock = SPIN_LOCK_UNLOCKED; ++ ++#ifdef CONFIG_FAIRSCHED ++ ++#define FAIRSHED_DEBUG " debug" ++ ++ ++/*********************************************************************/ ++/* ++ * Special arithmetics ++ */ ++/*********************************************************************/ ++ ++#define CYCLES_SHIFT (8) ++#define SCYCLES_TIME(time) \ ++ ((scycles_t) {((time) + (1 << CYCLES_SHIFT) - 1) >> CYCLES_SHIFT}) ++ ++#define CYCLES_ZERO (0) ++static inline int CYCLES_BEFORE(cycles_t x, cycles_t y) ++{ ++ return (__s64)(x-y) < 0; ++} ++static inline int CYCLES_AFTER(cycles_t x, cycles_t y) ++{ ++ return (__s64)(y-x) < 0; ++} ++static inline void CYCLES_DADD(cycles_t *x, fschdur_t y) {*x+=y.d;} ++ ++#define FSCHDUR_ZERO (0) ++#define TICK_DUR ((fschdur_t){cycles_per_jiffy}) ++static inline fschdur_t FSCHDURATION(cycles_t x, cycles_t y) ++{ ++ return (fschdur_t){x - y}; ++} ++static inline int FSCHDUR_CMP(fschdur_t x, fschdur_t y) ++{ ++ if (x.d < y.d) return -1; ++ if (x.d > y.d) return 1; ++ return 0; ++} ++static inline fschdur_t FSCHDUR_SUB(fschdur_t x, fschdur_t y) ++{ ++ return (fschdur_t){x.d - y.d}; ++} ++ ++#define FSCHTAG_ZERO ((fschtag_t){0}) ++static inline int FSCHTAG_CMP(fschtag_t x, fschtag_t y) ++{ ++ if (x.t < y.t) return -1; ++ if (x.t > y.t) return 1; ++ return 0; ++} ++static inline 
fschtag_t FSCHTAG_MAX(fschtag_t x, fschtag_t y) ++{ ++ return x.t >= y.t ? x : y; ++} ++static inline int FSCHTAG_DADD(fschtag_t *tag, fschdur_t dur, unsigned w) ++{ ++ cycles_t new_tag; ++ new_tag = tag->t + (cycles_t)dur.d * w; ++ if (new_tag < tag->t) ++ return -1; ++ /* DEBUG */ ++ if (new_tag >= (1ULL << 48)) ++ return -1; ++ tag->t = new_tag; ++ return 0; ++} ++static inline int FSCHTAG_ADD(fschtag_t *tag, fschtag_t y) ++{ ++ cycles_t new_tag; ++ new_tag = tag->t + y.t; ++ if (new_tag < tag->t) ++ return -1; ++ tag->t = new_tag; ++ return 0; ++} ++static inline fschtag_t FSCHTAG_SUB(fschtag_t x, fschtag_t y) ++{ ++ return (fschtag_t){x.t - y.t}; ++} ++ ++#define FSCHVALUE_ZERO ((fschvalue_t){0}) ++#define TICK_VALUE ((fschvalue_t){(cycles_t)cycles_per_jiffy << FSCHRATE_SHIFT}) ++static inline fschvalue_t FSCHVALUE(unsigned long t) ++{ ++ return (fschvalue_t){(cycles_t)t << FSCHRATE_SHIFT}; ++} ++static inline int FSCHVALUE_CMP(fschvalue_t x, fschvalue_t y) ++{ ++ if (x.v < y.v) return -1; ++ if (x.v > y.v) return 1; ++ return 0; ++} ++static inline void FSCHVALUE_DADD(fschvalue_t *val, fschdur_t dur, ++ unsigned rate) ++{ ++ val->v += (cycles_t)dur.d * rate; ++} ++static inline fschvalue_t FSCHVALUE_SUB(fschvalue_t x, fschvalue_t y) ++{ ++ return (fschvalue_t){x.v - y.v}; ++} ++static inline cycles_t FSCHVALUE_TO_DELAY(fschvalue_t val, unsigned rate) ++{ ++ unsigned long t; ++ /* ++ * Here we lose precision to make the division 32-bit on IA-32. ++ * The value is not greater than TICK_VALUE. ++ * (TICK_VALUE >> FSCHRATE_SHIFT) fits unsigned long. ++ */ ++ t = (val.v + (1 << FSCHRATE_SHIFT) - 1) >> FSCHRATE_SHIFT; ++ return (cycles_t)((t + rate - 1) / rate) << FSCHRATE_SHIFT; ++} ++ ++ ++/*********************************************************************/ ++/* ++ * Global data ++ */ ++/*********************************************************************/ ++ ++#define fsch_assert(x) \ ++ do { \ ++ static int count; \ ++ if (!(x) && count++ < 10) \ ++ printk("fsch_assert " #x " failed\n"); \ ++ } while (0) ++ ++/* ++ * Configurable parameters ++ */ ++unsigned fairsched_max_latency = 25; /* jiffies */ ++ ++/* ++ * Parameters initialized at startup ++ */ ++/* Number of online CPUs */ ++unsigned fairsched_nr_cpus; ++/* Token Bucket depth (burst size) */ ++static fschvalue_t max_value; ++ ++struct fairsched_node fairsched_init_node = { ++ .id = INT_MAX, ++#ifdef CONFIG_VE ++ .owner_env = get_ve0(), ++#endif ++ .weight = 1, ++}; ++EXPORT_SYMBOL(fairsched_init_node); ++ ++struct fairsched_node fairsched_idle_node = { ++ .id = -1, ++}; ++ ++static int fairsched_nr_nodes; ++static LIST_HEAD(fairsched_node_head); ++static LIST_HEAD(fairsched_running_head); ++static LIST_HEAD(fairsched_delayed_head); ++ ++DEFINE_PER_CPU(cycles_t, prev_schedule); ++static fschtag_t max_latency; ++ ++static DECLARE_MUTEX(fairsched_mutex); ++ ++/*********************************************************************/ ++/* ++ * Small helper routines ++ */ ++/*********************************************************************/ ++ ++/* this didn't proved to be very valuable statistics... */ ++#define fairsched_inc_ve_strv(node, cycles) do {} while(0) ++#define fairsched_dec_ve_strv(node, cycles) do {} while(0) ++ ++/*********************************************************************/ ++/* ++ * Runlist management ++ */ ++/*********************************************************************/ ++ ++/* ++ * Returns the start_tag of the first runnable node, or 0. 
++ */ ++static inline fschtag_t virtual_time(void) ++{ ++ struct fairsched_node *p; ++ ++ if (!list_empty(&fairsched_running_head)) { ++ p = list_first_entry(&fairsched_running_head, ++ struct fairsched_node, runlist); ++ return p->start_tag; ++ } ++ return FSCHTAG_ZERO; ++} ++ ++static void fairsched_recompute_max_latency(void) ++{ ++ struct fairsched_node *p; ++ unsigned w; ++ fschtag_t tag; ++ ++ w = FSCHWEIGHT_MAX; ++ list_for_each_entry(p, &fairsched_node_head, nodelist) { ++ if (p->weight < w) ++ w = p->weight; ++ } ++ tag = FSCHTAG_ZERO; ++ (void) FSCHTAG_DADD(&tag, TICK_DUR, ++ fairsched_nr_cpus * fairsched_max_latency * w); ++ max_latency = tag; ++} ++ ++static void fairsched_reset_start_tags(void) ++{ ++ struct fairsched_node *cnode; ++ fschtag_t min_tag; ++ ++ min_tag = virtual_time(); ++ list_for_each_entry(cnode, &fairsched_node_head, nodelist) { ++ if (FSCHTAG_CMP(cnode->start_tag, min_tag) > 0) ++ cnode->start_tag = FSCHTAG_SUB(cnode->start_tag, ++ min_tag); ++ else ++ cnode->start_tag = FSCHTAG_ZERO; ++ } ++} ++ ++static void fairsched_running_insert(struct fairsched_node *node) ++{ ++ struct list_head *tmp; ++ struct fairsched_node *p; ++ fschtag_t start_tag_max; ++ ++ if (!list_empty(&fairsched_running_head)) { ++ start_tag_max = virtual_time(); ++ if (!FSCHTAG_ADD(&start_tag_max, max_latency) && ++ FSCHTAG_CMP(start_tag_max, node->start_tag) < 0) ++ node->start_tag = start_tag_max; ++ } ++ ++ list_for_each(tmp, &fairsched_running_head) { ++ p = list_entry(tmp, struct fairsched_node, runlist); ++ if (FSCHTAG_CMP(node->start_tag, p->start_tag) <= 0) ++ break; ++ } ++ /* insert node just before tmp */ ++ list_add_tail(&node->runlist, tmp); ++} ++ ++static inline void fairsched_running_insert_fromsleep( ++ struct fairsched_node *node) ++{ ++ node->start_tag = FSCHTAG_MAX(node->start_tag, virtual_time()); ++ fairsched_running_insert(node); ++} ++ ++ ++/*********************************************************************/ ++/* ++ * CPU limiting helper functions ++ * ++ * These functions compute rates, delays and manipulate with sleep ++ * lists and so on. ++ */ ++/*********************************************************************/ ++ ++/* ++ * Insert a node into the list of nodes removed from scheduling, ++ * sorted by the time at which the the node is allowed to run, ++ * historically called `delay'. ++ */ ++static void fairsched_delayed_insert(struct fairsched_node *node) ++{ ++ struct fairsched_node *p; ++ struct list_head *tmp; ++ ++ list_for_each(tmp, &fairsched_delayed_head) { ++ p = list_entry(tmp, struct fairsched_node, ++ runlist); ++ if (CYCLES_AFTER(p->delay, node->delay)) ++ break; ++ } ++ /* insert node just before tmp */ ++ list_add_tail(&node->runlist, tmp); ++} ++ ++static inline void nodevalue_add(struct fairsched_node *node, ++ fschdur_t duration, unsigned rate) ++{ ++ FSCHVALUE_DADD(&node->value, duration, rate); ++ if (FSCHVALUE_CMP(node->value, max_value) > 0) ++ node->value = max_value; ++} ++ ++/* ++ * The node has been selected to run. ++ * This function accounts in advance for the time that the node will run. ++ * The advance not used by the node will be credited back. ++ */ ++static void fairsched_ratelimit_charge_advance( ++ struct fairsched_node *node, ++ cycles_t time) ++{ ++ fsch_assert(!node->delayed); ++ fsch_assert(FSCHVALUE_CMP(node->value, TICK_VALUE) >= 0); ++ ++ /* ++ * Account for the time passed since last update. 
++ * It might be needed if the node has become runnable because of ++ * a wakeup, but hasn't gone through other functions updating ++ * the bucket value. ++ */ ++ if (CYCLES_AFTER(time, node->last_updated_at)) { ++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at), ++ node->rate); ++ node->last_updated_at = time; ++ } ++ ++ /* charge for the full tick the node might be running */ ++ node->value = FSCHVALUE_SUB(node->value, TICK_VALUE); ++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) { ++ list_del(&node->runlist); ++ node->delayed = 1; ++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY( ++ FSCHVALUE_SUB(TICK_VALUE, node->value), ++ node->rate); ++ node->nr_ready = 0; ++ fairsched_delayed_insert(node); ++ } ++} ++ ++static void fairsched_ratelimit_credit_unused( ++ struct fairsched_node *node, ++ cycles_t time, fschdur_t duration) ++{ ++ /* account for the time passed since last update */ ++ if (CYCLES_AFTER(time, node->last_updated_at)) { ++ nodevalue_add(node, FSCHDURATION(time, node->last_updated_at), ++ node->rate); ++ node->last_updated_at = time; ++ } ++ ++ /* ++ * When the node was given this CPU, it was charged for 1 tick. ++ * Credit back the unused time. ++ */ ++ if (FSCHDUR_CMP(duration, TICK_DUR) < 0) ++ nodevalue_add(node, FSCHDUR_SUB(TICK_DUR, duration), ++ 1 << FSCHRATE_SHIFT); ++ ++ /* check if the node is allowed to run */ ++ if (FSCHVALUE_CMP(node->value, TICK_VALUE) < 0) { ++ /* ++ * The node was delayed and remain such. ++ * But since the bucket value has been updated, ++ * update the delay time and move the node in the list. ++ */ ++ fsch_assert(node->delayed); ++ node->delay = node->last_updated_at + FSCHVALUE_TO_DELAY( ++ FSCHVALUE_SUB(TICK_VALUE, node->value), ++ node->rate); ++ } else if (node->delayed) { ++ /* ++ * The node was delayed, but now it is allowed to run. ++ * We do not manipulate with lists, it will be done by the ++ * caller. ++ */ ++ node->nr_ready = node->nr_runnable; ++ node->delayed = 0; ++ } ++} ++ ++static void fairsched_delayed_wake(cycles_t time) ++{ ++ struct fairsched_node *p; ++ ++ while (!list_empty(&fairsched_delayed_head)) { ++ p = list_entry(fairsched_delayed_head.next, ++ struct fairsched_node, ++ runlist); ++ if (CYCLES_AFTER(p->delay, time)) ++ break; ++ ++ /* ok, the delay period is completed */ ++ /* account for the time passed since last update */ ++ if (CYCLES_AFTER(time, p->last_updated_at)) { ++ nodevalue_add(p, FSCHDURATION(time, p->last_updated_at), ++ p->rate); ++ p->last_updated_at = time; ++ } ++ ++ fsch_assert(FSCHVALUE_CMP(p->value, TICK_VALUE) >= 0); ++ p->nr_ready = p->nr_runnable; ++ p->delayed = 0; ++ list_del_init(&p->runlist); ++ if (p->nr_ready) ++ fairsched_running_insert_fromsleep(p); ++ } ++} ++ ++static struct fairsched_node *fairsched_find(unsigned int id); ++ ++void fairsched_cpu_online_map(int id, cpumask_t *mask) ++{ ++ struct fairsched_node *node; ++ ++ down(&fairsched_mutex); ++ node = fairsched_find(id); ++ if (node == NULL) ++ *mask = CPU_MASK_NONE; ++ else ++ vsched_cpu_online_map(node->vsched, mask); ++ up(&fairsched_mutex); ++} ++ ++ ++/*********************************************************************/ ++/* ++ * The heart of the algorithm: ++ * fairsched_incrun, fairsched_decrun, fairsched_schedule ++ * ++ * Note: old property nr_ready >= nr_pcpu doesn't hold anymore. ++ * However, nr_runnable, nr_ready and delayed are maintained in sync. ++ */ ++/*********************************************************************/ ++ ++/* ++ * Called on a wakeup inside the node. 
++ */ ++void fairsched_incrun(struct fairsched_node *node) ++{ ++ if (!node->delayed && !node->nr_ready++) ++ /* the node wasn't on the running list, insert */ ++ fairsched_running_insert_fromsleep(node); ++ node->nr_runnable++; ++} ++ ++/* ++ * Called from inside schedule() when a sleeping state is entered. ++ */ ++void fairsched_decrun(struct fairsched_node *node) ++{ ++ if (!node->delayed && !--node->nr_ready) ++ /* nr_ready changed 1->0, remove from the running list */ ++ list_del_init(&node->runlist); ++ --node->nr_runnable; ++} ++ ++void fairsched_inccpu(struct fairsched_node *node) ++{ ++ node->nr_pcpu++; ++ fairsched_dec_ve_strv(node, cycles); ++} ++ ++static inline void __fairsched_deccpu(struct fairsched_node *node) ++{ ++ node->nr_pcpu--; ++ fairsched_inc_ve_strv(node, cycles); ++} ++ ++void fairsched_deccpu(struct fairsched_node *node) ++{ ++ if (node == &fairsched_idle_node) ++ return; ++ ++ __fairsched_deccpu(node); ++} ++ ++static void fairsched_account(struct fairsched_node *node, ++ cycles_t time) ++{ ++ fschdur_t duration; ++ ++ duration = FSCHDURATION(time, __get_cpu_var(prev_schedule)); ++#ifdef CONFIG_VE ++ CYCLES_DADD(&node->owner_env->cpu_used_ve, duration); ++#endif ++ ++ /* ++ * The duration is not greater than TICK_DUR since ++ * task->need_resched is always 1. ++ */ ++ if (FSCHTAG_DADD(&node->start_tag, duration, node->weight)) { ++ fairsched_reset_start_tags(); ++ (void) FSCHTAG_DADD(&node->start_tag, duration, ++ node->weight); ++ } ++ ++ list_del_init(&node->runlist); ++ if (node->rate_limited) ++ fairsched_ratelimit_credit_unused(node, time, duration); ++ if (!node->delayed) { ++ if (node->nr_ready) ++ fairsched_running_insert(node); ++ } else ++ fairsched_delayed_insert(node); ++} ++ ++/* ++ * Scheduling decision ++ * ++ * Updates CPU usage for the node releasing the CPU and selects a new node. ++ */ ++struct fairsched_node *fairsched_schedule( ++ struct fairsched_node *prev_node, ++ struct fairsched_node *cur_node, ++ int cur_node_active, ++ cycles_t time) ++{ ++ struct fairsched_node *p; ++ ++ if (prev_node != &fairsched_idle_node) ++ fairsched_account(prev_node, time); ++ __get_cpu_var(prev_schedule) = time; ++ ++ fairsched_delayed_wake(time); ++ ++ list_for_each_entry(p, &fairsched_running_head, runlist) { ++ if (p->nr_pcpu < p->nr_ready || ++ (cur_node_active && p == cur_node)) { ++ if (p->rate_limited) ++ fairsched_ratelimit_charge_advance(p, time); ++ return p; ++ } ++ } ++ return NULL; ++} ++ ++ ++/*********************************************************************/ ++/* ++ * System calls ++ * ++ * All do_xxx functions are called under fairsched semaphore and after ++ * capability check. ++ * ++ * The binary interfaces follow some other Fair Scheduler implementations ++ * (although some system call arguments are not needed for our implementation). 
++ */ ++/*********************************************************************/ ++ ++static struct fairsched_node *fairsched_find(unsigned int id) ++{ ++ struct fairsched_node *p; ++ ++ list_for_each_entry(p, &fairsched_node_head, nodelist) { ++ if (p->id == id) ++ return p; ++ } ++ return NULL; ++} ++ ++static int do_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -EINVAL; ++ if (weight < 1 || weight > FSCHWEIGHT_MAX) ++ goto out; ++ if (newid < 0 || newid > INT_MAX) ++ goto out; ++ ++ retval = -EBUSY; ++ if (fairsched_find(newid) != NULL) ++ goto out; ++ ++ retval = -ENOMEM; ++ node = kmalloc(sizeof(*node), GFP_KERNEL); ++ if (node == NULL) ++ goto out; ++ ++ memset(node, 0, sizeof(*node)); ++ node->weight = weight; ++ INIT_LIST_HEAD(&node->runlist); ++ node->id = newid; ++#ifdef CONFIG_VE ++ node->owner_env = get_exec_env(); ++#endif ++ ++ spin_lock_irq(&fairsched_lock); ++ list_add(&node->nodelist, &fairsched_node_head); ++ fairsched_nr_nodes++; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ retval = newid; ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_mknod(parent, weight, newid); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_mknod); ++ ++static int do_fairsched_rmnod(unsigned int id) ++{ ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -EINVAL; ++ node = fairsched_find(id); ++ if (node == NULL) ++ goto out; ++ if (node == &fairsched_init_node) ++ goto out; ++ ++ retval = vsched_destroy(node->vsched); ++ if (retval) ++ goto out; ++ ++ spin_lock_irq(&fairsched_lock); ++ list_del(&node->runlist); /* required for delayed nodes */ ++ list_del(&node->nodelist); ++ fairsched_nr_nodes--; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ kfree(node); ++ retval = 0; ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_rmnod(unsigned int id) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_rmnod(id); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_rmnod); ++ ++int do_fairsched_chwt(unsigned int id, unsigned weight) ++{ ++ struct fairsched_node *node; ++ ++ if (id == 0) ++ return -EINVAL; ++ if (weight < 1 || weight > FSCHWEIGHT_MAX) ++ return -EINVAL; ++ ++ node = fairsched_find(id); ++ if (node == NULL) ++ return -ENOENT; ++ ++ spin_lock_irq(&fairsched_lock); ++ node->weight = weight; ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ ++ return 0; ++} ++ ++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned weight) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_chwt(id, weight); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++ ++int do_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ struct fairsched_node *node; ++ cycles_t time; ++ int retval; ++ ++ if (id == 0) ++ return -EINVAL; ++ if (op == 0 && (rate < 1 || rate >= (1UL << 31))) ++ return -EINVAL; ++ ++ node = fairsched_find(id); ++ if (node == NULL) ++ return -ENOENT; ++ ++ retval = -EINVAL; ++ spin_lock_irq(&fairsched_lock); ++ time = get_cycles(); ++ switch (op) { ++ case 0: 
++ node->rate = rate; ++ if (node->rate > (fairsched_nr_cpus << FSCHRATE_SHIFT)) ++ node->rate = ++ fairsched_nr_cpus << FSCHRATE_SHIFT; ++ node->rate_limited = 1; ++ node->value = max_value; ++ if (node->delayed) { ++ list_del(&node->runlist); ++ node->delay = time; ++ fairsched_delayed_insert(node); ++ node->last_updated_at = time; ++ fairsched_delayed_wake(time); ++ } ++ retval = node->rate; ++ break; ++ case 1: ++ node->rate = 0; /* This assignment is not needed ++ for the kernel code, and it should ++ not rely on rate being 0 when it's ++ unset. This is a band-aid for some ++ existing tools (don't know which one ++ exactly). --SAW */ ++ node->rate_limited = 0; ++ node->value = max_value; ++ if (node->delayed) { ++ list_del(&node->runlist); ++ node->delay = time; ++ fairsched_delayed_insert(node); ++ node->last_updated_at = time; ++ fairsched_delayed_wake(time); ++ } ++ retval = 0; ++ break; ++ case 2: ++ if (node->rate_limited) ++ retval = node->rate; ++ else ++ retval = -ENODATA; ++ break; ++ } ++ spin_unlock_irq(&fairsched_lock); ++ ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_rate(id, op, rate); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++ ++/* ++ * Called under fairsched_mutex. ++ */ ++static int __do_fairsched_mvpr(struct task_struct *p, ++ struct fairsched_node *node) ++{ ++ int retval; ++ ++ if (node->vsched == NULL) { ++ retval = vsched_create(node->id, node); ++ if (retval < 0) ++ return retval; ++ } ++ ++ /* no need to destroy vsched in case of mvpr failure */ ++ return vsched_mvpr(p, node->vsched); ++} ++ ++int do_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ struct task_struct *p; ++ struct fairsched_node *node; ++ int retval; ++ ++ retval = -ENOENT; ++ node = fairsched_find(nodeid); ++ if (node == NULL) ++ goto out; ++ ++ read_lock(&tasklist_lock); ++ retval = -ESRCH; ++ p = find_task_by_pid_all(pid); ++ if (p == NULL) ++ goto out_unlock; ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ ++ retval = __do_fairsched_mvpr(p, node); ++ put_task_struct(p); ++ return retval; ++ ++out_unlock: ++ read_unlock(&tasklist_lock); ++out: ++ return retval; ++} ++ ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ int retval; ++ ++ if (!capable(CAP_SETVEID)) ++ return -EPERM; ++ ++ down(&fairsched_mutex); ++ retval = do_fairsched_mvpr(pid, nodeid); ++ up(&fairsched_mutex); ++ ++ return retval; ++} ++EXPORT_SYMBOL(sys_fairsched_mvpr); ++ ++ ++/*********************************************************************/ ++/* ++ * proc interface ++ */ ++/*********************************************************************/ ++ ++struct fairsched_node_dump { ++#ifdef CONFIG_VE ++ envid_t veid; ++#endif ++ int id; ++ unsigned weight; ++ unsigned rate; ++ unsigned rate_limited : 1, ++ delayed : 1; ++ fschtag_t start_tag; ++ fschvalue_t value; ++ cycles_t delay; ++ int nr_ready; ++ int nr_runnable; ++ int nr_pcpu; ++ int nr_tasks, nr_runtasks; ++}; ++ ++struct fairsched_dump { ++ int len, compat; ++ struct fairsched_node_dump nodes[0]; ++}; ++ ++static struct fairsched_dump *fairsched_do_dump(int compat) ++{ ++ int nr_nodes; ++ int len, i; ++ struct fairsched_dump *dump; ++ struct fairsched_node *node; ++ struct fairsched_node_dump *p; ++ unsigned long flags; ++ ++start: ++ nr_nodes = (ve_is_super(get_exec_env()) ? 
fairsched_nr_nodes + 16 : 1); ++ len = sizeof(*dump) + nr_nodes * sizeof(dump->nodes[0]); ++ dump = ub_vmalloc(len); ++ if (dump == NULL) ++ goto out; ++ ++ spin_lock_irqsave(&fairsched_lock, flags); ++ if (ve_is_super(get_exec_env()) && nr_nodes < fairsched_nr_nodes) ++ goto repeat; ++ p = dump->nodes; ++ list_for_each_entry_reverse(node, &fairsched_node_head, nodelist) { ++ if ((char *)p - (char *)dump >= len) ++ break; ++ p->nr_tasks = 0; ++ p->nr_runtasks = 0; ++#ifdef CONFIG_VE ++ if (!ve_accessible(node->owner_env, get_exec_env())) ++ continue; ++ p->veid = node->owner_env->veid; ++ if (compat) { ++ p->nr_tasks = atomic_read(&node->owner_env->pcounter); ++ for (i = 0; i < NR_CPUS; i++) ++ p->nr_runtasks += ++ VE_CPU_STATS(node->owner_env, i) ++ ->nr_running; ++ if (p->nr_runtasks < 0) ++ p->nr_runtasks = 0; ++ } ++#endif ++ p->id = node->id; ++ p->weight = node->weight; ++ p->rate = node->rate; ++ p->rate_limited = node->rate_limited; ++ p->delayed = node->delayed; ++ p->start_tag = node->start_tag; ++ p->value = node->value; ++ p->delay = node->delay; ++ p->nr_ready = node->nr_ready; ++ p->nr_runnable = node->nr_runnable; ++ p->nr_pcpu = node->nr_pcpu; ++ p++; ++ } ++ dump->len = p - dump->nodes; ++ dump->compat = compat; ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ ++out: ++ return dump; ++ ++repeat: ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ vfree(dump); ++ goto start; ++} ++ ++#define FAIRSCHED_PROC_HEADLINES 2 ++ ++#if defined(CONFIG_VE) ++/* ++ * File format is dictated by compatibility reasons. ++ */ ++static int fairsched_seq_show(struct seq_file *m, void *v) ++{ ++ struct fairsched_dump *dump; ++ struct fairsched_node_dump *p; ++ unsigned vid, nid, pid, r; ++ ++ dump = m->private; ++ p = (struct fairsched_node_dump *)((unsigned long)v & ~3UL); ++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) { ++ if (p == dump->nodes) ++ seq_printf(m, "Version: 2.6 debug\n"); ++ else if (p == dump->nodes + 1) ++ seq_printf(m, ++ " veid " ++ " id " ++ " parent " ++ "weight " ++ " rate " ++ "tasks " ++ " run " ++ "cpus" ++ " " ++ "flg " ++ "ready " ++ " start_tag " ++ " value " ++ " delay" ++ "\n"); ++ } else { ++ p -= FAIRSCHED_PROC_HEADLINES; ++ vid = nid = pid = 0; ++ r = (unsigned long)v & 3; ++ if (p == dump->nodes) { ++ if (r == 2) ++ nid = p->id; ++ } else { ++ if (!r) ++ nid = p->id; ++ else if (r == 1) ++ vid = pid = p->id; ++ else ++ vid = p->id, nid = 1; ++ } ++ seq_printf(m, ++ "%10u " ++ "%10u %10u %6u %5u %5u %5u %4u" ++ " " ++ " %c%c %5u %20Lu %20Lu %20Lu" ++ "\n", ++ vid, ++ nid, ++ pid, ++ p->weight, ++ p->rate, ++ p->nr_tasks, ++ p->nr_runtasks, ++ p->nr_pcpu, ++ p->rate_limited ? 'L' : '.', ++ p->delayed ? 'D' : '.', ++ p->nr_ready, ++ p->start_tag.t, ++ p->value.v, ++ p->delay ++ ); ++ } ++ ++ return 0; ++} ++ ++static void *fairsched_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct fairsched_dump *dump; ++ unsigned long l; ++ ++ dump = m->private; ++ if (*pos >= dump->len * 3 - 1 + FAIRSCHED_PROC_HEADLINES) ++ return NULL; ++ if (*pos < FAIRSCHED_PROC_HEADLINES) ++ return dump->nodes + *pos; ++ /* guess why... 
*/ ++ l = (unsigned long)(dump->nodes + ++ ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) / 3); ++ l |= ((unsigned long)*pos + FAIRSCHED_PROC_HEADLINES * 2 + 1) % 3; ++ return (void *)l; ++} ++static void *fairsched_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ ++*pos; ++ return fairsched_seq_start(m, pos); ++} ++#endif ++ ++static int fairsched2_seq_show(struct seq_file *m, void *v) ++{ ++ struct fairsched_dump *dump; ++ struct fairsched_node_dump *p; ++ ++ dump = m->private; ++ p = v; ++ if (p - dump->nodes < FAIRSCHED_PROC_HEADLINES) { ++ if (p == dump->nodes) ++ seq_printf(m, "Version: 2.7" FAIRSHED_DEBUG "\n"); ++ else if (p == dump->nodes + 1) ++ seq_printf(m, ++ " id " ++ "weight " ++ " rate " ++ " run " ++ "cpus" ++#ifdef FAIRSHED_DEBUG ++ " " ++ "flg " ++ "ready " ++ " start_tag " ++ " value " ++ " delay" ++#endif ++ "\n"); ++ } else { ++ p -= FAIRSCHED_PROC_HEADLINES; ++ seq_printf(m, ++ "%10u %6u %5u %5u %4u" ++#ifdef FAIRSHED_DEBUG ++ " " ++ " %c%c %5u %20Lu %20Lu %20Lu" ++#endif ++ "\n", ++ p->id, ++ p->weight, ++ p->rate, ++ p->nr_runnable, ++ p->nr_pcpu ++#ifdef FAIRSHED_DEBUG ++ , ++ p->rate_limited ? 'L' : '.', ++ p->delayed ? 'D' : '.', ++ p->nr_ready, ++ p->start_tag.t, ++ p->value.v, ++ p->delay ++#endif ++ ); ++ } ++ ++ return 0; ++} ++ ++static void *fairsched2_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct fairsched_dump *dump; ++ ++ dump = m->private; ++ if (*pos >= dump->len + FAIRSCHED_PROC_HEADLINES) ++ return NULL; ++ return dump->nodes + *pos; ++} ++static void *fairsched2_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ ++*pos; ++ return fairsched2_seq_start(m, pos); ++} ++static void fairsched2_seq_stop(struct seq_file *m, void *v) ++{ ++} ++ ++#ifdef CONFIG_VE ++static struct seq_operations fairsched_seq_op = { ++ .start = fairsched_seq_start, ++ .next = fairsched_seq_next, ++ .stop = fairsched2_seq_stop, ++ .show = fairsched_seq_show ++}; ++#endif ++static struct seq_operations fairsched2_seq_op = { ++ .start = fairsched2_seq_start, ++ .next = fairsched2_seq_next, ++ .stop = fairsched2_seq_stop, ++ .show = fairsched2_seq_show ++}; ++static int fairsched_seq_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ struct seq_file *m; ++ int compat; ++ ++#ifdef CONFIG_VE ++ compat = (file->f_dentry->d_name.len == sizeof("fairsched") - 1); ++ ret = seq_open(file, compat ? 
&fairsched_seq_op : &fairsched2_seq_op); ++#else ++ compat = 0; ++ ret = seq_open(file, fairsched2_seq_op); ++#endif ++ if (ret) ++ return ret; ++ m = file->private_data; ++ m->private = fairsched_do_dump(compat); ++ if (m->private == NULL) { ++ seq_release(inode, file); ++ ret = -ENOMEM; ++ } ++ return ret; ++} ++static int fairsched_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m; ++ struct fairsched_dump *dump; ++ ++ m = file->private_data; ++ dump = m->private; ++ m->private = NULL; ++ vfree(dump); ++ seq_release(inode, file); ++ return 0; ++} ++static struct file_operations proc_fairsched_operations = { ++ .open = fairsched_seq_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = fairsched_seq_release ++}; ++ ++ ++/*********************************************************************/ ++/* ++ * Fairsched initialization ++ */ ++/*********************************************************************/ ++ ++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp, ++ void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ int *valp = ctl->data; ++ int val = *valp; ++ int ret; ++ ++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); ++ ++ if (!write || *valp == val) ++ return ret; ++ ++ spin_lock_irq(&fairsched_lock); ++ fairsched_recompute_max_latency(); ++ spin_unlock_irq(&fairsched_lock); ++ return ret; ++} ++ ++static void fairsched_calibrate(void) ++{ ++ fairsched_nr_cpus = num_online_cpus(); ++ max_value = FSCHVALUE(cycles_per_jiffy * (fairsched_nr_cpus + 1)); ++} ++ ++void __init fairsched_init_early(void) ++{ ++ printk(KERN_INFO "Virtuozzo Fair CPU scheduler\n"); ++ list_add(&fairsched_init_node.nodelist, &fairsched_node_head); ++ fairsched_nr_nodes++; ++} ++ ++/* ++ * Note: this function is execute late in the initialization sequence. ++ * We ourselves need calibrated cycles and initialized procfs... ++ * The consequence of this late initialization is that start tags are ++ * efficiently ignored and each node preempts others on insertion. ++ * But it isn't a problem (only init node can be runnable). 
++ */ ++void __init fairsched_init_late(void) ++{ ++ struct proc_dir_entry *entry; ++ ++ if (get_cycles() == 0) ++ panic("FAIRSCHED: no TSC!\n"); ++ fairsched_calibrate(); ++ fairsched_recompute_max_latency(); ++ ++ entry = create_proc_glob_entry("fairsched", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &proc_fairsched_operations; ++ entry = create_proc_glob_entry("fairsched2", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &proc_fairsched_operations; ++} ++ ++ ++#else /* CONFIG_FAIRSCHED */ ++ ++ ++/*********************************************************************/ ++/* ++ * No Fairsched ++ */ ++/*********************************************************************/ ++ ++asmlinkage int sys_fairsched_mknod(unsigned int parent, unsigned int weight, ++ unsigned int newid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_rmnod(unsigned int id) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_mvpr(pid_t pid, unsigned int nodeid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate) ++{ ++ return -ENOSYS; ++} ++ ++void __init fairsched_init_late(void) ++{ ++} ++ ++#endif /* CONFIG_FAIRSCHED */ +diff -uprN linux-2.6.8.1.orig/kernel/fork.c linux-2.6.8.1-ve022stab078/kernel/fork.c +--- linux-2.6.8.1.orig/kernel/fork.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/fork.c 2006-05-11 13:05:49.000000000 +0400 +@@ -20,12 +20,14 @@ + #include <linux/vmalloc.h> + #include <linux/completion.h> + #include <linux/namespace.h> ++#include <linux/file.h> + #include <linux/personality.h> + #include <linux/mempolicy.h> + #include <linux/sem.h> + #include <linux/file.h> + #include <linux/binfmts.h> + #include <linux/mman.h> ++#include <linux/virtinfo.h> + #include <linux/fs.h> + #include <linux/cpu.h> + #include <linux/security.h> +@@ -36,6 +38,7 @@ + #include <linux/mount.h> + #include <linux/audit.h> + #include <linux/rmap.h> ++#include <linux/fairsched.h> + + #include <asm/pgtable.h> + #include <asm/pgalloc.h> +@@ -44,10 +47,14 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_misc.h> ++#include <ub/ub_vmpages.h> ++ + /* The idle threads do not count.. + * Protected by write_lock_irq(&tasklist_lock) + */ + int nr_threads; ++EXPORT_SYMBOL(nr_threads); + + int max_threads; + unsigned long total_forks; /* Handle normal Linux uptimes. 
*/ +@@ -77,13 +84,14 @@ static kmem_cache_t *task_struct_cachep; + + static void free_task(struct task_struct *tsk) + { ++ ub_task_uncharge(tsk); + free_thread_info(tsk->thread_info); + free_task_struct(tsk); + } + + void __put_task_struct(struct task_struct *tsk) + { +- WARN_ON(!(tsk->state & (TASK_DEAD | TASK_ZOMBIE))); ++ WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); + WARN_ON(atomic_read(&tsk->usage)); + WARN_ON(tsk == current); + +@@ -92,6 +100,13 @@ void __put_task_struct(struct task_struc + security_task_free(tsk); + free_uid(tsk->user); + put_group_info(tsk->group_info); ++ ++#ifdef CONFIG_VE ++ put_ve(VE_TASK_INFO(tsk)->owner_env); ++ write_lock_irq(&tasklist_lock); ++ nr_dead--; ++ write_unlock_irq(&tasklist_lock); ++#endif + free_task(tsk); + } + +@@ -219,7 +234,7 @@ void __init fork_init(unsigned long memp + /* create a slab on which task_structs can be allocated */ + task_struct_cachep = + kmem_cache_create("task_struct", sizeof(struct task_struct), +- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); ++ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL); + #endif + + /* +@@ -250,19 +265,30 @@ static struct task_struct *dup_task_stru + return NULL; + + ti = alloc_thread_info(tsk); +- if (!ti) { +- free_task_struct(tsk); +- return NULL; +- } ++ if (ti == NULL) ++ goto out_free_task; + + *ti = *orig->thread_info; + *tsk = *orig; + tsk->thread_info = ti; + ti->task = tsk; + ++ /* Our parent has been killed by OOM killer... Go away */ ++ if (tsk->flags & PF_MEMDIE) ++ goto out_free_thread; ++ ++ if (ub_task_charge(orig, tsk) < 0) ++ goto out_free_thread; ++ + /* One for us, one for whoever does the "release_task()" (usually parent) */ + atomic_set(&tsk->usage,2); + return tsk; ++ ++out_free_thread: ++ free_thread_info(ti); ++out_free_task: ++ free_task_struct(tsk); ++ return NULL; + } + + #ifdef CONFIG_MMU +@@ -308,9 +334,14 @@ static inline int dup_mmap(struct mm_str + if (mpnt->vm_flags & VM_ACCOUNT) { + unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + if (security_vm_enough_memory(len)) +- goto fail_nomem; ++ goto fail_nocharge; + charge = len; + } ++ ++ if (ub_privvm_charge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file, ++ mpnt->vm_end - mpnt->vm_start)) ++ goto fail_nocharge; ++ + tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!tmp) + goto fail_nomem; +@@ -323,6 +354,7 @@ static inline int dup_mmap(struct mm_str + tmp->vm_flags &= ~VM_LOCKED; + tmp->vm_mm = mm; + tmp->vm_next = NULL; ++ tmp->vm_rss = 0; + anon_vma_link(tmp); + vma_prio_tree_init(tmp); + file = tmp->vm_file; +@@ -372,6 +404,9 @@ out: + fail_nomem_policy: + kmem_cache_free(vm_area_cachep, tmp); + fail_nomem: ++ ub_privvm_uncharge(mm_ub(mm), mpnt->vm_flags, mpnt->vm_file, ++ mpnt->vm_end - mpnt->vm_start); ++fail_nocharge: + retval = -ENOMEM; + vm_unacct_memory(charge); + goto out; +@@ -398,12 +433,15 @@ static inline void mm_free_pgd(struct mm + spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + int mmlist_nr; + ++EXPORT_SYMBOL(mmlist_lock); ++ + #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) + #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) + + #include <linux/init_task.h> + +-static struct mm_struct * mm_init(struct mm_struct * mm) ++static struct mm_struct * mm_init(struct mm_struct * mm, ++ struct user_beancounter * ub) + { + atomic_set(&mm->mm_users, 1); + atomic_set(&mm->mm_count, 1); +@@ -414,11 +452,15 @@ static struct mm_struct * mm_init(struct + mm->ioctx_list = NULL; + mm->default_kioctx = (struct 
kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); + mm->free_area_cache = TASK_UNMAPPED_BASE; ++#ifdef CONFIG_USER_RESOURCE ++ mm_ub(mm) = get_beancounter(ub); ++#endif + + if (likely(!mm_alloc_pgd(mm))) { + mm->def_flags = 0; + return mm; + } ++ put_beancounter(mm_ub(mm)); + free_mm(mm); + return NULL; + } +@@ -433,7 +475,7 @@ struct mm_struct * mm_alloc(void) + mm = allocate_mm(); + if (mm) { + memset(mm, 0, sizeof(*mm)); +- mm = mm_init(mm); ++ mm = mm_init(mm, get_exec_ub()); + } + return mm; + } +@@ -448,6 +490,7 @@ void fastcall __mmdrop(struct mm_struct + BUG_ON(mm == &init_mm); + mm_free_pgd(mm); + destroy_context(mm); ++ put_beancounter(mm_ub(mm)); + free_mm(mm); + } + +@@ -462,6 +505,7 @@ void mmput(struct mm_struct *mm) + spin_unlock(&mmlist_lock); + exit_aio(mm); + exit_mmap(mm); ++ (void) virtinfo_gencall(VIRTINFO_EXITMMAP, mm); + mmdrop(mm); + } + } +@@ -562,7 +606,7 @@ static int copy_mm(unsigned long clone_f + + /* Copy the current MM stuff.. */ + memcpy(mm, oldmm, sizeof(*mm)); +- if (!mm_init(mm)) ++ if (!mm_init(mm, get_task_ub(tsk))) + goto fail_nomem; + + if (init_new_context(tsk,mm)) +@@ -588,6 +632,7 @@ fail_nocontext: + * because it calls destroy_context() + */ + mm_free_pgd(mm); ++ put_beancounter(mm_ub(mm)); + free_mm(mm); + return retval; + } +@@ -853,7 +898,7 @@ asmlinkage long sys_set_tid_address(int + { + current->clear_child_tid = tidptr; + +- return current->pid; ++ return virt_pid(current); + } + + /* +@@ -869,7 +914,8 @@ struct task_struct *copy_process(unsigne + struct pt_regs *regs, + unsigned long stack_size, + int __user *parent_tidptr, +- int __user *child_tidptr) ++ int __user *child_tidptr, ++ long pid) + { + int retval; + struct task_struct *p = NULL; +@@ -929,19 +975,28 @@ struct task_struct *copy_process(unsigne + + p->did_exec = 0; + copy_flags(clone_flags, p); +- if (clone_flags & CLONE_IDLETASK) ++ if (clone_flags & CLONE_IDLETASK) { + p->pid = 0; +- else { ++ set_virt_pid(p, 0); ++ } else { + p->pid = alloc_pidmap(); + if (p->pid == -1) ++ goto bad_fork_cleanup_pid; ++#ifdef CONFIG_VE ++ set_virt_pid(p, alloc_vpid(p->pid, pid ? : -1)); ++ if (virt_pid(p) < 0) + goto bad_fork_cleanup; ++#endif + } + retval = -EFAULT; + if (clone_flags & CLONE_PARENT_SETTID) +- if (put_user(p->pid, parent_tidptr)) ++ if (put_user(virt_pid(p), parent_tidptr)) + goto bad_fork_cleanup; + + p->proc_dentry = NULL; ++#ifdef CONFIG_VE ++ VE_TASK_INFO(p)->glob_proc_dentry = NULL; ++#endif + + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->sibling); +@@ -1017,6 +1072,7 @@ struct task_struct *copy_process(unsigne + /* ok, now we should be set up.. */ + p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); + p->pdeath_signal = 0; ++ p->exit_state = 0; + + /* Perform scheduler related setup */ + sched_fork(p); +@@ -1026,12 +1082,26 @@ struct task_struct *copy_process(unsigne + * We dont wake it up yet. + */ + p->tgid = p->pid; ++ set_virt_tgid(p, virt_pid(p)); ++ set_virt_pgid(p, virt_pgid(current)); ++ set_virt_sid(p, virt_sid(current)); + p->group_leader = p; + INIT_LIST_HEAD(&p->ptrace_children); + INIT_LIST_HEAD(&p->ptrace_list); + + /* Need tasklist lock for parent etc handling! */ + write_lock_irq(&tasklist_lock); ++ ++ /* ++ * The task hasn't been attached yet, so cpus_allowed mask cannot ++ * have changed. The cpus_allowed mask of the parent may have ++ * changed after it was copied first time, and it may then move to ++ * another CPU - so we re-copy it here and set the child's CPU to ++ * the parent's CPU. This avoids alot of nasty races. 
++ */ ++ p->cpus_allowed = current->cpus_allowed; ++ set_task_cpu(p, task_cpu(current)); ++ + /* + * Check for pending SIGKILL! The new thread should not be allowed + * to slip out of an OOM kill. (or normal SIGKILL.) +@@ -1043,7 +1113,7 @@ struct task_struct *copy_process(unsigne + } + + /* CLONE_PARENT re-uses the old parent */ +- if (clone_flags & CLONE_PARENT) ++ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) + p->real_parent = current->real_parent; + else + p->real_parent = current; +@@ -1063,6 +1133,7 @@ struct task_struct *copy_process(unsigne + goto bad_fork_cleanup_namespace; + } + p->tgid = current->tgid; ++ set_virt_tgid(p, virt_tgid(current)); + p->group_leader = current->group_leader; + + if (current->signal->group_stop_count > 0) { +@@ -1082,15 +1153,20 @@ struct task_struct *copy_process(unsigne + if (p->ptrace & PT_PTRACED) + __ptrace_link(p, current->parent); + ++#ifdef CONFIG_VE ++ SET_VE_LINKS(p); ++ atomic_inc(&VE_TASK_INFO(p)->owner_env->pcounter); ++ get_ve(VE_TASK_INFO(p)->owner_env); ++ seqcount_init(&VE_TASK_INFO(p)->wakeup_lock); ++#endif + attach_pid(p, PIDTYPE_PID, p->pid); ++ attach_pid(p, PIDTYPE_TGID, p->tgid); + if (thread_group_leader(p)) { +- attach_pid(p, PIDTYPE_TGID, p->tgid); + attach_pid(p, PIDTYPE_PGID, process_group(p)); + attach_pid(p, PIDTYPE_SID, p->signal->session); + if (p->pid) + __get_cpu_var(process_counts)++; +- } else +- link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); ++ } + + nr_threads++; + write_unlock_irq(&tasklist_lock); +@@ -1126,6 +1202,11 @@ bad_fork_cleanup_policy: + mpol_free(p->mempolicy); + #endif + bad_fork_cleanup: ++#ifdef CONFIG_VE ++ if (virt_pid(p) != p->pid && virt_pid(p) > 0) ++ free_vpid(virt_pid(p), get_exec_env()); ++#endif ++bad_fork_cleanup_pid: + if (p->pid > 0) + free_pidmap(p->pid); + if (p->binfmt) +@@ -1163,12 +1244,13 @@ static inline int fork_traceflag (unsign + * It copies the process, and if successful kick-starts + * it and waits for it to finish using the VM if required. + */ +-long do_fork(unsigned long clone_flags, ++long do_fork_pid(unsigned long clone_flags, + unsigned long stack_start, + struct pt_regs *regs, + unsigned long stack_size, + int __user *parent_tidptr, +- int __user *child_tidptr) ++ int __user *child_tidptr, ++ long pid0) + { + struct task_struct *p; + int trace = 0; +@@ -1180,12 +1262,16 @@ long do_fork(unsigned long clone_flags, + clone_flags |= CLONE_PTRACE; + } + +- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr); ++ pid = virtinfo_gencall(VIRTINFO_DOFORK, (void *)clone_flags); ++ if (pid) ++ return pid; ++ ++ p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid0); + /* + * Do this prior waking up the new thread - the thread pointer + * might get invalid after that point, if the thread exits quickly. + */ +- pid = IS_ERR(p) ? PTR_ERR(p) : p->pid; ++ pid = IS_ERR(p) ? PTR_ERR(p) : virt_pid(p); + + if (!IS_ERR(p)) { + struct completion vfork; +@@ -1203,6 +1289,7 @@ long do_fork(unsigned long clone_flags, + set_tsk_thread_flag(p, TIF_SIGPENDING); + } + ++ virtinfo_gencall(VIRTINFO_DOFORKRET, p); + if (!(clone_flags & CLONE_STOPPED)) { + /* + * Do the wakeup last. 
On SMP we treat fork() and +@@ -1220,25 +1307,24 @@ long do_fork(unsigned long clone_flags, + else + wake_up_forked_process(p); + } else { +- int cpu = get_cpu(); +- + p->state = TASK_STOPPED; +- if (cpu_is_offline(task_cpu(p))) +- set_task_cpu(p, cpu); +- +- put_cpu(); + } + ++total_forks; + + if (unlikely (trace)) { + current->ptrace_message = pid; ++ set_pn_state(current, PN_STOP_FORK); + ptrace_notify ((trace << 8) | SIGTRAP); ++ clear_pn_state(current); + } + + if (clone_flags & CLONE_VFORK) { + wait_for_completion(&vfork); +- if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) ++ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { ++ set_pn_state(current, PN_STOP_VFORK); + ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); ++ clear_pn_state(current); ++ } + } else + /* + * Let the child process run first, to avoid most of the +@@ -1246,9 +1332,24 @@ long do_fork(unsigned long clone_flags, + */ + set_need_resched(); + } ++ virtinfo_gencall(VIRTINFO_DOFORKPOST, (void *)(long)pid); + return pid; + } + ++EXPORT_SYMBOL(do_fork_pid); ++ ++long do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ struct pt_regs *regs, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ return do_fork_pid(clone_flags, stack_start, regs, stack_size, ++ parent_tidptr, child_tidptr, 0); ++} ++ ++ + /* SLAB cache for signal_struct structures (tsk->signal) */ + kmem_cache_t *signal_cachep; + +@@ -1267,24 +1368,26 @@ kmem_cache_t *vm_area_cachep; + /* SLAB cache for mm_struct structures (tsk->mm) */ + kmem_cache_t *mm_cachep; + ++#include <linux/kmem_cache.h> + void __init proc_caches_init(void) + { + sighand_cachep = kmem_cache_create("sighand_cache", + sizeof(struct sighand_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + signal_cachep = kmem_cache_create("signal_cache", + sizeof(struct signal_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + files_cachep = kmem_cache_create("files_cache", + sizeof(struct files_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); ++ files_cachep->flags |= CFLGS_ENVIDS; + fs_cachep = kmem_cache_create("fs_cache", + sizeof(struct fs_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + vm_area_cachep = kmem_cache_create("vm_area_struct", + sizeof(struct vm_area_struct), 0, +- SLAB_PANIC, NULL, NULL); ++ SLAB_PANIC|SLAB_UBC, NULL, NULL); + mm_cachep = kmem_cache_create("mm_struct", + sizeof(struct mm_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + } +diff -uprN linux-2.6.8.1.orig/kernel/futex.c linux-2.6.8.1-ve022stab078/kernel/futex.c +--- linux-2.6.8.1.orig/kernel/futex.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/futex.c 2006-05-11 13:05:33.000000000 +0400 +@@ -258,6 +258,18 @@ static void drop_key_refs(union futex_ke + } + } + ++static inline int get_futex_value_locked(int *dest, int __user *from) ++{ ++ int ret; ++ ++ inc_preempt_count(); ++ ret = __copy_from_user(dest, from, sizeof(int)); ++ dec_preempt_count(); ++ preempt_check_resched(); ++ ++ return ret ? -EFAULT : 0; ++} ++ + /* + * The hash bucket lock must be held when this is called. + * Afterwards, the futex_q must not be accessed. 
+@@ -329,6 +341,7 @@ static int futex_requeue(unsigned long u + int ret, drop_count = 0; + unsigned int nqueued; + ++ retry: + down_read(¤t->mm->mmap_sem); + + ret = get_futex_key(uaddr1, &key1); +@@ -355,9 +368,20 @@ static int futex_requeue(unsigned long u + before *uaddr1. */ + smp_mb(); + +- if (get_user(curval, (int __user *)uaddr1) != 0) { +- ret = -EFAULT; +- goto out; ++ ret = get_futex_value_locked(&curval, (int __user *)uaddr1); ++ ++ if (unlikely(ret)) { ++ /* If we would have faulted, release mmap_sem, fault ++ * it in and start all over again. ++ */ ++ up_read(¤t->mm->mmap_sem); ++ ++ ret = get_user(curval, (int __user *)uaddr1); ++ ++ if (!ret) ++ goto retry; ++ ++ return ret; + } + if (curval != *valp) { + ret = -EAGAIN; +@@ -480,6 +504,7 @@ static int futex_wait(unsigned long uadd + int ret, curval; + struct futex_q q; + ++ retry: + down_read(¤t->mm->mmap_sem); + + ret = get_futex_key(uaddr, &q.key); +@@ -493,9 +518,23 @@ static int futex_wait(unsigned long uadd + * We hold the mmap semaphore, so the mapping cannot have changed + * since we looked it up. + */ +- if (get_user(curval, (int __user *)uaddr) != 0) { +- ret = -EFAULT; +- goto out_unqueue; ++ ++ ret = get_futex_value_locked(&curval, (int __user *)uaddr); ++ ++ if (unlikely(ret)) { ++ /* If we would have faulted, release mmap_sem, fault it in and ++ * start all over again. ++ */ ++ up_read(¤t->mm->mmap_sem); ++ ++ if (!unqueue_me(&q)) /* There's a chance we got woken already */ ++ return 0; ++ ++ ret = get_user(curval, (int __user *)uaddr); ++ ++ if (!ret) ++ goto retry; ++ return ret; + } + if (curval != val) { + ret = -EWOULDBLOCK; +@@ -538,8 +577,8 @@ static int futex_wait(unsigned long uadd + return 0; + if (time == 0) + return -ETIMEDOUT; +- /* A spurious wakeup should never happen. */ +- WARN_ON(!signal_pending(current)); ++ /* We expect signal_pending(current), but another thread may ++ * have handled it for us already. */ + return -EINTR; + + out_unqueue: +diff -uprN linux-2.6.8.1.orig/kernel/kmod.c linux-2.6.8.1-ve022stab078/kernel/kmod.c +--- linux-2.6.8.1.orig/kernel/kmod.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/kmod.c 2006-05-11 13:05:40.000000000 +0400 +@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...) + #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ + static int kmod_loop_msg; + ++ /* Don't allow request_module() inside VE. 
*/ ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + va_start(args, fmt); + ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); + va_end(args); +@@ -260,6 +264,9 @@ int call_usermodehelper(char *path, char + }; + DECLARE_WORK(work, __call_usermodehelper, &sub_info); + ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ + if (!khelper_wq) + return -EBUSY; + +diff -uprN linux-2.6.8.1.orig/kernel/kthread.c linux-2.6.8.1-ve022stab078/kernel/kthread.c +--- linux-2.6.8.1.orig/kernel/kthread.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/kthread.c 2006-05-11 13:05:40.000000000 +0400 +@@ -108,7 +108,7 @@ static void keventd_create_kthread(void + create->result = ERR_PTR(pid); + } else { + wait_for_completion(&create->started); +- create->result = find_task_by_pid(pid); ++ create->result = find_task_by_pid_all(pid); + } + complete(&create->done); + } +@@ -151,6 +151,7 @@ void kthread_bind(struct task_struct *k, + BUG_ON(k->state != TASK_INTERRUPTIBLE); + /* Must have done schedule() in kthread() before we set_task_cpu */ + wait_task_inactive(k); ++ /* The following lines look to be unprotected, possible race - vlad */ + set_task_cpu(k, cpu); + k->cpus_allowed = cpumask_of_cpu(cpu); + } +diff -uprN linux-2.6.8.1.orig/kernel/module.c linux-2.6.8.1-ve022stab078/kernel/module.c +--- linux-2.6.8.1.orig/kernel/module.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/module.c 2006-05-11 13:05:42.000000000 +0400 +@@ -2045,6 +2045,8 @@ static void *m_start(struct seq_file *m, + loff_t n = 0; + + down(&module_mutex); ++ if (!ve_is_super(get_exec_env())) ++ return NULL; + list_for_each(i, &modules) { + if (n++ == *pos) + break; +diff -uprN linux-2.6.8.1.orig/kernel/panic.c linux-2.6.8.1-ve022stab078/kernel/panic.c +--- linux-2.6.8.1.orig/kernel/panic.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/panic.c 2006-05-11 13:05:40.000000000 +0400 +@@ -23,6 +23,8 @@ + int panic_timeout; + int panic_on_oops; + int tainted; ++int kernel_text_csum_broken; ++EXPORT_SYMBOL(kernel_text_csum_broken); + + EXPORT_SYMBOL(panic_timeout); + +@@ -125,7 +127,8 @@ const char *print_tainted(void) + { + static char buf[20]; + if (tainted) { +- snprintf(buf, sizeof(buf), "Tainted: %c%c%c", ++ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c", ++ kernel_text_csum_broken ? 'B' : ' ', + tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', + tainted & TAINT_FORCED_MODULE ? 'F' : ' ', + tainted & TAINT_UNSAFE_SMP ? 'S' : ' '); +diff -uprN linux-2.6.8.1.orig/kernel/pid.c linux-2.6.8.1-ve022stab078/kernel/pid.c +--- linux-2.6.8.1.orig/kernel/pid.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/pid.c 2006-05-11 13:05:43.000000000 +0400 +@@ -26,8 +26,12 @@ + #include <linux/bootmem.h> + #include <linux/hash.h> + ++#ifdef CONFIG_VE ++static void __free_vpid(int vpid, struct ve_struct *ve); ++#endif ++ + #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) +-static struct list_head *pid_hash[PIDTYPE_MAX]; ++static struct hlist_head *pid_hash[PIDTYPE_MAX]; + static int pidhash_shift; + + int pid_max = PID_MAX_DEFAULT; +@@ -50,8 +54,14 @@ typedef struct pidmap { + void *page; + } pidmap_t; + ++#ifdef CONFIG_VE ++#define PIDMAP_NRFREE (BITS_PER_PAGE/2) ++#else ++#define PIDMAP_NRFREE BITS_PER_PAGE ++#endif ++ + static pidmap_t pidmap_array[PIDMAP_ENTRIES] = +- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; ++ { [ 0 ... 
PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } }; + + static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES; + +@@ -62,6 +72,8 @@ fastcall void free_pidmap(int pid) + pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; + int offset = pid & BITS_PER_PAGE_MASK; + ++ BUG_ON(__is_virtual_pid(pid) || pid == 1); ++ + clear_bit(offset, map->page); + atomic_inc(&map->nr_free); + } +@@ -103,6 +115,8 @@ int alloc_pidmap(void) + pidmap_t *map; + + pid = last_pid + 1; ++ if (__is_virtual_pid(pid)) ++ pid += VPID_DIV; + if (pid >= pid_max) + pid = RESERVED_PIDS; + +@@ -133,6 +147,8 @@ next_map: + */ + scan_more: + offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset); ++ if (__is_virtual_pid(offset)) ++ offset += VPID_DIV; + if (offset >= BITS_PER_PAGE) + goto next_map; + if (test_and_set_bit(offset, map->page)) +@@ -146,92 +162,134 @@ failure: + return -1; + } + +-fastcall struct pid *find_pid(enum pid_type type, int nr) ++struct pid * fastcall find_pid(enum pid_type type, int nr) + { +- struct list_head *elem, *bucket = &pid_hash[type][pid_hashfn(nr)]; ++ struct hlist_node *elem; + struct pid *pid; + +- __list_for_each(elem, bucket) { +- pid = list_entry(elem, struct pid, hash_chain); ++ hlist_for_each_entry(pid, elem, ++ &pid_hash[type][pid_hashfn(nr)], pid_chain) { + if (pid->nr == nr) + return pid; + } + return NULL; + } +- +-void fastcall link_pid(task_t *task, struct pid_link *link, struct pid *pid) +-{ +- atomic_inc(&pid->count); +- list_add_tail(&link->pid_chain, &pid->task_list); +- link->pidptr = pid; +-} ++EXPORT_SYMBOL(find_pid); + + int fastcall attach_pid(task_t *task, enum pid_type type, int nr) + { +- struct pid *pid = find_pid(type, nr); ++ struct pid *pid, *task_pid; + +- if (pid) +- atomic_inc(&pid->count); +- else { +- pid = &task->pids[type].pid; +- pid->nr = nr; +- atomic_set(&pid->count, 1); +- INIT_LIST_HEAD(&pid->task_list); +- pid->task = task; +- get_task_struct(task); +- list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]); ++ task_pid = &task->pids[type]; ++ pid = find_pid(type, nr); ++ if (pid == NULL) { ++ hlist_add_head(&task_pid->pid_chain, ++ &pid_hash[type][pid_hashfn(nr)]); ++ INIT_LIST_HEAD(&task_pid->pid_list); ++ } else { ++ INIT_HLIST_NODE(&task_pid->pid_chain); ++ list_add_tail(&task_pid->pid_list, &pid->pid_list); + } +- list_add_tail(&task->pids[type].pid_chain, &pid->task_list); +- task->pids[type].pidptr = pid; ++ task_pid->nr = nr; + + return 0; + } + +-static inline int __detach_pid(task_t *task, enum pid_type type) ++static fastcall int __detach_pid(task_t *task, enum pid_type type) + { +- struct pid_link *link = task->pids + type; +- struct pid *pid = link->pidptr; +- int nr; ++ struct pid *pid, *pid_next; ++ int nr = 0; ++ ++ pid = &task->pids[type]; ++ if (!hlist_unhashed(&pid->pid_chain)) { ++ hlist_del(&pid->pid_chain); ++ ++ if (list_empty(&pid->pid_list)) ++ nr = pid->nr; ++ else { ++ pid_next = list_entry(pid->pid_list.next, ++ struct pid, pid_list); ++ /* insert next pid from pid_list to hash */ ++ hlist_add_head(&pid_next->pid_chain, ++ &pid_hash[type][pid_hashfn(pid_next->nr)]); ++ } ++ } + +- list_del(&link->pid_chain); +- if (!atomic_dec_and_test(&pid->count)) +- return 0; +- +- nr = pid->nr; +- list_del(&pid->hash_chain); +- put_task_struct(pid->task); ++ list_del(&pid->pid_list); ++ pid->nr = 0; + + return nr; + } + +-static void _detach_pid(task_t *task, enum pid_type type) +-{ +- __detach_pid(task, type); +-} +- + void fastcall detach_pid(task_t *task, enum pid_type type) + { +- int nr = __detach_pid(task, 
type); ++ int i; ++ int nr; + ++ nr = __detach_pid(task, type); + if (!nr) + return; + +- for (type = 0; type < PIDTYPE_MAX; ++type) +- if (find_pid(type, nr)) ++ for (i = 0; i < PIDTYPE_MAX; ++i) ++ if (find_pid(i, nr)) + return; ++ ++#ifdef CONFIG_VE ++ __free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env); ++#endif + free_pidmap(nr); + } + +-task_t *find_task_by_pid(int nr) ++task_t *find_task_by_pid_type(int type, int nr) + { +- struct pid *pid = find_pid(PIDTYPE_PID, nr); ++ BUG(); ++ return NULL; ++} + ++EXPORT_SYMBOL(find_task_by_pid_type); ++ ++task_t *find_task_by_pid_type_all(int type, int nr) ++{ ++ struct pid *pid; ++ ++ BUG_ON(nr != -1 && is_virtual_pid(nr)); ++ ++ pid = find_pid(type, nr); + if (!pid) + return NULL; +- return pid_task(pid->task_list.next, PIDTYPE_PID); ++ ++ return pid_task(&pid->pid_list, type); + } + +-EXPORT_SYMBOL(find_task_by_pid); ++EXPORT_SYMBOL(find_task_by_pid_type_all); ++ ++#ifdef CONFIG_VE ++ ++task_t *find_task_by_pid_type_ve(int type, int nr) ++{ ++ task_t *tsk; ++ int gnr = nr; ++ struct pid *pid; ++ ++ if (is_virtual_pid(nr)) { ++ gnr = __vpid_to_pid(nr); ++ if (unlikely(gnr == -1)) ++ return NULL; ++ } ++ ++ pid = find_pid(type, gnr); ++ if (!pid) ++ return NULL; ++ ++ tsk = pid_task(&pid->pid_list, type); ++ if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env())) ++ return NULL; ++ return tsk; ++} ++ ++EXPORT_SYMBOL(find_task_by_pid_type_ve); ++ ++#endif + + /* + * This function switches the PIDs if a non-leader thread calls +@@ -240,22 +298,26 @@ EXPORT_SYMBOL(find_task_by_pid); + */ + void switch_exec_pids(task_t *leader, task_t *thread) + { +- _detach_pid(leader, PIDTYPE_PID); +- _detach_pid(leader, PIDTYPE_TGID); +- _detach_pid(leader, PIDTYPE_PGID); +- _detach_pid(leader, PIDTYPE_SID); ++ __detach_pid(leader, PIDTYPE_PID); ++ __detach_pid(leader, PIDTYPE_TGID); ++ __detach_pid(leader, PIDTYPE_PGID); ++ __detach_pid(leader, PIDTYPE_SID); + +- _detach_pid(thread, PIDTYPE_PID); +- _detach_pid(thread, PIDTYPE_TGID); ++ __detach_pid(thread, PIDTYPE_PID); ++ __detach_pid(thread, PIDTYPE_TGID); + + leader->pid = leader->tgid = thread->pid; + thread->pid = thread->tgid; ++ set_virt_tgid(leader, virt_pid(thread)); ++ set_virt_pid(leader, virt_pid(thread)); ++ set_virt_pid(thread, virt_tgid(thread)); + + attach_pid(thread, PIDTYPE_PID, thread->pid); + attach_pid(thread, PIDTYPE_TGID, thread->tgid); + attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); + attach_pid(thread, PIDTYPE_SID, thread->signal->session); + list_add_tail(&thread->tasks, &init_task.tasks); ++ SET_VE_LINKS(thread); + + attach_pid(leader, PIDTYPE_PID, leader->pid); + attach_pid(leader, PIDTYPE_TGID, leader->tgid); +@@ -263,6 +325,338 @@ void switch_exec_pids(task_t *leader, ta + attach_pid(leader, PIDTYPE_SID, leader->signal->session); + } + ++#ifdef CONFIG_VE ++ ++/* Virtual PID bits. ++ * ++ * At the moment all internal structures in kernel store real global pid. ++ * The only place, where virtual PID is used, is at user frontend. We ++ * remap virtual pids obtained from user to global ones (vpid_to_pid) and ++ * map globals to virtuals before showing them to user (virt_pid_type). ++ * ++ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy. 
++ */ ++ ++pid_t _pid_type_to_vpid(int type, pid_t pid) ++{ ++ struct pid * p; ++ ++ if (unlikely(is_virtual_pid(pid))) ++ return -1; ++ ++ read_lock(&tasklist_lock); ++ p = find_pid(type, pid); ++ if (p) { ++ pid = p->vnr; ++ } else { ++ pid = -1; ++ } ++ read_unlock(&tasklist_lock); ++ return pid; ++} ++ ++pid_t pid_type_to_vpid(int type, pid_t pid) ++{ ++ int vpid; ++ ++ if (unlikely(pid <= 0)) ++ return pid; ++ ++ BUG_ON(is_virtual_pid(pid)); ++ ++ if (ve_is_super(get_exec_env())) ++ return pid; ++ ++ vpid = _pid_type_to_vpid(type, pid); ++ if (unlikely(vpid == -1)) { ++ /* It is allowed: global pid can be used everywhere. ++ * This can happen, when kernel remembers stray pids: ++ * signal queues, locks etc. ++ */ ++ vpid = pid; ++ } ++ return vpid; ++} ++ ++/* To map virtual pids to global we maintain special hash table. ++ * ++ * Mapping entries are allocated when a process with non-trivial ++ * mapping is forked, which is possible only after VE migrated. ++ * Mappings are destroyed, when a global pid is removed from global ++ * pidmap, which means we do not need to refcount mappings. ++ */ ++ ++static struct hlist_head *vpid_hash; ++ ++struct vpid_mapping ++{ ++ int vpid; ++ int veid; ++ int pid; ++ struct hlist_node link; ++}; ++ ++static kmem_cache_t *vpid_mapping_cachep; ++ ++static inline int vpid_hashfn(int vnr, int veid) ++{ ++ return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift); ++} ++ ++struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid) ++{ ++ struct hlist_node *elem; ++ struct vpid_mapping *map; ++ ++ hlist_for_each_entry(map, elem, ++ &vpid_hash[vpid_hashfn(vnr, veid)], link) { ++ if (map->vpid == vnr && map->veid == veid) ++ return map; ++ } ++ return NULL; ++} ++ ++/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used ++ * only under tasklist_lock. In some places we must use only this version ++ * (f.e. __kill_pg_info is called under write lock!) ++ * ++ * Caller should pass virtual pid. This function returns an error, when ++ * seeing a global pid. ++ */ ++int __vpid_to_pid(int pid) ++{ ++ struct vpid_mapping *map; ++ ++ if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env()))) ++ return -1; ++ ++ if (!get_exec_env()->sparse_vpid) { ++ if (pid != 1) ++ return pid - VPID_DIV; ++ return get_exec_env()->init_entry->pid; ++ } ++ ++ map = __lookup_vpid_mapping(pid, VEID(get_exec_env())); ++ if (map) ++ return map->pid; ++ return -1; ++} ++ ++int vpid_to_pid(int pid) ++{ ++ /* User gave bad pid. It is his problem. */ ++ if (unlikely(pid <= 0)) ++ return pid; ++ ++ if (!is_virtual_pid(pid)) ++ return pid; ++ ++ read_lock(&tasklist_lock); ++ pid = __vpid_to_pid(pid); ++ read_unlock(&tasklist_lock); ++ return pid; ++} ++ ++/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid: ++ * ++ * vpid == 1 -> ve->init_task->pid ++ * else pid & ~VPID_DIV ++ * ++ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table. ++ * ++ * When VE migrates and we see non-trivial mapping the first time, we ++ * scan process table and populate mapping hash table. 
++ */ ++ ++static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache) ++{ ++ if (pid > 0 && vpid > 0 && !__lookup_vpid_mapping(vpid, veid)) { ++ struct vpid_mapping *m; ++ if (hlist_empty(cache)) { ++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC); ++ if (unlikely(m == NULL)) ++ return -ENOMEM; ++ } else { ++ m = hlist_entry(cache->first, struct vpid_mapping, link); ++ hlist_del(&m->link); ++ } ++ m->pid = pid; ++ m->vpid = vpid; ++ m->veid = veid; ++ hlist_add_head(&m->link, ++ &vpid_hash[vpid_hashfn(vpid, veid)]); ++ } ++ return 0; ++} ++ ++static int switch_to_sparse_mapping(int pid) ++{ ++ struct ve_struct *env = get_exec_env(); ++ struct hlist_head cache; ++ task_t *g, *t; ++ int pcount; ++ int err; ++ ++ /* Transition happens under write_lock_irq, so we try to make ++ * it more reliable and fast preallocating mapping entries. ++ * pcounter may be not enough, we could have lots of orphaned ++ * process groups and sessions, which also require mappings. ++ */ ++ INIT_HLIST_HEAD(&cache); ++ pcount = atomic_read(&env->pcounter); ++ err = -ENOMEM; ++ while (pcount > 0) { ++ struct vpid_mapping *m; ++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL); ++ if (!m) ++ goto out; ++ hlist_add_head(&m->link, &cache); ++ pcount--; ++ } ++ ++ write_lock_irq(&tasklist_lock); ++ err = 0; ++ if (env->sparse_vpid) ++ goto out_unlock; ++ ++ err = -ENOMEM; ++ do_each_thread_ve(g, t) { ++ if (t->pid == pid) ++ continue; ++ if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache)) ++ goto out_unlock; ++ } while_each_thread_ve(g, t); ++ ++ for_each_process_ve(t) { ++ if (t->pid == pid) ++ continue; ++ ++ if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache)) ++ goto out_unlock; ++ if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache)) ++ goto out_unlock; ++ if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache)) ++ goto out_unlock; ++ } ++ env->sparse_vpid = 1; ++ err = 0; ++ ++out_unlock: ++ if (err) { ++ int i; ++ ++ for (i=0; i<(1<<pidhash_shift); i++) { ++ struct hlist_node *elem, *next; ++ struct vpid_mapping *map; ++ ++ hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) { ++ if (map->veid == VEID(env)) { ++ hlist_del(elem); ++ hlist_add_head(elem, &cache); ++ } ++ } ++ } ++ } ++ write_unlock_irq(&tasklist_lock); ++ ++out: ++ while (!hlist_empty(&cache)) { ++ struct vpid_mapping *m; ++ m = hlist_entry(cache.first, struct vpid_mapping, link); ++ hlist_del(&m->link); ++ kmem_cache_free(vpid_mapping_cachep, m); ++ } ++ return err; ++} ++ ++int alloc_vpid(int pid, int virt_pid) ++{ ++ int result; ++ struct vpid_mapping *m; ++ struct ve_struct *env = get_exec_env(); ++ ++ if (ve_is_super(env) || !env->virt_pids) ++ return pid; ++ ++ if (!env->sparse_vpid) { ++ if (virt_pid == -1) ++ return pid + VPID_DIV; ++ ++ if (virt_pid == 1 || virt_pid == pid + VPID_DIV) ++ return virt_pid; ++ ++ if ((result = switch_to_sparse_mapping(pid)) < 0) ++ return result; ++ } ++ ++ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL); ++ if (!m) ++ return -ENOMEM; ++ ++ m->pid = pid; ++ m->veid = VEID(env); ++ ++ result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid; ++ ++ write_lock_irq(&tasklist_lock); ++ if (unlikely(__lookup_vpid_mapping(result, m->veid))) { ++ if (virt_pid > 0) { ++ result = -EEXIST; ++ goto out; ++ } ++ ++ /* No luck. Now we search for some not-existing vpid. ++ * It is weak place. We do linear search. 
*/ ++ do { ++ result++; ++ if (!__is_virtual_pid(result)) ++ result += VPID_DIV; ++ if (result >= pid_max) ++ result = RESERVED_PIDS + VPID_DIV; ++ } while (__lookup_vpid_mapping(result, m->veid) != NULL); ++ ++ /* And set last_pid in hope future alloc_pidmap to avoid ++ * collisions after future alloc_pidmap() */ ++ last_pid = result - VPID_DIV; ++ } ++ if (result > 0) { ++ m->vpid = result; ++ hlist_add_head(&m->link, ++ &vpid_hash[vpid_hashfn(result, m->veid)]); ++ } ++out: ++ write_unlock_irq(&tasklist_lock); ++ if (result < 0) ++ kmem_cache_free(vpid_mapping_cachep, m); ++ return result; ++} ++EXPORT_SYMBOL(alloc_vpid); ++ ++static void __free_vpid(int vpid, struct ve_struct *ve) ++{ ++ struct vpid_mapping *m; ++ ++ if (!ve->sparse_vpid) ++ return; ++ ++ if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve))) ++ return; ++ ++ m = __lookup_vpid_mapping(vpid, ve->veid); ++ BUG_ON(m == NULL); ++ hlist_del(&m->link); ++ kmem_cache_free(vpid_mapping_cachep, m); ++} ++ ++void free_vpid(int vpid, struct ve_struct *ve) ++{ ++ write_lock_irq(&tasklist_lock); ++ __free_vpid(vpid, ve); ++ write_unlock_irq(&tasklist_lock); ++} ++EXPORT_SYMBOL(free_vpid); ++#endif ++ + /* + * The pid hash table is scaled according to the amount of memory in the + * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or +@@ -283,12 +677,20 @@ void __init pidhash_init(void) + + for (i = 0; i < PIDTYPE_MAX; i++) { + pid_hash[i] = alloc_bootmem(pidhash_size * +- sizeof(struct list_head)); ++ sizeof(struct hlist_head)); + if (!pid_hash[i]) + panic("Could not alloc pidhash!\n"); + for (j = 0; j < pidhash_size; j++) +- INIT_LIST_HEAD(&pid_hash[i][j]); ++ INIT_HLIST_HEAD(&pid_hash[i][j]); + } ++ ++#ifdef CONFIG_VE ++ vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head)); ++ if (!vpid_hash) ++ panic("Could not alloc vpid_hash!\n"); ++ for (j = 0; j < pidhash_size; j++) ++ INIT_HLIST_HEAD(&vpid_hash[j]); ++#endif + } + + void __init pidmap_init(void) +@@ -305,4 +707,12 @@ void __init pidmap_init(void) + + for (i = 0; i < PIDTYPE_MAX; i++) + attach_pid(current, i, 0); ++ ++#ifdef CONFIG_VE ++ vpid_mapping_cachep = ++ kmem_cache_create("vpid_mapping", ++ sizeof(struct vpid_mapping), ++ __alignof__(struct vpid_mapping), ++ SLAB_PANIC|SLAB_UBC, NULL, NULL); ++#endif + } +diff -uprN linux-2.6.8.1.orig/kernel/posix-timers.c linux-2.6.8.1-ve022stab078/kernel/posix-timers.c +--- linux-2.6.8.1.orig/kernel/posix-timers.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/posix-timers.c 2006-05-11 13:05:40.000000000 +0400 +@@ -31,6 +31,7 @@ + * POSIX clocks & timers + */ + #include <linux/mm.h> ++#include <linux/module.h> + #include <linux/smp_lock.h> + #include <linux/interrupt.h> + #include <linux/slab.h> +@@ -223,7 +224,8 @@ static __init int init_posix_timers(void + register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); + + posix_timers_cache = kmem_cache_create("posix_timers_cache", +- sizeof (struct k_itimer), 0, 0, NULL, NULL); ++ sizeof (struct k_itimer), 0, SLAB_UBC, ++ NULL, NULL); + idr_init(&posix_timers_id); + return 0; + } +@@ -394,6 +396,11 @@ exit: + static void timer_notify_task(struct k_itimer *timr) + { + int ret; ++ struct ve_struct *old_ve; ++ struct user_beancounter *old_ub; ++ ++ old_ve = set_exec_env(VE_TASK_INFO(timr->it_process)->owner_env); ++ old_ub = set_exec_ub(task_bc(timr->it_process)->task_ub); + + memset(&timr->sigq->info, 0, sizeof(siginfo_t)); + +@@ -440,6 +447,9 @@ static void timer_notify_task(struct k_i + */ + 
schedule_next_timer(timr); + } ++ ++ (void)set_exec_ub(old_ub); ++ (void)set_exec_env(old_ve); + } + + /* +@@ -499,7 +509,7 @@ static inline struct task_struct * good_ + struct task_struct *rtn = current->group_leader; + + if ((event->sigev_notify & SIGEV_THREAD_ID ) && +- (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || ++ (!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) || + rtn->tgid != current->tgid || + (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + return NULL; +@@ -1228,6 +1238,7 @@ int do_posix_clock_monotonic_gettime(str + } + return 0; + } ++EXPORT_SYMBOL(do_posix_clock_monotonic_gettime); + + int do_posix_clock_monotonic_settime(struct timespec *tp) + { +diff -uprN linux-2.6.8.1.orig/kernel/power/pmdisk.c linux-2.6.8.1-ve022stab078/kernel/power/pmdisk.c +--- linux-2.6.8.1.orig/kernel/power/pmdisk.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/power/pmdisk.c 2006-05-11 13:05:39.000000000 +0400 +@@ -206,7 +206,7 @@ static int write_swap_page(unsigned long + swp_entry_t entry; + int error = 0; + +- entry = get_swap_page(); ++ entry = get_swap_page(mm_ub(&init_mm)); + if (swp_offset(entry) && + swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { + error = rw_swap_page_sync(WRITE, entry, +diff -uprN linux-2.6.8.1.orig/kernel/power/process.c linux-2.6.8.1-ve022stab078/kernel/power/process.c +--- linux-2.6.8.1.orig/kernel/power/process.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/power/process.c 2006-05-11 13:05:45.000000000 +0400 +@@ -23,15 +23,15 @@ static inline int freezeable(struct task + { + if ((p == current) || + (p->flags & PF_NOFREEZE) || +- (p->state == TASK_ZOMBIE) || +- (p->state == TASK_DEAD) || ++ (p->exit_state == EXIT_ZOMBIE) || ++ (p->exit_state == EXIT_DEAD) || + (p->state == TASK_STOPPED)) + return 0; + return 1; + } + + /* Refrigerator is place where frozen processes are stored :-). */ +-void refrigerator(unsigned long flag) ++void refrigerator() + { + /* Hmm, should we be allowed to suspend when there are realtime + processes around? */ +@@ -39,14 +39,19 @@ void refrigerator(unsigned long flag) + save = current->state; + current->state = TASK_UNINTERRUPTIBLE; + pr_debug("%s entered refrigerator\n", current->comm); +- printk("="); +- current->flags &= ~PF_FREEZE; ++ /* printk("="); */ + + spin_lock_irq(¤t->sighand->siglock); +- recalc_sigpending(); /* We sent fake signal, clean it up */ ++ if (test_and_clear_thread_flag(TIF_FREEZE)) { ++ recalc_sigpending(); /* We sent fake signal, clean it up */ ++ current->flags |= PF_FROZEN; ++ } else { ++ /* Freeze request could be canceled before we entered ++ * refrigerator(). In this case we do nothing. 
*/ ++ current->state = save; ++ } + spin_unlock_irq(¤t->sighand->siglock); + +- current->flags |= PF_FROZEN; + while (current->flags & PF_FROZEN) + schedule(); + pr_debug("%s left refrigerator\n", current->comm); +@@ -65,7 +70,7 @@ int freeze_processes(void) + do { + todo = 0; + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + unsigned long flags; + if (!freezeable(p)) + continue; +@@ -75,12 +80,12 @@ int freeze_processes(void) + + /* FIXME: smp problem here: we may not access other process' flags + without locking */ +- p->flags |= PF_FREEZE; + spin_lock_irqsave(&p->sighand->siglock, flags); ++ set_tsk_thread_flag(p, TIF_FREEZE); + signal_wake_up(p, 0); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + todo++; +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + read_unlock(&tasklist_lock); + yield(); /* Yield is okay here */ + if (time_after(jiffies, start_time + TIMEOUT)) { +@@ -90,7 +95,7 @@ int freeze_processes(void) + } + } while(todo); + +- printk( "|\n" ); ++ /* printk( "|\n" ); */ + BUG_ON(in_atomic()); + return 0; + } +@@ -101,15 +106,18 @@ void thaw_processes(void) + + printk( "Restarting tasks..." ); + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { ++ unsigned long flags; + if (!freezeable(p)) + continue; ++ spin_lock_irqsave(&p->sighand->siglock, flags); + if (p->flags & PF_FROZEN) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + } else + printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); +- } while_each_thread(g, p); ++ spin_unlock_irqrestore(&p->sighand->siglock, flags); ++ } while_each_thread_all(g, p); + + read_unlock(&tasklist_lock); + schedule(); +diff -uprN linux-2.6.8.1.orig/kernel/power/swsusp.c linux-2.6.8.1-ve022stab078/kernel/power/swsusp.c +--- linux-2.6.8.1.orig/kernel/power/swsusp.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/power/swsusp.c 2006-05-11 13:05:39.000000000 +0400 +@@ -317,7 +317,7 @@ static int write_suspend_image(void) + for (i=0; i<nr_copy_pages; i++) { + if (!(i%100)) + printk( "." ); +- entry = get_swap_page(); ++ entry = get_swap_page(mm_ub(&init_mm)); + if (!entry.val) + panic("\nNot enough swapspace when writing data" ); + +@@ -335,7 +335,7 @@ static int write_suspend_image(void) + cur = (union diskpage *)((char *) pagedir_nosave)+i; + BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE)); + printk( "." 
); +- entry = get_swap_page(); ++ entry = get_swap_page(mm_ub(&init_mm)); + if (!entry.val) { + printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" ); + panic("Don't know how to recover"); +@@ -358,7 +358,7 @@ static int write_suspend_image(void) + BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); + BUG_ON (sizeof(union diskpage) != PAGE_SIZE); + BUG_ON (sizeof(struct link) != PAGE_SIZE); +- entry = get_swap_page(); ++ entry = get_swap_page(mm_ub(&init_mm)); + if (!entry.val) + panic( "\nNot enough swapspace when writing header" ); + if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) +diff -uprN linux-2.6.8.1.orig/kernel/printk.c linux-2.6.8.1-ve022stab078/kernel/printk.c +--- linux-2.6.8.1.orig/kernel/printk.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/printk.c 2006-05-11 13:05:42.000000000 +0400 +@@ -26,10 +26,13 @@ + #include <linux/module.h> + #include <linux/interrupt.h> /* For in_interrupt() */ + #include <linux/config.h> ++#include <linux/slab.h> + #include <linux/delay.h> + #include <linux/smp.h> + #include <linux/security.h> + #include <linux/bootmem.h> ++#include <linux/vzratelimit.h> ++#include <linux/veprintk.h> + + #include <asm/uaccess.h> + +@@ -53,6 +56,7 @@ int console_printk[4] = { + + EXPORT_SYMBOL(console_printk); + ++int console_silence_loglevel; + int oops_in_progress; + + /* +@@ -77,7 +81,7 @@ static int console_locked; + * It is also used in interesting ways to provide interlocking in + * release_console_sem(). + */ +-static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED; ++spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED; + + static char __log_buf[__LOG_BUF_LEN]; + static char *log_buf = __log_buf; +@@ -151,6 +155,43 @@ static int __init console_setup(char *st + + __setup("console=", console_setup); + ++static int __init setup_console_silencelevel(char *str) ++{ ++ int level; ++ ++ if (get_option(&str, &level) != 1) ++ return 0; ++ ++ console_silence_loglevel = level; ++ return 1; ++} ++ ++__setup("silencelevel=", setup_console_silencelevel); ++ ++static inline int ve_log_init(void) ++{ ++#ifdef CONFIG_VE ++ if (ve_log_buf != NULL) ++ return 0; ++ ++ if (ve_is_super(get_exec_env())) { ++ ve0._log_wait = &log_wait; ++ ve0._log_start = &log_start; ++ ve0._log_end = &log_end; ++ ve0._logged_chars = &logged_chars; ++ ve0.log_buf = log_buf; ++ return 0; ++ } ++ ++ ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC); ++ if (!ve_log_buf) ++ return -ENOMEM; ++ ++ memset(ve_log_buf, 0, ve_log_buf_len); ++#endif ++ return 0; ++} ++ + /** + * add_preferred_console - add a device to the list of preferred consoles. 
+ * +@@ -249,6 +290,10 @@ int do_syslog(int type, char __user * bu + char c; + int error = 0; + ++ if (!ve_is_super(get_exec_env()) && ++ (type == 6 || type == 7 || type == 8)) ++ goto out; ++ + error = security_syslog(type); + if (error) + return error; +@@ -268,14 +313,15 @@ int do_syslog(int type, char __user * bu + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; +- error = wait_event_interruptible(log_wait, (log_start - log_end)); ++ error = wait_event_interruptible(ve_log_wait, ++ (ve_log_start - ve_log_end)); + if (error) + goto out; + i = 0; + spin_lock_irq(&logbuf_lock); +- while (!error && (log_start != log_end) && i < len) { +- c = LOG_BUF(log_start); +- log_start++; ++ while (!error && (ve_log_start != ve_log_end) && i < len) { ++ c = VE_LOG_BUF(ve_log_start); ++ ve_log_start++; + spin_unlock_irq(&logbuf_lock); + error = __put_user(c,buf); + buf++; +@@ -299,15 +345,17 @@ int do_syslog(int type, char __user * bu + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; ++ if (ve_log_buf == NULL) ++ goto out; + count = len; +- if (count > log_buf_len) +- count = log_buf_len; ++ if (count > ve_log_buf_len) ++ count = ve_log_buf_len; + spin_lock_irq(&logbuf_lock); +- if (count > logged_chars) +- count = logged_chars; ++ if (count > ve_logged_chars) ++ count = ve_logged_chars; + if (do_clear) +- logged_chars = 0; +- limit = log_end; ++ ve_logged_chars = 0; ++ limit = ve_log_end; + /* + * __put_user() could sleep, and while we sleep + * printk() could overwrite the messages +@@ -316,9 +364,9 @@ int do_syslog(int type, char __user * bu + */ + for(i = 0; i < count && !error; i++) { + j = limit-1-i; +- if (j + log_buf_len < log_end) ++ if (j + ve_log_buf_len < ve_log_end) + break; +- c = LOG_BUF(j); ++ c = VE_LOG_BUF(j); + spin_unlock_irq(&logbuf_lock); + error = __put_user(c,&buf[count-1-i]); + spin_lock_irq(&logbuf_lock); +@@ -340,7 +388,7 @@ int do_syslog(int type, char __user * bu + } + break; + case 5: /* Clear ring buffer */ +- logged_chars = 0; ++ ve_logged_chars = 0; + break; + case 6: /* Disable logging to console */ + console_loglevel = minimum_console_loglevel; +@@ -358,10 +406,10 @@ int do_syslog(int type, char __user * bu + error = 0; + break; + case 9: /* Number of chars in the log buffer */ +- error = log_end - log_start; ++ error = ve_log_end - ve_log_start; + break; + case 10: /* Size of the log buffer */ +- error = log_buf_len; ++ error = ve_log_buf_len; + break; + default: + error = -EINVAL; +@@ -461,14 +509,14 @@ static void call_console_drivers(unsigne + + static void emit_log_char(char c) + { +- LOG_BUF(log_end) = c; +- log_end++; +- if (log_end - log_start > log_buf_len) +- log_start = log_end - log_buf_len; +- if (log_end - con_start > log_buf_len) ++ VE_LOG_BUF(ve_log_end) = c; ++ ve_log_end++; ++ if (ve_log_end - ve_log_start > ve_log_buf_len) ++ ve_log_start = ve_log_end - ve_log_buf_len; ++ if (ve_is_super(get_exec_env()) && log_end - con_start > log_buf_len) + con_start = log_end - log_buf_len; +- if (logged_chars < log_buf_len) +- logged_chars++; ++ if (ve_logged_chars < ve_log_buf_len) ++ ve_logged_chars++; + } + + /* +@@ -505,14 +553,14 @@ static void zap_locks(void) + * then changes console_loglevel may break. This is because console_loglevel + * is inspected when the actual printing occurs. + */ +-asmlinkage int printk(const char *fmt, ...) 
++asmlinkage int vprintk(const char *fmt, va_list args) + { +- va_list args; + unsigned long flags; + int printed_len; + char *p; + static char printk_buf[1024]; + static int log_level_unknown = 1; ++ int err, need_wake; + + if (unlikely(oops_in_progress)) + zap_locks(); +@@ -520,10 +568,14 @@ asmlinkage int printk(const char *fmt, . + /* This stops the holder of console_sem just where we want him */ + spin_lock_irqsave(&logbuf_lock, flags); + ++ err = ve_log_init(); ++ if (err) { ++ spin_unlock_irqrestore(&logbuf_lock, flags); ++ return err; ++ } ++ + /* Emit the output into the temporary buffer */ +- va_start(args, fmt); + printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); +- va_end(args); + + /* + * Copy the output into log_buf. If the caller didn't provide +@@ -554,7 +606,12 @@ asmlinkage int printk(const char *fmt, . + spin_unlock_irqrestore(&logbuf_lock, flags); + goto out; + } +- if (!down_trylock(&console_sem)) { ++ if (!ve_is_super(get_exec_env())) { ++ need_wake = (ve_log_start != ve_log_end); ++ spin_unlock_irqrestore(&logbuf_lock, flags); ++ if (!oops_in_progress && need_wake) ++ wake_up_interruptible(&ve_log_wait); ++ } else if (!down_trylock(&console_sem)) { + console_locked = 1; + /* + * We own the drivers. We can drop the spinlock and let +@@ -574,8 +631,49 @@ asmlinkage int printk(const char *fmt, . + out: + return printed_len; + } ++ ++EXPORT_SYMBOL(vprintk); ++ ++asmlinkage int printk(const char *fmt, ...) ++{ ++ va_list args; ++ int i; ++ struct ve_struct *env; ++ ++ va_start(args, fmt); ++ env = set_exec_env(get_ve0()); ++ i = vprintk(fmt, args); ++ set_exec_env(env); ++ va_end(args); ++ return i; ++} ++ + EXPORT_SYMBOL(printk); + ++asmlinkage int ve_printk(int dst, const char *fmt, ...) ++{ ++ va_list args; ++ int printed_len; ++ ++ printed_len = 0; ++ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) { ++ struct ve_struct *env; ++ va_start(args, fmt); ++ env = set_exec_env(get_ve0()); ++ printed_len = vprintk(fmt, args); ++ set_exec_env(env); ++ va_end(args); ++ } ++ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) { ++ va_start(args, fmt); ++ printed_len = vprintk(fmt, args); ++ va_end(args); ++ } ++ return printed_len; ++} ++EXPORT_SYMBOL(ve_printk); ++ ++ + /** + * acquire_console_sem - lock the console system for exclusive use. + * +@@ -600,6 +698,12 @@ int is_console_locked(void) + } + EXPORT_SYMBOL(is_console_locked); + ++void wake_up_klogd(void) ++{ ++ if (!oops_in_progress && waitqueue_active(&log_wait)) ++ wake_up_interruptible(&log_wait); ++} ++ + /** + * release_console_sem - unlock the console system + * +@@ -635,8 +739,8 @@ void release_console_sem(void) + console_may_schedule = 0; + up(&console_sem); + spin_unlock_irqrestore(&logbuf_lock, flags); +- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) +- wake_up_interruptible(&log_wait); ++ if (wake_klogd) ++ wake_up_klogd(); + } + EXPORT_SYMBOL(release_console_sem); + +@@ -895,3 +999,33 @@ int printk_ratelimit(void) + printk_ratelimit_burst); + } + EXPORT_SYMBOL(printk_ratelimit); ++ ++/* ++ * Rate limiting stuff. 
++ */ ++int vz_ratelimit(struct vz_rate_info *p) ++{ ++ unsigned long cjif, djif; ++ unsigned long flags; ++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; ++ long new_bucket; ++ ++ spin_lock_irqsave(&ratelimit_lock, flags); ++ cjif = jiffies; ++ djif = cjif - p->last; ++ if (djif < p->interval) { ++ if (p->bucket >= p->burst) { ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 0; ++ } ++ p->bucket++; ++ } else { ++ new_bucket = p->bucket - (djif / (unsigned)p->interval); ++ if (new_bucket < 0) ++ new_bucket = 0; ++ p->bucket = new_bucket + 1; ++ } ++ p->last = cjif; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 1; ++} +diff -uprN linux-2.6.8.1.orig/kernel/ptrace.c linux-2.6.8.1-ve022stab078/kernel/ptrace.c +--- linux-2.6.8.1.orig/kernel/ptrace.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/ptrace.c 2006-05-11 13:05:40.000000000 +0400 +@@ -46,8 +46,8 @@ void __ptrace_link(task_t *child, task_t + */ + void __ptrace_unlink(task_t *child) + { +- if (!child->ptrace) +- BUG(); ++ BUG_ON(!child->ptrace); ++ + child->ptrace = 0; + if (list_empty(&child->ptrace_list)) + return; +@@ -85,7 +85,7 @@ int ptrace_attach(struct task_struct *ta + retval = -EPERM; + if (task->pid <= 1) + goto bad; +- if (task == current) ++ if (task->tgid == current->tgid) + goto bad; + if (!task->mm) + goto bad; +@@ -99,6 +99,8 @@ int ptrace_attach(struct task_struct *ta + rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + goto bad; ++ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env())) ++ goto bad; + /* the same process cannot be attached many times */ + if (task->ptrace & PT_PTRACED) + goto bad; +@@ -124,22 +126,27 @@ bad: + return retval; + } + ++void __ptrace_detach(struct task_struct *child, unsigned int data) ++{ ++ child->exit_code = data; ++ /* .. re-parent .. */ ++ __ptrace_unlink(child); ++ /* .. and wake it up. */ ++ if (child->exit_state != EXIT_ZOMBIE) ++ wake_up_process(child); ++} ++ + int ptrace_detach(struct task_struct *child, unsigned int data) + { + if ((unsigned long) data > _NSIG) +- return -EIO; ++ return -EIO; + + /* Architecture-specific hardware disable .. */ + ptrace_disable(child); + +- /* .. re-parent .. */ +- child->exit_code = data; +- + write_lock_irq(&tasklist_lock); +- __ptrace_unlink(child); +- /* .. and wake it up. */ +- if (child->state != TASK_ZOMBIE) +- wake_up_process(child); ++ if (child->ptrace) ++ __ptrace_detach(child, data); + write_unlock_irq(&tasklist_lock); + + return 0; +diff -uprN linux-2.6.8.1.orig/kernel/sched.c linux-2.6.8.1-ve022stab078/kernel/sched.c +--- linux-2.6.8.1.orig/kernel/sched.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/sched.c 2006-05-11 13:05:49.000000000 +0400 +@@ -25,6 +25,7 @@ + #include <asm/uaccess.h> + #include <linux/highmem.h> + #include <linux/smp_lock.h> ++#include <linux/pagemap.h> + #include <asm/mmu_context.h> + #include <linux/interrupt.h> + #include <linux/completion.h> +@@ -40,6 +41,8 @@ + #include <linux/cpu.h> + #include <linux/percpu.h> + #include <linux/kthread.h> ++#include <linux/vsched.h> ++#include <linux/fairsched.h> + #include <asm/tlb.h> + + #include <asm/unistd.h> +@@ -132,7 +135,7 @@ + #ifdef CONFIG_SMP + #define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \ + (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ +- num_online_cpus()) ++ vsched_num_online_vcpus(task_vsched(p))) + #else + #define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \ + (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? 
: 1) - 1))) +@@ -203,6 +206,7 @@ struct prio_array { + * (such as the load balancing or the thread migration code), lock + * acquire operations must be ordered by ascending &runqueue. + */ ++typedef struct vcpu_info *vcpu_t; + struct runqueue { + spinlock_t lock; + +@@ -217,7 +221,7 @@ struct runqueue { + unsigned long long nr_switches; + unsigned long expired_timestamp, nr_uninterruptible; + unsigned long long timestamp_last_tick; +- task_t *curr, *idle; ++ task_t *curr; + struct mm_struct *prev_mm; + prio_array_t *active, *expired, arrays[2]; + int best_expired_prio; +@@ -225,35 +229,623 @@ struct runqueue { + + #ifdef CONFIG_SMP + struct sched_domain *sd; +- + /* For active balancing */ + int active_balance; +- int push_cpu; ++#endif ++ vcpu_t push_cpu; + + task_t *migration_thread; + struct list_head migration_queue; +-#endif + }; + +-static DEFINE_PER_CPU(struct runqueue, runqueues); ++/* VCPU scheduler state description */ ++struct vcpu_info; ++struct vcpu_scheduler { ++ struct list_head idle_list; ++ struct list_head active_list; ++ struct list_head running_list; ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node; ++#endif ++ struct vcpu_info *vcpu[NR_CPUS]; ++ int id; ++ cpumask_t vcpu_online_map, vcpu_running_map; ++ cpumask_t pcpu_running_map; ++ int num_online_vcpus; ++} ____cacheline_maxaligned_in_smp; ++ ++/* virtual CPU description */ ++struct vcpu_info { ++ struct runqueue rq; ++#ifdef CONFIG_SCHED_VCPU ++ unsigned active : 1, ++ running : 1; ++ struct list_head list; ++ struct vcpu_scheduler *vsched; ++ int last_pcpu; ++ u32 start_time; ++#endif ++ int id; ++} ____cacheline_maxaligned_in_smp; ++ ++/* physical CPU description */ ++struct pcpu_info { ++ struct vcpu_scheduler *vsched; ++ struct vcpu_info *vcpu; ++ task_t *idle; ++#ifdef CONFIG_SMP ++ struct sched_domain *sd; ++#endif ++ int id; ++} ____cacheline_maxaligned_in_smp; ++ ++struct pcpu_info pcpu_info[NR_CPUS]; ++ ++#define pcpu(nr) (&pcpu_info[nr]) ++#define this_pcpu() (pcpu(smp_processor_id())) + + #define for_each_domain(cpu, domain) \ +- for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) ++ for (domain = vcpu_rq(cpu)->sd; domain; domain = domain->parent) ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++u32 vcpu_sched_timeslice = 5; ++u32 vcpu_timeslice = 0; ++EXPORT_SYMBOL(vcpu_sched_timeslice); ++EXPORT_SYMBOL(vcpu_timeslice); ++ ++extern spinlock_t fairsched_lock; ++static struct vcpu_scheduler default_vsched, idle_vsched; ++static struct vcpu_info boot_vcpu; ++ ++#define vsched_default_vsched() (&default_vsched) ++#define vsched_default_vcpu(id) (default_vsched.vcpu[id]) ++ ++/* ++ * All macroses below could be used without locks, if there is no ++ * strict ordering requirements, because we assume, that: ++ * ++ * 1. VCPU could not disappear "on the fly" (FIXME) ++ * ++ * 2. p->vsched access is atomic. 
++ */ ++ ++#define task_vsched(tsk) ((tsk)->vsched) ++#define this_vsched() (task_vsched(current)) ++ ++#define vsched_vcpu(vsched, id) ((vsched)->vcpu[id]) ++#define this_vcpu() (task_vcpu(current)) ++#define task_vcpu(p) ((p)->vcpu) ++ ++#define vsched_id(vsched) ((vsched)->id) ++#define vsched_vcpu_online_map(vsched) ((vsched)->vcpu_online_map) ++#define vsched_num_online_vcpus(vsched) ((vsched)->num_online_vcpus) ++#define vsched_pcpu_running_map(vsched) ((vsched)->pcpu_running_map) ++ ++#define vcpu_vsched(vcpu) ((vcpu)->vsched) ++#define vcpu_last_pcpu(vcpu) ((vcpu)->last_pcpu) ++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask)) ++#define vcpu_is_offline(vcpu) (!vcpu_isset(vcpu, \ ++ vcpu_vsched(vcpu)->vcpu_online_map)) ++ ++static int __add_vcpu(struct vcpu_scheduler *vsched, int id); ++ ++#else /* CONFIG_SCHED_VCPU */ ++ ++static DEFINE_PER_CPU(struct vcpu_info, vcpu_info); ++ ++#define task_vsched(p) NULL ++#define this_vcpu() (task_vcpu(current)) ++#define task_vcpu(p) (vcpu(task_cpu(p))) ++ ++#define vsched_vcpu(sched, id) (vcpu(id)) ++#define vsched_id(vsched) 0 ++#define vsched_default_vsched() NULL ++#define vsched_default_vcpu(id) (vcpu(id)) ++ ++#define vsched_vcpu_online_map(vsched) (cpu_online_map) ++#define vsched_num_online_vcpus(vsched) (num_online_cpus()) ++#define vsched_pcpu_running_map(vsched) (cpu_online_map) ++ ++#define vcpu(id) (&per_cpu(vcpu_info, id)) ++ ++#define vcpu_vsched(vcpu) NULL ++#define vcpu_last_pcpu(vcpu) ((vcpu)->id) ++#define vcpu_isset(vcpu, mask) (cpu_isset((vcpu)->id, mask)) ++#define vcpu_is_offline(vcpu) (cpu_is_offline((vcpu)->id)) ++ ++#endif /* CONFIG_SCHED_VCPU */ ++ ++#define this_rq() (vcpu_rq(this_vcpu())) ++#define task_rq(p) (vcpu_rq(task_vcpu(p))) ++#define vcpu_rq(vcpu) (&(vcpu)->rq) ++#define get_vcpu() ({ preempt_disable(); this_vcpu(); }) ++#define put_vcpu() ({ put_cpu(); }) ++#define rq_vcpu(__rq) (container_of((__rq), struct vcpu_info, rq)) ++ ++task_t *idle_task(int cpu) ++{ ++ return pcpu(cpu)->idle; ++} ++ ++#ifdef CONFIG_SMP ++static inline void update_rq_cpu_load(runqueue_t *rq) ++{ ++ unsigned long old_load, this_load; ++ ++ if (rq->nr_running == 0) { ++ rq->cpu_load = 0; ++ return; ++ } ++ ++ old_load = rq->cpu_load; ++ this_load = rq->nr_running * SCHED_LOAD_SCALE; ++ /* ++ * Round up the averaging division if load is increasing. This ++ * prevents us from getting stuck on 9 if the load is 10, for ++ * example. ++ */ ++ if (this_load > old_load) ++ old_load++; ++ rq->cpu_load = (old_load + this_load) / 2; ++} ++#else /* CONFIG_SMP */ ++static inline void update_rq_cpu_load(runqueue_t *rq) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_SCHED_VCPU ++ ++void fastcall vsched_cpu_online_map(struct vcpu_scheduler *vsched, ++ cpumask_t *mask) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&fairsched_lock, flags); ++ *mask = vsched->vcpu_online_map; ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++} ++ ++static inline void set_task_vsched(task_t *p, struct vcpu_scheduler *vsched) ++{ ++ /* NOTE: set_task_cpu() is required after every set_task_vsched()! 
*/ ++ p->vsched = vsched; ++ p->vsched_id = vsched_id(vsched); ++} ++ ++inline void set_task_cpu(struct task_struct *p, unsigned int vcpu_id) ++{ ++ p->vcpu = vsched_vcpu(task_vsched(p), vcpu_id); ++ p->vcpu_id = vcpu_id; ++} ++ ++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu) ++{ ++ p->vcpu = vcpu; ++ p->vcpu_id = vcpu->id; ++} ++ ++ ++#ifdef CONFIG_VE ++#define cycles_after(a, b) ((long long)(b) - (long long)(a) < 0) ++ ++cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu) ++{ ++ struct ve_cpu_stats *ve_stat; ++ unsigned v; ++ cycles_t strt, ret, cycles; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ do { ++ v = read_seqcount_begin(&ve_stat->stat_lock); ++ ret = ve_stat->idle_time; ++ strt = ve_stat->strt_idle_time; ++ if (strt && nr_uninterruptible_ve(ve) == 0) { ++ cycles = get_cycles(); ++ if (cycles_after(cycles, strt)) ++ ret += cycles - strt; ++ } ++ } while (read_seqcount_retry(&ve_stat->stat_lock, v)); ++ return ret; ++} ++ ++cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu) ++{ ++ struct ve_cpu_stats *ve_stat; ++ unsigned v; ++ cycles_t strt, ret, cycles; ++ ++ ve_stat = VE_CPU_STATS(ve, cpu); ++ do { ++ v = read_seqcount_begin(&ve_stat->stat_lock); ++ ret = ve_stat->iowait_time; ++ strt = ve_stat->strt_idle_time; ++ if (strt && nr_iowait_ve(ve) > 0) { ++ cycles = get_cycles(); ++ if (cycles_after(cycles, strt)) ++ ret += cycles - strt; ++ } ++ } while (read_seqcount_retry(&ve_stat->stat_lock, v)); ++ return ret; ++} ++ ++static inline void vcpu_save_ve_idle(struct ve_struct *ve, ++ unsigned int vcpu, cycles_t cycles) ++{ ++ struct ve_cpu_stats *ve_stat; ++ ++ ve_stat = VE_CPU_STATS(ve, vcpu); ++ ++ write_seqcount_begin(&ve_stat->stat_lock); ++ if (ve_stat->strt_idle_time) { ++ if (cycles_after(cycles, ve_stat->strt_idle_time)) { ++ if (nr_iowait_ve(ve) == 0) ++ ve_stat->idle_time += cycles - ++ ve_stat->strt_idle_time; ++ else ++ ve_stat->iowait_time += cycles - ++ ve_stat->strt_idle_time; ++ } ++ ve_stat->strt_idle_time = 0; ++ } ++ write_seqcount_end(&ve_stat->stat_lock); ++} ++ ++static inline void vcpu_strt_ve_idle(struct ve_struct *ve, ++ unsigned int vcpu, cycles_t cycles) ++{ ++ struct ve_cpu_stats *ve_stat; ++ ++ ve_stat = VE_CPU_STATS(ve, vcpu); ++ ++ write_seqcount_begin(&ve_stat->stat_lock); ++ ve_stat->strt_idle_time = cycles; ++ write_seqcount_end(&ve_stat->stat_lock); ++} ++ ++#else ++#define vcpu_save_ve_idle(ve, vcpu, cycles) do { } while (0) ++#define vcpu_strt_ve_idle(ve, vcpu, cycles) do { } while (0) ++#endif ++ ++/* this is called when rq->nr_running changes from 0 to 1 */ ++static void vcpu_attach(runqueue_t *rq) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ ++ vcpu = rq_vcpu(rq); ++ vsched = vcpu_vsched(vcpu); ++ ++ BUG_ON(vcpu->active); ++ spin_lock(&fairsched_lock); ++ vcpu->active = 1; ++ if (!vcpu->running) ++ list_move_tail(&vcpu->list, &vsched->active_list); ++ ++ fairsched_incrun(vsched->node); ++ spin_unlock(&fairsched_lock); ++} ++ ++/* this is called when rq->nr_running changes from 1 to 0 */ ++static void vcpu_detach(runqueue_t *rq) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ ++ vcpu = rq_vcpu(rq); ++ vsched = vcpu_vsched(vcpu); ++ BUG_ON(!vcpu->active); ++ ++ spin_lock(&fairsched_lock); ++ fairsched_decrun(vsched->node); ++ ++ vcpu->active = 0; ++ if (!vcpu->running) ++ list_move_tail(&vcpu->list, &vsched->idle_list); ++ spin_unlock(&fairsched_lock); ++} ++ ++static inline void __vcpu_get(vcpu_t vcpu) ++{ ++ struct pcpu_info *pcpu; ++ struct vcpu_scheduler *vsched; ++ ++ 
BUG_ON(!this_vcpu()->running); ++ ++ pcpu = this_pcpu(); ++ vsched = vcpu_vsched(vcpu); ++ ++ pcpu->vcpu = vcpu; ++ pcpu->vsched = vsched; ++ ++ fairsched_inccpu(vsched->node); ++ ++ list_move_tail(&vcpu->list, &vsched->running_list); ++ vcpu->start_time = jiffies; ++ vcpu->last_pcpu = pcpu->id; ++ vcpu->running = 1; ++ __set_bit(vcpu->id, vsched->vcpu_running_map.bits); ++ __set_bit(pcpu->id, vsched->pcpu_running_map.bits); ++#ifdef CONFIG_SMP ++ vcpu_rq(vcpu)->sd = pcpu->sd; ++#endif ++} ++ ++static void vcpu_put(vcpu_t vcpu) ++{ ++ struct vcpu_scheduler *vsched; ++ struct pcpu_info *cur_pcpu; ++ runqueue_t *rq; + +-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) +-#define this_rq() (&__get_cpu_var(runqueues)) +-#define task_rq(p) cpu_rq(task_cpu(p)) +-#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++ vsched = vcpu_vsched(vcpu); ++ rq = vcpu_rq(vcpu); ++ cur_pcpu = this_pcpu(); ++ ++ BUG_ON(!vcpu->running); ++ ++ spin_lock(&fairsched_lock); ++ vcpu->running = 0; ++ list_move_tail(&vcpu->list, ++ vcpu->active ? &vsched->active_list : &vsched->idle_list); ++ fairsched_deccpu(vsched->node); ++ __clear_bit(vcpu->id, vsched->vcpu_running_map.bits); ++ if (vsched != this_vsched()) ++ __clear_bit(cur_pcpu->id, vsched->pcpu_running_map.bits); ++ ++ if (!vcpu->active) ++ rq->expired_timestamp = 0; ++ /* from this point task_running(prev_rq, prev) will be 0 */ ++ rq->curr = cur_pcpu->idle; ++ update_rq_cpu_load(rq); ++ spin_unlock(&fairsched_lock); ++} ++ ++static vcpu_t schedule_vcpu(vcpu_t cur_vcpu, cycles_t cycles) ++{ ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ runqueue_t *rq; ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node, *nodec; ++ ++ nodec = vcpu_vsched(cur_vcpu)->node; ++ node = nodec; ++#endif ++ ++ BUG_ON(!cur_vcpu->running); ++restart: ++ spin_lock(&fairsched_lock); ++#ifdef CONFIG_FAIRSCHED ++ node = fairsched_schedule(node, nodec, ++ cur_vcpu->active, ++ cycles); ++ if (unlikely(node == NULL)) ++ goto idle; ++ ++ vsched = node->vsched; ++#else ++ vsched = &default_vsched; ++#endif ++ /* FIXME: optimize vcpu switching, maybe we do not need to call ++ fairsched_schedule() at all if vcpu is still active and too ++ little time have passed so far */ ++ if (cur_vcpu->vsched == vsched && cur_vcpu->active && ++ jiffies - cur_vcpu->start_time < msecs_to_jiffies(vcpu_sched_timeslice)) { ++ vcpu = cur_vcpu; ++ goto done; ++ } ++ ++ if (list_empty(&vsched->active_list)) { ++ /* nothing except for this cpu can be scheduled */ ++ if (likely(cur_vcpu->vsched == vsched && cur_vcpu->active)) { ++ /* ++ * Current vcpu is the one we need. We have not ++ * put it yet, so it's not on the active_list. ++ */ ++ vcpu = cur_vcpu; ++ goto done; ++ } else ++ goto none; ++ } ++ ++ /* select vcpu and add to running list */ ++ vcpu = list_entry(vsched->active_list.next, struct vcpu_info, list); ++ __vcpu_get(vcpu); ++done: ++ spin_unlock(&fairsched_lock); ++ ++ rq = vcpu_rq(vcpu); ++ if (unlikely(vcpu != cur_vcpu)) { ++ spin_unlock(&vcpu_rq(cur_vcpu)->lock); ++ spin_lock(&rq->lock); ++ if (unlikely(!rq->nr_running)) { ++ /* race with balancing? 
*/ ++ spin_unlock(&rq->lock); ++ vcpu_put(vcpu); ++ spin_lock(&vcpu_rq(cur_vcpu)->lock); ++ goto restart; ++ } ++ } ++ BUG_ON(!rq->nr_running); ++ return vcpu; ++ ++none: ++#ifdef CONFIG_FAIRSCHED ++ spin_unlock(&fairsched_lock); ++ ++ /* fairsched doesn't schedule more CPUs than we have active */ ++ BUG_ON(1); ++#else ++ goto idle; ++#endif ++ ++idle: ++ vcpu = task_vcpu(this_pcpu()->idle); ++ __vcpu_get(vcpu); ++ spin_unlock(&fairsched_lock); ++ spin_unlock(&vcpu_rq(cur_vcpu)->lock); ++ ++ spin_lock(&vcpu_rq(vcpu)->lock); ++ return vcpu; ++} ++ ++#else /* CONFIG_SCHED_VCPU */ ++ ++#define set_task_vsched(task, vsched) do { } while (0) ++ ++static inline void vcpu_attach(runqueue_t *rq) ++{ ++} ++ ++static inline void vcpu_detach(runqueue_t *rq) ++{ ++} ++ ++static inline void vcpu_put(vcpu_t vcpu) ++{ ++} ++ ++static inline vcpu_t schedule_vcpu(vcpu_t prev_vcpu, cycles_t cycles) ++{ ++ return prev_vcpu; ++} ++ ++static inline void set_task_vcpu(struct task_struct *p, vcpu_t vcpu) ++{ ++ set_task_pcpu(p, vcpu->id); ++} ++ ++#endif /* CONFIG_SCHED_VCPU */ ++ ++int vcpu_online(int cpu) ++{ ++ return cpu_isset(cpu, vsched_vcpu_online_map(this_vsched())); ++} + + /* + * Default context-switch locking: + */ + #ifndef prepare_arch_switch + # define prepare_arch_switch(rq, next) do { } while (0) +-# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) ++# define finish_arch_switch(rq, next) spin_unlock(&(rq)->lock) + # define task_running(rq, p) ((rq)->curr == (p)) + #endif + ++struct kernel_stat_glob kstat_glob; ++spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED; ++EXPORT_SYMBOL(kstat_glob); ++EXPORT_SYMBOL(kstat_glb_lock); ++ ++#ifdef CONFIG_VE ++ ++#define ve_nr_running_inc(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_running++; \ ++ } while(0) ++#define ve_nr_running_dec(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_running--; \ ++ } while(0) ++#define ve_nr_iowait_inc(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_iowait++; \ ++ } while(0) ++#define ve_nr_iowait_dec(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_iowait--; \ ++ } while(0) ++#define ve_nr_unint_inc(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_unint++; \ ++ } while(0) ++#define ve_nr_unint_dec(env, cpu) \ ++ do { \ ++ VE_CPU_STATS((env), (cpu))->nr_unint--; \ ++ } while(0) ++ ++void ve_sched_attach(struct ve_struct *envid) ++{ ++ struct task_struct *tsk; ++ unsigned int vcpu; ++ ++ tsk = current; ++ preempt_disable(); ++ vcpu = task_cpu(tsk); ++ ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, vcpu); ++ ve_nr_running_inc(envid, vcpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(ve_sched_attach); ++ ++#else ++ ++#define ve_nr_running_inc(env, cpu) do { } while(0) ++#define ve_nr_running_dec(env, cpu) do { } while(0) ++#define ve_nr_iowait_inc(env, cpu) do { } while(0) ++#define ve_nr_iowait_dec(env, cpu) do { } while(0) ++#define ve_nr_unint_inc(env, cpu) do { } while(0) ++#define ve_nr_unint_dec(env, cpu) do { } while(0) ++ ++#endif ++ ++struct task_nrs_struct { ++ long nr_running; ++ long nr_uninterruptible; ++ long nr_stopped; ++ long nr_sleeping; ++ long nr_iowait; ++ long long nr_switches; ++} ____cacheline_aligned_in_smp; ++ ++static struct task_nrs_struct glob_tasks_nrs[NR_CPUS]; ++unsigned long nr_zombie = 0; /* protected by tasklist_lock */ ++unsigned long nr_dead = 0; ++EXPORT_SYMBOL(nr_zombie); ++EXPORT_SYMBOL(nr_dead); ++ ++#define nr_running_inc(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_running++; \ ++ ve_nr_running_inc(ve, vcpu); \ ++ } while (0) 
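/*
 * Illustrative sketch, not part of the patch.  The nr_*_inc()/nr_*_dec()
 * macros above (and their ve_nr_* counterparts) bump plain signed per-CPU
 * counters.  A task can be accounted "in" on the CPU where it wakes up and
 * "out" on a different CPU after migrating, so an individual per-CPU slot
 * may go negative; only the sum over all CPUs is meaningful, which is why
 * the readers added later in this patch (nr_running(), nr_running_ve() and
 * friends) sum the counters as signed values and clamp the result at zero.
 * All names below (NCPUS, account_*, total_nr_running) are invented for the
 * example.
 */
#include <stdio.h>

#define NCPUS 4

static long per_cpu_nr_running[NCPUS];

static void account_wakeup(int cpu) { per_cpu_nr_running[cpu]++; }
static void account_sleep(int cpu)  { per_cpu_nr_running[cpu]--; }

static unsigned long total_nr_running(void)
{
	long sum = 0;
	int i;

	for (i = 0; i < NCPUS; i++)
		sum += per_cpu_nr_running[i];
	return (unsigned long)(sum < 0 ? 0 : sum);	/* clamp transient negatives */
}

int main(void)
{
	account_wakeup(0);	/* task becomes runnable, accounted on CPU 0 */
	account_sleep(2);	/* it later goes to sleep while running on CPU 2 */
	printf("nr_running = %lu\n", total_nr_running());	/* prints 0 */
	return 0;
}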
++#define nr_running_dec(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_running--; \ ++ ve_nr_running_dec(ve, vcpu); \ ++ } while (0) ++ ++#define nr_unint_inc(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_uninterruptible++; \ ++ ve_nr_unint_inc(ve, vcpu); \ ++ } while (0) ++#define nr_unint_dec(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_uninterruptible--; \ ++ ve_nr_unint_dec(ve, vcpu); \ ++ } while (0) ++ ++#define nr_iowait_inc(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_iowait++; \ ++ ve_nr_iowait_inc(ve, vcpu); \ ++ } while (0) ++#define nr_iowait_dec(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_iowait--; \ ++ ve_nr_iowait_dec(ve, vcpu); \ ++ } while (0) ++ ++#define nr_stopped_inc(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_stopped++; \ ++ } while (0) ++#define nr_stopped_dec(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_stopped--; \ ++ } while (0) ++ ++#define nr_sleeping_inc(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_sleeping++; \ ++ } while (0) ++#define nr_sleeping_dec(cpu, vcpu, ve) do { \ ++ glob_tasks_nrs[cpu].nr_sleeping--; \ ++ } while (0) ++ + /* + * task_rq_lock - lock the runqueue a given task resides on and disable + * interrupts. Note the ordering: we can safely lookup the task_rq without +@@ -361,13 +953,39 @@ static int effective_prio(task_t *p) + return prio; + } + ++static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc) ++{ ++ struct ve_task_info *ti; ++ ++ ti = VE_TASK_INFO(p); ++ write_seqcount_begin(&ti->wakeup_lock); ++ ti->wakeup_stamp = cyc; ++ write_seqcount_end(&ti->wakeup_lock); ++} ++ + /* + * __activate_task - move a task to the runqueue. + */ + static inline void __activate_task(task_t *p, runqueue_t *rq) + { ++ cycles_t cycles; ++ unsigned int vcpu; ++ struct ve_struct *ve; ++ ++ cycles = get_cycles(); ++ vcpu = task_cpu(p); ++ ve = VE_TASK_INFO(p)->owner_env; ++ ++ write_wakeup_stamp(p, cycles); ++ VE_TASK_INFO(p)->sleep_time += cycles; ++ nr_running_inc(smp_processor_id(), vcpu, ve); ++ + enqueue_task(p, rq->active); + rq->nr_running++; ++ if (rq->nr_running == 1) { ++ vcpu_save_ve_idle(ve, vcpu, cycles); ++ vcpu_attach(rq); ++ } + } + + /* +@@ -507,11 +1125,33 @@ static void activate_task(task_t *p, run + */ + static void deactivate_task(struct task_struct *p, runqueue_t *rq) + { ++ cycles_t cycles; ++ unsigned int cpu, vcpu; ++ struct ve_struct *ve; ++ ++ cycles = get_cycles(); ++ cpu = smp_processor_id(); ++ vcpu = rq_vcpu(rq)->id; ++ ve = VE_TASK_INFO(p)->owner_env; ++ ++ VE_TASK_INFO(p)->sleep_time -= cycles; + rq->nr_running--; +- if (p->state == TASK_UNINTERRUPTIBLE) ++ nr_running_dec(cpu, vcpu, ve); ++ if (p->state == TASK_UNINTERRUPTIBLE) { + rq->nr_uninterruptible++; ++ nr_unint_inc(cpu, vcpu, ve); ++ } ++ if (p->state == TASK_INTERRUPTIBLE) ++ nr_sleeping_inc(cpu, vcpu, ve); ++ if (p->state == TASK_STOPPED) ++ nr_stopped_inc(cpu, vcpu, ve); ++ /* nr_zombie is calced in exit.c */ + dequeue_task(p, p->array); + p->array = NULL; ++ if (rq->nr_running == 0) { ++ vcpu_strt_ve_idle(ve, vcpu, cycles); ++ vcpu_detach(rq); ++ } + } + + /* +@@ -522,6 +1162,7 @@ static void deactivate_task(struct task_ + * the target CPU. 
+ */ + #ifdef CONFIG_SMP ++/* FIXME: need to add vsched arg */ + static void resched_task(task_t *p) + { + int need_resched, nrpolling; +@@ -532,8 +1173,9 @@ static void resched_task(task_t *p) + need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); + nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); + +- if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) +- smp_send_reschedule(task_cpu(p)); ++ /* FIXME: think over */ ++ if (!need_resched && !nrpolling && (task_pcpu(p) != smp_processor_id())) ++ smp_send_reschedule(task_pcpu(p)); + preempt_enable(); + } + #else +@@ -549,10 +1191,29 @@ static inline void resched_task(task_t * + */ + inline int task_curr(const task_t *p) + { +- return cpu_curr(task_cpu(p)) == p; ++ return task_rq(p)->curr == p; ++} ++ ++/** ++ * idle_cpu - is a given cpu idle currently? ++ * @cpu: the processor in question. ++ */ ++inline int idle_cpu(int cpu) ++{ ++ return pcpu(cpu)->vsched == &idle_vsched; ++} ++ ++EXPORT_SYMBOL_GPL(idle_cpu); ++ ++static inline int idle_vcpu(vcpu_t cpu) ++{ ++#ifdef CONFIG_SCHED_VCPU ++ return !cpu->active; ++#else ++ return idle_cpu(cpu->id); ++#endif + } + +-#ifdef CONFIG_SMP + enum request_type { + REQ_MOVE_TASK, + REQ_SET_DOMAIN, +@@ -564,7 +1225,7 @@ typedef struct { + + /* For REQ_MOVE_TASK */ + task_t *task; +- int dest_cpu; ++ vcpu_t dest_cpu; + + /* For REQ_SET_DOMAIN */ + struct sched_domain *sd; +@@ -576,7 +1237,7 @@ typedef struct { + * The task's runqueue lock must be held. + * Returns true if you have to wait for migration thread. + */ +-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) ++static int migrate_task(task_t *p, vcpu_t dest_cpu, migration_req_t *req) + { + runqueue_t *rq = task_rq(p); + +@@ -584,8 +1245,13 @@ static int migrate_task(task_t *p, int d + * If the task is not on a runqueue (and not running), then + * it is sufficient to simply update the task's cpu field. + */ ++#ifdef CONFIG_SCHED_VCPU ++ BUG_ON(task_vsched(p) == &idle_vsched); ++ BUG_ON(vcpu_vsched(dest_cpu) == &idle_vsched); ++#endif + if (!p->array && !task_running(rq, p)) { +- set_task_cpu(p, dest_cpu); ++ set_task_vsched(p, vcpu_vsched(dest_cpu)); ++ set_task_vcpu(p, dest_cpu); + return 0; + } + +@@ -597,6 +1263,7 @@ static int migrate_task(task_t *p, int d + return 1; + } + ++#ifdef CONFIG_SMP + /* + * wait_task_inactive - wait for a thread to unschedule. + * +@@ -615,7 +1282,12 @@ void wait_task_inactive(task_t * p) + repeat: + rq = task_rq_lock(p, &flags); + /* Must be off runqueue entirely, not preempted. */ +- if (unlikely(p->array)) { ++ /* ++ * VCPU: we need to check task_running() here, since ++ * we drop rq->lock in the middle of schedule() and task ++ * can be deactivated, but still running until it calls vcpu_put() ++ */ ++ if (unlikely(p->array) || task_running(rq, p)) { + /* If it's preempted, we yield. It could be a while. */ + preempted = !task_running(rq, p); + task_rq_unlock(rq, &flags); +@@ -639,8 +1311,11 @@ void kick_process(task_t *p) + int cpu; + + preempt_disable(); +- cpu = task_cpu(p); ++ cpu = task_pcpu(p); + if ((cpu != smp_processor_id()) && task_curr(p)) ++ /* FIXME: ??? think over */ ++ /* should add something like get_pcpu(cpu)->vcpu->id == task_cpu(p), ++ but with serialization of vcpu access... */ + smp_send_reschedule(cpu); + preempt_enable(); + } +@@ -653,9 +1328,9 @@ EXPORT_SYMBOL_GPL(kick_process); + * We want to under-estimate the load of migration sources, to + * balance conservatively. 
+ */ +-static inline unsigned long source_load(int cpu) ++static inline unsigned long source_load(vcpu_t cpu) + { +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + + return min(rq->cpu_load, load_now); +@@ -664,9 +1339,9 @@ static inline unsigned long source_load( + /* + * Return a high guess at the load of a migration-target cpu + */ +-static inline unsigned long target_load(int cpu) ++static inline unsigned long target_load(vcpu_t cpu) + { +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + + return max(rq->cpu_load, load_now); +@@ -682,32 +1357,38 @@ static inline unsigned long target_load( + * Returns the CPU we should wake onto. + */ + #if defined(ARCH_HAS_SCHED_WAKE_IDLE) +-static int wake_idle(int cpu, task_t *p) ++static vcpu_t wake_idle(vcpu_t cpu, task_t *p) + { +- cpumask_t tmp; +- runqueue_t *rq = cpu_rq(cpu); ++ cpumask_t tmp, vtmp; ++ runqueue_t *rq = vcpu_rq(cpu); + struct sched_domain *sd; ++ struct vcpu_scheduler *vsched; + int i; + +- if (idle_cpu(cpu)) ++ if (idle_vcpu(cpu)) + return cpu; + + sd = rq->sd; + if (!(sd->flags & SD_WAKE_IDLE)) + return cpu; + ++ vsched = vcpu_vsched(cpu); + cpus_and(tmp, sd->span, cpu_online_map); +- cpus_and(tmp, tmp, p->cpus_allowed); ++ cpus_and(vtmp, vsched_vcpu_online_map(vsched), p->cpus_allowed); + +- for_each_cpu_mask(i, tmp) { +- if (idle_cpu(i)) +- return i; ++ for_each_cpu_mask(i, vtmp) { ++ vcpu_t vcpu; ++ vcpu = vsched_vcpu(vsched, i); ++ if (!cpu_isset(vcpu_last_pcpu(vcpu), tmp)) ++ continue; ++ if (idle_vcpu(vcpu)) ++ return vcpu; + } + + return cpu; + } + #else +-static inline int wake_idle(int cpu, task_t *p) ++static inline vcpu_t wake_idle(vcpu_t cpu, task_t *p) + { + return cpu; + } +@@ -729,15 +1410,17 @@ static inline int wake_idle(int cpu, tas + */ + static int try_to_wake_up(task_t * p, unsigned int state, int sync) + { +- int cpu, this_cpu, success = 0; ++ vcpu_t cpu, this_cpu; ++ int success = 0; + unsigned long flags; + long old_state; + runqueue_t *rq; + #ifdef CONFIG_SMP + unsigned long load, this_load; + struct sched_domain *sd; +- int new_cpu; ++ vcpu_t new_cpu; + #endif ++ cpu = NULL; + + rq = task_rq_lock(p, &flags); + old_state = p->state; +@@ -747,8 +1430,8 @@ static int try_to_wake_up(task_t * p, un + if (p->array) + goto out_running; + +- cpu = task_cpu(p); +- this_cpu = smp_processor_id(); ++ cpu = task_vcpu(p); ++ this_cpu = this_vcpu(); + + #ifdef CONFIG_SMP + if (unlikely(task_running(rq, p))) +@@ -756,7 +1439,10 @@ static int try_to_wake_up(task_t * p, un + + new_cpu = cpu; + +- if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) ++ /* FIXME: add vsched->last_vcpu array to optimize wakeups in different vsched */ ++ if (vcpu_vsched(cpu) != vcpu_vsched(this_cpu)) ++ goto out_set_cpu; ++ if (cpu == this_cpu || unlikely(!vcpu_isset(this_cpu, p->cpus_allowed))) + goto out_set_cpu; + + load = source_load(cpu); +@@ -795,7 +1481,7 @@ static int try_to_wake_up(task_t * p, un + * Now sd has SD_WAKE_AFFINE and p is cache cold in sd + * or sd has SD_WAKE_BALANCE and there is an imbalance + */ +- if (cpu_isset(cpu, sd->span)) ++ if (cpu_isset(vcpu_last_pcpu(cpu), sd->span)) + goto out_set_cpu; + } + } +@@ -803,8 +1489,8 @@ static int try_to_wake_up(task_t * p, un + new_cpu = cpu; /* Could not wake to this_cpu. 
Wake to cpu instead */ + out_set_cpu: + new_cpu = wake_idle(new_cpu, p); +- if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) { +- set_task_cpu(p, new_cpu); ++ if (new_cpu != cpu && vcpu_isset(new_cpu, p->cpus_allowed)) { ++ set_task_vcpu(p, new_cpu); + task_rq_unlock(rq, &flags); + /* might preempt at this point */ + rq = task_rq_lock(p, &flags); +@@ -814,20 +1500,28 @@ out_set_cpu: + if (p->array) + goto out_running; + +- this_cpu = smp_processor_id(); +- cpu = task_cpu(p); ++ this_cpu = this_vcpu(); ++ cpu = task_vcpu(p); + } + + out_activate: + #endif /* CONFIG_SMP */ + if (old_state == TASK_UNINTERRUPTIBLE) { + rq->nr_uninterruptible--; ++ nr_unint_dec(smp_processor_id(), task_cpu(p), ++ VE_TASK_INFO(p)->owner_env); + /* + * Tasks on involuntary sleep don't earn + * sleep_avg beyond just interactive state. + */ + p->activated = -1; + } ++ if (old_state == TASK_INTERRUPTIBLE) ++ nr_sleeping_dec(smp_processor_id(), task_cpu(p), ++ VE_TASK_INFO(p)->owner_env); ++ if (old_state == TASK_STOPPED) ++ nr_stopped_dec(smp_processor_id(), task_cpu(p), ++ VE_TASK_INFO(p)->owner_env); + + /* + * Sync wakeups (i.e. those types of wakeups where the waker +@@ -866,6 +1560,37 @@ int fastcall wake_up_state(task_t *p, un + } + + /* ++ * init is special, it is forked from swapper (idle_vsched) and should ++ * belong to default_vsched, so we have to change it's vsched/fairsched manually ++ */ ++void wake_up_init(void) ++{ ++ task_t *p; ++ runqueue_t *rq; ++ unsigned long flags; ++ ++ p = find_task_by_pid_all(1); ++ BUG_ON(p == NULL || p->state != TASK_STOPPED); ++ ++ /* we should change both fairsched node and vsched here */ ++ set_task_vsched(p, &default_vsched); ++ set_task_cpu(p, 0); ++ ++ /* ++ * can't call wake_up_forked_thread() directly here, ++ * since it assumes that a child belongs to the same vsched ++ */ ++ p->state = TASK_RUNNING; ++ p->sleep_avg = 0; ++ p->interactive_credit = 0; ++ p->prio = effective_prio(p); ++ ++ rq = task_rq_lock(p, &flags); ++ __activate_task(p, rq); ++ task_rq_unlock(rq, &flags); ++} ++ ++/* + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. 
+ */ +@@ -904,6 +1629,7 @@ void fastcall sched_fork(task_t *p) + p->first_time_slice = 1; + current->time_slice >>= 1; + p->timestamp = sched_clock(); ++ VE_TASK_INFO(p)->sleep_time -= get_cycles(); /*cosmetic: sleep till wakeup below*/ + if (!current->time_slice) { + /* + * This case is rare, it happens when the parent has only +@@ -931,6 +1657,7 @@ void fastcall wake_up_forked_process(tas + runqueue_t *rq = task_rq_lock(current, &flags); + + BUG_ON(p->state != TASK_RUNNING); ++ BUG_ON(task_vsched(current) != task_vsched(p)); + + /* + * We decrease the sleep average of forking parents +@@ -946,7 +1673,8 @@ void fastcall wake_up_forked_process(tas + p->interactive_credit = 0; + + p->prio = effective_prio(p); +- set_task_cpu(p, smp_processor_id()); ++ set_task_pcpu(p, task_pcpu(current)); ++ set_task_vcpu(p, this_vcpu()); + + if (unlikely(!current->array)) + __activate_task(p, rq); +@@ -956,6 +1684,8 @@ void fastcall wake_up_forked_process(tas + p->array = current->array; + p->array->nr_active++; + rq->nr_running++; ++ nr_running_inc(smp_processor_id(), task_cpu(p), ++ VE_TASK_INFO(p)->owner_env); + } + task_rq_unlock(rq, &flags); + } +@@ -974,18 +1704,16 @@ void fastcall sched_exit(task_t * p) + unsigned long flags; + runqueue_t *rq; + +- local_irq_save(flags); +- if (p->first_time_slice) { +- p->parent->time_slice += p->time_slice; +- if (unlikely(p->parent->time_slice > MAX_TIMESLICE)) +- p->parent->time_slice = MAX_TIMESLICE; +- } +- local_irq_restore(flags); + /* + * If the child was a (relative-) CPU hog then decrease + * the sleep_avg of the parent as well. + */ + rq = task_rq_lock(p->parent, &flags); ++ if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { ++ p->parent->time_slice += p->time_slice; ++ if (unlikely(p->parent->time_slice > MAX_TIMESLICE)) ++ p->parent->time_slice = MAX_TIMESLICE; ++ } + if (p->sleep_avg < p->parent->sleep_avg) + p->parent->sleep_avg = p->parent->sleep_avg / + (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / +@@ -1008,25 +1736,39 @@ void fastcall sched_exit(task_t * p) + */ + static void finish_task_switch(task_t *prev) + { +- runqueue_t *rq = this_rq(); +- struct mm_struct *mm = rq->prev_mm; ++ runqueue_t *rq; ++ struct mm_struct *mm; + unsigned long prev_task_flags; ++ vcpu_t prev_vcpu, vcpu; + ++ prev_vcpu = task_vcpu(prev); ++ vcpu = this_vcpu(); ++ rq = vcpu_rq(vcpu); ++ mm = rq->prev_mm; + rq->prev_mm = NULL; + + /* + * A task struct has one reference for the use as "current". +- * If a task dies, then it sets TASK_ZOMBIE in tsk->state and calls +- * schedule one last time. The schedule call will never return, ++ * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and ++ * calls schedule one last time. The schedule call will never return, + * and the scheduled task must drop that reference. +- * The test for TASK_ZOMBIE must occur while the runqueue locks are ++ * The test for EXIT_ZOMBIE must occur while the runqueue locks are + * still held, otherwise prev could be scheduled on another cpu, die + * there before we look at prev->state, and then the reference would + * be dropped twice. + * Manfred Spraul <manfred@colorfullife.com> + */ + prev_task_flags = prev->flags; ++ ++ /* ++ * no schedule() should happen until vcpu_put, ++ * and schedule_tail() calls us with preempt enabled... 
++ */ + finish_arch_switch(rq, prev); ++ if (prev_vcpu != vcpu) ++ vcpu_put(prev_vcpu); ++ local_irq_enable(); ++ + if (mm) + mmdrop(mm); + if (unlikely(prev_task_flags & PF_DEAD)) +@@ -1042,7 +1784,7 @@ asmlinkage void schedule_tail(task_t *pr + finish_task_switch(prev); + + if (current->set_child_tid) +- put_user(current->pid, current->set_child_tid); ++ put_user(virt_pid(current), current->set_child_tid); + } + + /* +@@ -1083,44 +1825,109 @@ task_t * context_switch(runqueue_t *rq, + */ + unsigned long nr_running(void) + { +- unsigned long i, sum = 0; +- +- for_each_cpu(i) +- sum += cpu_rq(i)->nr_running; ++ int i; ++ long sum; + +- return sum; ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_running; ++ return (unsigned long)(sum < 0 ? 0 : sum); + } ++EXPORT_SYMBOL(nr_running); + + unsigned long nr_uninterruptible(void) + { +- unsigned long i, sum = 0; +- +- for_each_cpu(i) +- sum += cpu_rq(i)->nr_uninterruptible; ++ int i; ++ long sum; + +- return sum; ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_uninterruptible; ++ return (unsigned long)(sum < 0 ? 0 : sum); + } ++EXPORT_SYMBOL(nr_uninterruptible); + +-unsigned long long nr_context_switches(void) ++unsigned long nr_sleeping(void) + { +- unsigned long long i, sum = 0; ++ int i; ++ long sum; + +- for_each_cpu(i) +- sum += cpu_rq(i)->nr_switches; ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_sleeping; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++EXPORT_SYMBOL(nr_sleeping); + +- return sum; ++unsigned long nr_stopped(void) ++{ ++ int i; ++ long sum; ++ ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_stopped; ++ return (unsigned long)(sum < 0 ? 0 : sum); + } ++EXPORT_SYMBOL(nr_stopped); + + unsigned long nr_iowait(void) + { +- unsigned long i, sum = 0; ++ int i; ++ long sum; + +- for_each_cpu(i) +- sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_iowait; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++ ++unsigned long long nr_context_switches(void) ++{ ++ int i; ++ long long sum; + ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += glob_tasks_nrs[i].nr_switches; + return sum; + } + ++#ifdef CONFIG_VE ++unsigned long nr_running_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += VE_CPU_STATS(ve, i)->nr_running; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++ ++unsigned long nr_uninterruptible_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += VE_CPU_STATS(ve, i)->nr_unint; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++ ++unsigned long nr_iowait_ve(struct ve_struct *ve) ++{ ++ int i; ++ long sum; ++ ++ sum = 0; ++ for (i = 0; i < NR_CPUS; i++) ++ sum += VE_CPU_STATS(ve, i)->nr_iowait; ++ return (unsigned long)(sum < 0 ? 0 : sum); ++} ++#endif ++ + /* + * double_rq_lock - safely lock two runqueues + * +@@ -1167,24 +1974,32 @@ enum idle_type + /* + * find_idlest_cpu - find the least busy runqueue. 
+ */ +-static int find_idlest_cpu(struct task_struct *p, int this_cpu, ++static vcpu_t find_idlest_cpu(struct task_struct *p, vcpu_t this_cpu, + struct sched_domain *sd) + { + unsigned long load, min_load, this_load; +- int i, min_cpu; +- cpumask_t mask; ++ int i; ++ vcpu_t min_cpu; ++ cpumask_t mask, vmask; ++ struct vcpu_scheduler *vsched; + +- min_cpu = UINT_MAX; ++ vsched = task_vsched(p); ++ min_cpu = NULL; + min_load = ULONG_MAX; + + cpus_and(mask, sd->span, cpu_online_map); +- cpus_and(mask, mask, p->cpus_allowed); ++ cpus_and(vmask, vsched_vcpu_online_map(vsched), p->cpus_allowed); + +- for_each_cpu_mask(i, mask) { +- load = target_load(i); ++ for_each_cpu_mask(i, vmask) { ++ vcpu_t vcpu; ++ vcpu = vsched_vcpu(vsched, i); + ++ if (!cpu_isset(vcpu_last_pcpu(vcpu), mask)) ++ continue; ++ ++ load = target_load(vcpu); + if (load < min_load) { +- min_cpu = i; ++ min_cpu = vcpu; + min_load = load; + + /* break out early on an idle CPU: */ +@@ -1193,6 +2008,9 @@ static int find_idlest_cpu(struct task_s + } + } + ++ if (min_cpu == NULL) ++ return this_cpu; ++ + /* add +1 to account for the new task */ + this_load = source_load(this_cpu) + SCHED_LOAD_SCALE; + +@@ -1220,9 +2038,9 @@ static int find_idlest_cpu(struct task_s + void fastcall wake_up_forked_thread(task_t * p) + { + unsigned long flags; +- int this_cpu = get_cpu(), cpu; ++ vcpu_t this_cpu = get_vcpu(), cpu; + struct sched_domain *tmp, *sd = NULL; +- runqueue_t *this_rq = cpu_rq(this_cpu), *rq; ++ runqueue_t *this_rq = vcpu_rq(this_cpu), *rq; + + /* + * Find the largest domain that this CPU is part of that +@@ -1238,7 +2056,7 @@ void fastcall wake_up_forked_thread(task + + local_irq_save(flags); + lock_again: +- rq = cpu_rq(cpu); ++ rq = vcpu_rq(cpu); + double_rq_lock(this_rq, rq); + + BUG_ON(p->state != TASK_RUNNING); +@@ -1248,7 +2066,7 @@ lock_again: + * the mask could have changed - just dont migrate + * in this case: + */ +- if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) { ++ if (unlikely(!vcpu_isset(cpu, p->cpus_allowed))) { + cpu = this_cpu; + double_rq_unlock(this_rq, rq); + goto lock_again; +@@ -1267,7 +2085,7 @@ lock_again: + p->interactive_credit = 0; + + p->prio = effective_prio(p); +- set_task_cpu(p, cpu); ++ set_task_vcpu(p, cpu); + + if (cpu == this_cpu) { + if (unlikely(!current->array)) +@@ -1278,6 +2096,8 @@ lock_again: + p->array = current->array; + p->array->nr_active++; + rq->nr_running++; ++ nr_running_inc(smp_processor_id(), task_cpu(p), ++ VE_TASK_INFO(p)->owner_env); + } + } else { + /* Not the local CPU - must adjust timestamp */ +@@ -1290,8 +2110,9 @@ lock_again: + + double_rq_unlock(this_rq, rq); + local_irq_restore(flags); +- put_cpu(); ++ put_vcpu(); + } ++#endif + + /* + * If dest_cpu is allowed for this process, migrate the task to it. +@@ -1299,15 +2120,15 @@ lock_again: + * allow dest_cpu, which will force the cpu onto dest_cpu. Then + * the cpu_allowed mask is restored. 
+ */ +-static void sched_migrate_task(task_t *p, int dest_cpu) ++static void sched_migrate_task(task_t *p, vcpu_t dest_cpu) + { + migration_req_t req; + runqueue_t *rq; + unsigned long flags; + + rq = task_rq_lock(p, &flags); +- if (!cpu_isset(dest_cpu, p->cpus_allowed) +- || unlikely(cpu_is_offline(dest_cpu))) ++ if (!vcpu_isset(dest_cpu, p->cpus_allowed) ++ || unlikely(vcpu_is_offline(dest_cpu))) + goto out; + + /* force the process onto the specified CPU */ +@@ -1325,6 +2146,7 @@ out: + task_rq_unlock(rq, &flags); + } + ++#ifdef CONFIG_SMP + /* + * sched_balance_exec(): find the highest-level, exec-balance-capable + * domain and try to migrate the task to the least loaded CPU. +@@ -1335,10 +2157,10 @@ out: + void sched_balance_exec(void) + { + struct sched_domain *tmp, *sd = NULL; +- int new_cpu, this_cpu = get_cpu(); ++ vcpu_t new_cpu, this_cpu = get_vcpu(); + + /* Prefer the current CPU if there's only this task running */ +- if (this_rq()->nr_running <= 1) ++ if (vcpu_rq(this_cpu)->nr_running <= 1) + goto out; + + for_each_domain(this_cpu, tmp) +@@ -1354,7 +2176,7 @@ void sched_balance_exec(void) + } + } + out: +- put_cpu(); ++ put_vcpu(); + } + + /* +@@ -1378,12 +2200,26 @@ static void double_lock_balance(runqueue + */ + static inline + void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, +- runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) ++ runqueue_t *this_rq, prio_array_t *this_array, vcpu_t this_cpu) + { ++ struct ve_struct *ve; ++ cycles_t cycles; ++ ++ cycles = get_cycles(); ++ ve = VE_TASK_INFO(p)->owner_env; ++ + dequeue_task(p, src_array); + src_rq->nr_running--; +- set_task_cpu(p, this_cpu); ++ if (src_rq->nr_running == 0) { ++ vcpu_detach(src_rq); ++ vcpu_strt_ve_idle(ve, rq_vcpu(src_rq)->id, cycles); ++ } ++ set_task_vcpu(p, this_cpu); + this_rq->nr_running++; ++ if (this_rq->nr_running == 1) { ++ vcpu_save_ve_idle(ve, this_cpu->id, cycles); ++ vcpu_attach(this_rq); ++ } + enqueue_task(p, this_array); + p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + + this_rq->timestamp_last_tick; +@@ -1399,7 +2235,7 @@ void pull_task(runqueue_t *src_rq, prio_ + * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? + */ + static inline +-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, ++int can_migrate_task(task_t *p, runqueue_t *rq, vcpu_t this_cpu, + struct sched_domain *sd, enum idle_type idle) + { + /* +@@ -1410,7 +2246,7 @@ int can_migrate_task(task_t *p, runqueue + */ + if (task_running(rq, p)) + return 0; +- if (!cpu_isset(this_cpu, p->cpus_allowed)) ++ if (!vcpu_isset(this_cpu, p->cpus_allowed)) + return 0; + + /* Aggressive migration if we've failed balancing */ +@@ -1430,7 +2266,7 @@ int can_migrate_task(task_t *p, runqueue + * + * Called with both runqueues locked. + */ +-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, ++static int move_tasks(runqueue_t *this_rq, vcpu_t this_cpu, runqueue_t *busiest, + unsigned long max_nr_move, struct sched_domain *sd, + enum idle_type idle) + { +@@ -1506,12 +2342,17 @@ out: + * moved to restore balance via the imbalance parameter. 
+ */ + static struct sched_group * +-find_busiest_group(struct sched_domain *sd, int this_cpu, ++find_busiest_group(struct sched_domain *sd, vcpu_t this_cpu, + unsigned long *imbalance, enum idle_type idle) + { + struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; + unsigned long max_load, avg_load, total_load, this_load, total_pwr; ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ int this_pcpu; + ++ vsched = vcpu_vsched(this_cpu); ++ this_pcpu = vcpu_last_pcpu(this_cpu); + max_load = this_load = total_load = total_pwr = 0; + + do { +@@ -1520,20 +2361,21 @@ find_busiest_group(struct sched_domain * + int local_group; + int i, nr_cpus = 0; + +- local_group = cpu_isset(this_cpu, group->cpumask); ++ local_group = cpu_isset(this_pcpu, group->cpumask); + + /* Tally up the load of all CPUs in the group */ + avg_load = 0; +- cpus_and(tmp, group->cpumask, cpu_online_map); ++ cpus_and(tmp, group->cpumask, vsched_pcpu_running_map(vsched)); + if (unlikely(cpus_empty(tmp))) + goto nextgroup; + + for_each_cpu_mask(i, tmp) { ++ vcpu = pcpu(i)->vcpu; + /* Bias balancing toward cpus of our domain */ + if (local_group) +- load = target_load(i); ++ load = target_load(vcpu); + else +- load = source_load(i); ++ load = source_load(vcpu); + + nr_cpus++; + avg_load += load; +@@ -1562,6 +2404,8 @@ nextgroup: + + if (!busiest || this_load >= max_load) + goto out_balanced; ++ if (!this) ++ this = busiest; /* this->cpu_power is needed below */ + + avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; + +@@ -1645,36 +2489,71 @@ out_balanced: + /* + * find_busiest_queue - find the busiest runqueue among the cpus in group. + */ +-static runqueue_t *find_busiest_queue(struct sched_group *group) ++static vcpu_t find_busiest_queue(vcpu_t this_cpu, ++ struct sched_group *group, enum idle_type idle) + { + cpumask_t tmp; ++ vcpu_t vcpu; ++ struct vcpu_scheduler *vsched; + unsigned long load, max_load = 0; +- runqueue_t *busiest = NULL; ++ vcpu_t busiest = NULL; + int i; + ++ vsched = vcpu_vsched(this_cpu); + cpus_and(tmp, group->cpumask, cpu_online_map); + for_each_cpu_mask(i, tmp) { +- load = source_load(i); ++ vcpu = pcpu(i)->vcpu; ++ if (vcpu_vsched(vcpu) != vsched && idle != IDLE) ++ continue; ++ load = source_load(vcpu); ++ if (load > max_load) { ++ max_load = load; ++ busiest = vcpu; ++ } ++ } ++ ++#ifdef CONFIG_SCHED_VCPU ++ cpus_andnot(tmp, vsched->vcpu_online_map, vsched->vcpu_running_map); ++ for_each_cpu_mask(i, tmp) { ++ vcpu = vsched_vcpu(vsched, i); ++ load = source_load(vcpu); + + if (load > max_load) { + max_load = load; +- busiest = cpu_rq(i); ++ busiest = vcpu; + } + } ++#endif + + return busiest; + } + ++#ifdef CONFIG_SCHED_VCPU ++vcpu_t find_idle_vcpu(struct vcpu_scheduler *vsched) ++{ ++ vcpu_t vcpu; ++ ++ vcpu = NULL; ++ spin_lock(&fairsched_lock); ++ if (!list_empty(&vsched->idle_list)) ++ vcpu = list_entry(vsched->idle_list.next, ++ struct vcpu_info, list); ++ spin_unlock(&fairsched_lock); ++ return vcpu; ++} ++#endif ++ + /* + * Check this_cpu to ensure it is balanced within domain. Attempt to move + * tasks if there is an imbalance. + * + * Called with this_rq unlocked. 
+ */ +-static int load_balance(int this_cpu, runqueue_t *this_rq, ++static int load_balance(vcpu_t this_cpu, runqueue_t *this_rq, + struct sched_domain *sd, enum idle_type idle) + { + struct sched_group *group; ++ vcpu_t busiest_vcpu; + runqueue_t *busiest; + unsigned long imbalance; + int nr_moved; +@@ -1685,18 +2564,34 @@ static int load_balance(int this_cpu, ru + if (!group) + goto out_balanced; + +- busiest = find_busiest_queue(group); +- if (!busiest) ++ busiest_vcpu = find_busiest_queue(this_cpu, group, idle); ++ if (!busiest_vcpu) + goto out_balanced; ++ ++#ifdef CONFIG_SCHED_VCPU ++ if (vcpu_vsched(this_cpu) != vcpu_vsched(busiest_vcpu)) { ++ spin_unlock(&this_rq->lock); ++ this_cpu = find_idle_vcpu(vcpu_vsched(busiest_vcpu)); ++ if (!this_cpu) ++ goto out_tune; ++ this_rq = vcpu_rq(this_cpu); ++ spin_lock(&this_rq->lock); ++ /* ++ * The check below is not mandatory, the lock may ++ * be dropped below in double_lock_balance. ++ */ ++ if (this_rq->nr_running) ++ goto out_balanced; ++ } ++#endif ++ busiest = vcpu_rq(busiest_vcpu); + /* + * This should be "impossible", but since load + * balancing is inherently racy and statistical, + * it could happen in theory. + */ +- if (unlikely(busiest == this_rq)) { +- WARN_ON(1); ++ if (unlikely(busiest == this_rq)) + goto out_balanced; +- } + + nr_moved = 0; + if (busiest->nr_running > 1) { +@@ -1746,6 +2641,7 @@ static int load_balance(int this_cpu, ru + out_balanced: + spin_unlock(&this_rq->lock); + ++out_tune: + /* tune up the balancing interval */ + if (sd->balance_interval < sd->max_interval) + sd->balance_interval *= 2; +@@ -1760,50 +2656,54 @@ out_balanced: + * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). + * this_rq is locked. + */ +-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, ++static int load_balance_newidle(vcpu_t this_cpu, runqueue_t *this_rq, + struct sched_domain *sd) + { + struct sched_group *group; +- runqueue_t *busiest = NULL; ++ vcpu_t busiest_vcpu; ++ runqueue_t *busiest; + unsigned long imbalance; +- int nr_moved = 0; + + group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); + if (!group) + goto out; + +- busiest = find_busiest_queue(group); +- if (!busiest || busiest == this_rq) ++ busiest_vcpu = find_busiest_queue(this_cpu, group, NEWLY_IDLE); ++ if (!busiest_vcpu || busiest_vcpu == this_cpu) + goto out; ++ busiest = vcpu_rq(busiest_vcpu); + + /* Attempt to move tasks */ + double_lock_balance(this_rq, busiest); + +- nr_moved = move_tasks(this_rq, this_cpu, busiest, +- imbalance, sd, NEWLY_IDLE); ++ move_tasks(this_rq, this_cpu, busiest, ++ imbalance, sd, NEWLY_IDLE); + + spin_unlock(&busiest->lock); + + out: +- return nr_moved; ++ return 0; + } + + /* + * idle_balance is called by schedule() if this_cpu is about to become + * idle. Attempts to pull tasks from other CPUs. ++ * ++ * Returns whether to continue with another runqueue ++ * instead of switching to idle. + */ +-static inline void idle_balance(int this_cpu, runqueue_t *this_rq) ++static int idle_balance(vcpu_t this_cpu, runqueue_t *this_rq) + { + struct sched_domain *sd; + + for_each_domain(this_cpu, sd) { + if (sd->flags & SD_BALANCE_NEWIDLE) { +- if (load_balance_newidle(this_cpu, this_rq, sd)) { ++ if (load_balance_newidle(this_cpu, this_rq, sd)) + /* We've pulled tasks over so stop searching */ +- break; +- } ++ return 1; + } + } ++ return 0; + } + + /* +@@ -1813,34 +2713,52 @@ static inline void idle_balance(int this + * logical imbalance. + * + * Called with busiest locked. 
++ * ++ * In human terms: balancing of CPU load by moving tasks between CPUs is ++ * performed by 2 methods, push and pull. ++ * In certain places when CPU is found to be idle, it performs pull from busy ++ * CPU to current (idle) CPU. ++ * active_load_balance implements push method, with migration thread getting ++ * scheduled on a busy CPU (hence, making all running processes on this CPU sit ++ * in the queue) and selecting where to push and which task. + */ +-static void active_load_balance(runqueue_t *busiest, int busiest_cpu) ++static void active_load_balance(runqueue_t *busiest, vcpu_t busiest_cpu) + { + struct sched_domain *sd; + struct sched_group *group, *busy_group; ++ struct vcpu_scheduler *vsched; + int i; + + if (busiest->nr_running <= 1) + return; + ++ /* ++ * Our main candidate where to push our tasks is busiest->push_cpu. ++ * First, find the domain that spans over both that candidate CPU and ++ * the current one. ++ * ++ * FIXME: make sure that push_cpu doesn't disappear before we get here. ++ */ + for_each_domain(busiest_cpu, sd) +- if (cpu_isset(busiest->push_cpu, sd->span)) ++ if (cpu_isset(vcpu_last_pcpu(busiest->push_cpu), sd->span)) + break; + if (!sd) { + WARN_ON(1); + return; + } + ++ /* Remember the group containing the current CPU (to ignore it). */ + group = sd->groups; +- while (!cpu_isset(busiest_cpu, group->cpumask)) ++ while (!cpu_isset(vcpu_last_pcpu(busiest_cpu), group->cpumask)) + group = group->next; + busy_group = group; + ++ vsched = vcpu_vsched(busiest_cpu); + group = sd->groups; + do { + cpumask_t tmp; + runqueue_t *rq; +- int push_cpu = 0; ++ vcpu_t vcpu, push_cpu; + + if (group == busy_group) + goto next_group; +@@ -1849,13 +2767,21 @@ static void active_load_balance(runqueue + if (!cpus_weight(tmp)) + goto next_group; + ++ push_cpu = NULL; + for_each_cpu_mask(i, tmp) { +- if (!idle_cpu(i)) ++ vcpu = pcpu(i)->vcpu; ++ if (vcpu_vsched(vcpu) != vsched) ++ continue; ++ if (!idle_vcpu(vcpu)) + goto next_group; +- push_cpu = i; ++ push_cpu = vcpu; + } ++#ifdef CONFIG_SCHED_VCPU ++ if (push_cpu == NULL) ++ goto next_group; ++#endif + +- rq = cpu_rq(push_cpu); ++ rq = vcpu_rq(push_cpu); + + /* + * This condition is "impossible", but since load +@@ -1871,6 +2797,28 @@ static void active_load_balance(runqueue + next_group: + group = group->next; + } while (group != sd->groups); ++ ++#ifdef CONFIG_SCHED_VCPU ++ if (busiest->nr_running > 2) { /* 1 for migration thread, 1 for task */ ++ cpumask_t tmp; ++ runqueue_t *rq; ++ vcpu_t vcpu; ++ ++ cpus_andnot(tmp, vsched->vcpu_online_map, ++ vsched->vcpu_running_map); ++ for_each_cpu_mask(i, tmp) { ++ vcpu = vsched_vcpu(vsched, i); ++ if (!idle_vcpu(vcpu)) ++ continue; ++ rq = vcpu_rq(vcpu); ++ double_lock_balance(busiest, rq); ++ move_tasks(rq, vcpu, busiest, 1, sd, IDLE); ++ spin_unlock(&rq->lock); ++ if (busiest->nr_running <= 2) ++ break; ++ } ++ } ++#endif + } + + /* +@@ -1883,27 +2831,18 @@ next_group: + */ + + /* Don't have all balancing operations going off at once */ +-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) ++#define CPU_OFFSET(cpu) (HZ * (cpu) / NR_CPUS) + +-static void rebalance_tick(int this_cpu, runqueue_t *this_rq, ++static void rebalance_tick(vcpu_t this_cpu, runqueue_t *this_rq, + enum idle_type idle) + { +- unsigned long old_load, this_load; +- unsigned long j = jiffies + CPU_OFFSET(this_cpu); ++ unsigned long j; + struct sched_domain *sd; + + /* Update our load */ +- old_load = this_rq->cpu_load; +- this_load = this_rq->nr_running * SCHED_LOAD_SCALE; +- /* +- * Round up the averaging division 
if load is increasing. This +- * prevents us from getting stuck on 9 if the load is 10, for +- * example. +- */ +- if (this_load > old_load) +- old_load++; +- this_rq->cpu_load = (old_load + this_load) / 2; ++ update_rq_cpu_load(this_rq); + ++ j = jiffies + CPU_OFFSET(smp_processor_id()); + for_each_domain(this_cpu, sd) { + unsigned long interval = sd->balance_interval; + +@@ -1914,7 +2853,6 @@ static void rebalance_tick(int this_cpu, + interval = msecs_to_jiffies(interval); + if (unlikely(!interval)) + interval = 1; +- + if (j - sd->last_balance >= interval) { + if (load_balance(this_cpu, this_rq, sd, idle)) { + /* We've pulled tasks over so no longer idle */ +@@ -1928,26 +2866,30 @@ static void rebalance_tick(int this_cpu, + /* + * on UP we do not need to balance between CPUs: + */ +-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) ++static inline void rebalance_tick(vcpu_t cpu, runqueue_t *rq, enum idle_type idle) + { + } +-static inline void idle_balance(int cpu, runqueue_t *rq) ++static inline void idle_balance(vcpu_t cpu, runqueue_t *rq) + { + } + #endif + +-static inline int wake_priority_sleeper(runqueue_t *rq) ++static inline int wake_priority_sleeper(runqueue_t *rq, task_t *idle) + { ++#ifndef CONFIG_SCHED_VCPU ++ /* FIXME: can we implement SMT priority sleeping for this? */ + #ifdef CONFIG_SCHED_SMT + /* + * If an SMT sibling task has been put to sleep for priority + * reasons reschedule the idle task to see if it can now run. + */ + if (rq->nr_running) { +- resched_task(rq->idle); ++ /* FIXME */ ++ resched_task(idle); + return 1; + } + #endif ++#endif + return 0; + } + +@@ -1971,6 +2913,25 @@ EXPORT_PER_CPU_SYMBOL(kstat); + STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ + ((rq)->curr->static_prio > (rq)->best_expired_prio)) + ++#ifdef CONFIG_VE ++#define update_ve_nice(p, tick) do { \ ++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \ ++ task_cpu(p))->nice += tick; \ ++ } while (0) ++#define update_ve_user(p, tick) do { \ ++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \ ++ task_cpu(p))->user += tick; \ ++ } while (0) ++#define update_ve_system(p, tick) do { \ ++ VE_CPU_STATS(VE_TASK_INFO(p)->owner_env, \ ++ task_cpu(p))->system += tick; \ ++ } while (0) ++#else ++#define update_ve_nice(p, tick) do { } while (0) ++#define update_ve_user(p, tick) do { } while (0) ++#define update_ve_system(p, tick) do { } while (0) ++#endif ++ + /* + * This function gets called by the timer code, with HZ frequency. + * We call it with interrupts disabled. 
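The rebalance_tick() hunk above drops the inline cpu_load averaging in favour of the update_rq_cpu_load() helper introduced earlier in this patch; both halve the gap between the stored cpu_load and the instantaneous load on every tick, bumping the old value first while load is rising so that integer truncation cannot leave the average stuck just below a stable load (the "stuck on 9 if the load is 10" case from the removed comment). The toy program below is only a sketch of that effect, not part of the patch; the variable names are invented and plain small integers are used instead of SCHED_LOAD_SCALE units.

/* Toy model of the cpu_load averaging, assuming a steady instantaneous load. */
#include <stdio.h>

int main(void)
{
	unsigned long plain = 0, rounded = 0, now = 10;	/* steady load of 10 */
	int tick;

	for (tick = 0; tick < 10; tick++) {
		unsigned long old = rounded;

		/* naive average: integer truncation stalls one short, at 9 */
		plain = (plain + now) / 2;

		/* update_rq_cpu_load() behaviour: round up while load rises */
		if (now > old)
			old++;
		rounded = (old + now) / 2;
	}
	printf("plain = %lu, rounded = %lu\n", plain, rounded);	/* 9 vs 10 */
	return 0;
}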
+@@ -1981,12 +2942,17 @@ EXPORT_PER_CPU_SYMBOL(kstat); + void scheduler_tick(int user_ticks, int sys_ticks) + { + int cpu = smp_processor_id(); ++ vcpu_t vcpu; + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; +- runqueue_t *rq = this_rq(); ++ runqueue_t *rq; + task_t *p = current; + ++ vcpu = this_vcpu(); ++ rq = vcpu_rq(vcpu); + rq->timestamp_last_tick = sched_clock(); + ++ set_tsk_need_resched(p); //FIXME ++ + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_ticks); + +@@ -1998,22 +2964,25 @@ void scheduler_tick(int user_ticks, int + cpustat->softirq += sys_ticks; + sys_ticks = 0; + } +- +- if (p == rq->idle) { ++ if (p == pcpu(cpu)->idle) { + if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait += sys_ticks; + else + cpustat->idle += sys_ticks; +- if (wake_priority_sleeper(rq)) ++ if (wake_priority_sleeper(rq, pcpu(cpu)->idle)) + goto out; +- rebalance_tick(cpu, rq, IDLE); ++ rebalance_tick(vcpu, rq, IDLE); + return; + } +- if (TASK_NICE(p) > 0) ++ if (TASK_NICE(p) > 0) { + cpustat->nice += user_ticks; +- else ++ update_ve_nice(p, user_ticks); ++ } else { + cpustat->user += user_ticks; ++ update_ve_user(p, user_ticks); ++ } + cpustat->system += sys_ticks; ++ update_ve_system(p, sys_ticks); + + /* Task might have expired already, but not scheduled off yet */ + if (p->array != rq->active) { +@@ -2076,9 +3045,22 @@ void scheduler_tick(int user_ticks, int + * This only applies to tasks in the interactive + * delta range with at least TIMESLICE_GRANULARITY to requeue. + */ ++ unsigned long ts_gran; ++ ++ ts_gran = TIMESLICE_GRANULARITY(p); ++ if (ts_gran == 0) { ++ printk("BUG!!! Zero granulatity!\n" ++ "Task %d/%s, VE %d, sleep_avg %lu, cpus %d\n", ++ p->pid, p->comm, ++ VE_TASK_INFO(p)->owner_env->veid, ++ p->sleep_avg, ++ vsched_num_online_vcpus(task_vsched(p))); ++ ts_gran = 1; ++ } ++ + if (TASK_INTERACTIVE(p) && !((task_timeslice(p) - +- p->time_slice) % TIMESLICE_GRANULARITY(p)) && +- (p->time_slice >= TIMESLICE_GRANULARITY(p)) && ++ p->time_slice) % ts_gran) && ++ (p->time_slice >= ts_gran) && + (p->array == rq->active)) { + + dequeue_task(p, rq->active); +@@ -2090,11 +3072,12 @@ void scheduler_tick(int user_ticks, int + out_unlock: + spin_unlock(&rq->lock); + out: +- rebalance_tick(cpu, rq, NOT_IDLE); ++ rebalance_tick(vcpu, rq, NOT_IDLE); + } + +-#ifdef CONFIG_SCHED_SMT +-static inline void wake_sleeping_dependent(int cpu, runqueue_t *rq) ++#if defined(CONFIG_SCHED_SMT) && !defined(CONFIG_SCHED_VCPU) ++/* FIXME: SMT scheduling */ ++static void wake_sleeping_dependent(int cpu, runqueue_t *rq) + { + int i; + struct sched_domain *sd = rq->sd; +@@ -2110,18 +3093,18 @@ static inline void wake_sleeping_depende + if (i == cpu) + continue; + +- smt_rq = cpu_rq(i); ++ smt_rq = vcpu_rq(vcpu(i)); + + /* + * If an SMT sibling task is sleeping due to priority + * reasons wake it up now. 
+ */ +- if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running) +- resched_task(smt_rq->idle); ++ if (smt_rq->curr == pcpu(i)->idle && smt_rq->nr_running) ++ resched_task(pcpu(i)->idle); + } + } + +-static inline int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p) ++static int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p) + { + struct sched_domain *sd = rq->sd; + cpumask_t sibling_map; +@@ -2138,7 +3121,7 @@ static inline int dependent_sleeper(int + if (i == cpu) + continue; + +- smt_rq = cpu_rq(i); ++ smt_rq = vcpu_rq(vcpu(i)); + smt_curr = smt_rq->curr; + + /* +@@ -2162,7 +3145,7 @@ static inline int dependent_sleeper(int + if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > + task_timeslice(smt_curr) || rt_task(p)) && + smt_curr->mm && p->mm && !rt_task(smt_curr)) || +- (smt_curr == smt_rq->idle && smt_rq->nr_running)) ++ (smt_curr == pcpu(i)->idle && smt_rq->nr_running)) + resched_task(smt_curr); + } + return ret; +@@ -2178,6 +3161,24 @@ static inline int dependent_sleeper(int + } + #endif + ++static void update_sched_lat(struct task_struct *t, cycles_t cycles) ++{ ++ int cpu; ++ cycles_t ve_wstamp; ++ ++ /* safe due to runqueue lock */ ++ ve_wstamp = VE_TASK_INFO(t)->wakeup_stamp; ++ cpu = smp_processor_id(); ++ if (ve_wstamp && cycles > ve_wstamp) { ++ KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat, ++ cpu, cycles - ve_wstamp); ++#ifdef CONFIG_VE ++ KSTAT_LAT_PCPU_ADD(&VE_TASK_INFO(t)->exec_env->sched_lat_ve, ++ cpu, cycles - ve_wstamp); ++#endif ++ } ++} ++ + /* + * schedule() is the main scheduler function. + */ +@@ -2190,30 +3191,34 @@ asmlinkage void __sched schedule(void) + struct list_head *queue; + unsigned long long now; + unsigned long run_time; +- int cpu, idx; ++ int idx; ++ vcpu_t vcpu; ++ cycles_t cycles; + + /* + * Test if we are atomic. Since do_exit() needs to call into + * schedule() atomically, we ignore that path for now. + * Otherwise, whine if we are scheduling when we should not be. + */ +- if (likely(!(current->state & (TASK_DEAD | TASK_ZOMBIE)))) { ++ if (likely(!current->exit_state)) { + if (unlikely(in_atomic())) { + printk(KERN_ERR "bad: scheduling while atomic!\n"); + dump_stack(); + } + } +- + need_resched: ++ cycles = get_cycles(); + preempt_disable(); + prev = current; + rq = this_rq(); + + release_kernel_lock(prev); + now = sched_clock(); +- if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG)) ++ if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) { + run_time = now - prev->timestamp; +- else ++ if (unlikely((long long)(now - prev->timestamp) < 0)) ++ run_time = 0; ++ } else + run_time = NS_MAX_SLEEP_AVG; + + /* +@@ -2226,6 +3231,8 @@ need_resched: + + spin_lock_irq(&rq->lock); + ++ if (unlikely(current->flags & PF_DEAD)) ++ current->state = EXIT_DEAD; + /* + * if entering off of a kernel preemption go straight + * to picking the next task. 
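In the schedule() hunk above, run_time is still derived from now - prev->timestamp, but the difference is now examined as a signed quantity: it is clamped to 0 if the clock appears to have gone backwards (which can happen, for example, when sched_clock() values on different CPUs are not perfectly in step) and capped at NS_MAX_SLEEP_AVG as before. The stand-alone sketch below shows only that clamping pattern; clamped_delta() and the sample numbers are invented for illustration and are not part of the patch.

#include <stdio.h>

typedef unsigned long long u64;

/* Clamp an unsigned timestamp difference to the range [0, max]. */
static u64 clamped_delta(u64 now, u64 then, u64 max)
{
	long long d = (long long)(now - then);	/* view the wrapped difference as signed */

	if (d < 0)
		return 0;	/* clock appeared to run backwards: count nothing */
	if ((u64)d > max)
		return max;	/* cap the credit, as with NS_MAX_SLEEP_AVG */
	return (u64)d;
}

int main(void)
{
	printf("%llu\n", clamped_delta(1000, 400, 5000));	/* 600  */
	printf("%llu\n", clamped_delta(400, 1000, 5000));	/* 0    */
	printf("%llu\n", clamped_delta(9000, 0, 5000));		/* 5000 */
	return 0;
}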
+@@ -2233,24 +3240,40 @@ need_resched: + switch_count = &prev->nivcsw; + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { + switch_count = &prev->nvcsw; +- if (unlikely((prev->state & TASK_INTERRUPTIBLE) && +- unlikely(signal_pending(prev)))) ++ if (unlikely(((prev->state & TASK_INTERRUPTIBLE) && ++ unlikely(signal_pending(prev))) || ++ ((prev->state & TASK_STOPPED) && ++ sigismember(&prev->pending.signal, SIGKILL)))) + prev->state = TASK_RUNNING; + else + deactivate_task(prev, rq); + } + +- cpu = smp_processor_id(); ++ prev->sleep_avg -= run_time; ++ if ((long)prev->sleep_avg <= 0) { ++ prev->sleep_avg = 0; ++ if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev))) ++ prev->interactive_credit--; ++ } ++ ++ vcpu = rq_vcpu(rq); ++ if (rq->nr_running && ++ jiffies - vcpu->start_time < msecs_to_jiffies(vcpu_timeslice)) ++ goto same_vcpu; ++ ++ if (unlikely(!rq->nr_running)) ++ idle_balance(vcpu, rq); ++ vcpu = schedule_vcpu(vcpu, cycles); ++ rq = vcpu_rq(vcpu); ++ + if (unlikely(!rq->nr_running)) { +- idle_balance(cpu, rq); +- if (!rq->nr_running) { +- next = rq->idle; +- rq->expired_timestamp = 0; +- wake_sleeping_dependent(cpu, rq); +- goto switch_tasks; +- } ++ next = this_pcpu()->idle; ++ rq->expired_timestamp = 0; ++ wake_sleeping_dependent(vcpu->id, rq); ++ goto switch_tasks; + } + ++same_vcpu: + array = rq->active; + if (unlikely(!array->nr_active)) { + /* +@@ -2266,14 +3289,15 @@ need_resched: + idx = sched_find_first_bit(array->bitmap); + queue = array->queue + idx; + next = list_entry(queue->next, task_t, run_list); +- +- if (dependent_sleeper(cpu, rq, next)) { +- next = rq->idle; ++ if (dependent_sleeper(vcpu->id, rq, next)) { ++ /* FIXME: switch to idle if CONFIG_SCHED_VCPU */ ++ next = this_pcpu()->idle; + goto switch_tasks; + } +- + if (!rt_task(next) && next->activated > 0) { + unsigned long long delta = now - next->timestamp; ++ if (unlikely((long long)delta < 0)) ++ delta = 0; + + if (next->activated == 1) + delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; +@@ -2284,37 +3308,68 @@ need_resched: + enqueue_task(next, array); + } + next->activated = 0; ++ + switch_tasks: + prefetch(next); + clear_tsk_need_resched(prev); +- RCU_qsctr(task_cpu(prev))++; ++ RCU_qsctr(task_pcpu(prev))++; + +- prev->sleep_avg -= run_time; +- if ((long)prev->sleep_avg <= 0) { +- prev->sleep_avg = 0; +- if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev))) +- prev->interactive_credit--; +- } ++ /* updated w/o rq->lock, which is ok due to after-read-checks */ + prev->timestamp = now; + + if (likely(prev != next)) { ++ /* current physical CPU id should be valid after switch */ ++ set_task_vcpu(next, vcpu); ++ set_task_pcpu(next, task_pcpu(prev)); + next->timestamp = now; + rq->nr_switches++; ++ glob_tasks_nrs[smp_processor_id()].nr_switches++; + rq->curr = next; + ++*switch_count; + ++ VE_TASK_INFO(prev)->sleep_stamp = cycles; ++ if (prev->state == TASK_RUNNING && prev != this_pcpu()->idle) ++ write_wakeup_stamp(prev, cycles); ++ update_sched_lat(next, cycles); ++ ++ /* because next & prev are protected with ++ * runqueue lock we may not worry about ++ * wakeup_stamp and sched_time protection ++ * (same thing in 'else' branch below) ++ */ ++ if (prev != this_pcpu()->idle) { ++#ifdef CONFIG_VE ++ VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env, ++ smp_processor_id())->used_time += ++ cycles - VE_TASK_INFO(prev)->sched_time; ++#endif ++ VE_TASK_INFO(prev)->sched_time = 0; ++ } ++ VE_TASK_INFO(next)->sched_time = cycles; ++ write_wakeup_stamp(next, 0); ++ + prepare_arch_switch(rq, next); + prev = 
context_switch(rq, prev, next); + barrier(); + + finish_task_switch(prev); +- } else ++ } else { ++ if (prev != this_pcpu()->idle) { ++#ifdef CONFIG_VE ++ VE_CPU_STATS(VE_TASK_INFO(prev)->owner_env, ++ smp_processor_id())->used_time += ++ cycles - VE_TASK_INFO(prev)->sched_time; ++#endif ++ VE_TASK_INFO(prev)->sched_time = cycles; ++ } + spin_unlock_irq(&rq->lock); ++ } + + reacquire_kernel_lock(current); + preempt_enable_no_resched(); + if (test_thread_flag(TIF_NEED_RESCHED)) + goto need_resched; ++ return; + } + + EXPORT_SYMBOL(schedule); +@@ -2675,23 +3730,12 @@ int task_nice(const task_t *p) + EXPORT_SYMBOL(task_nice); + + /** +- * idle_cpu - is a given cpu idle currently? +- * @cpu: the processor in question. +- */ +-int idle_cpu(int cpu) +-{ +- return cpu_curr(cpu) == cpu_rq(cpu)->idle; +-} +- +-EXPORT_SYMBOL_GPL(idle_cpu); +- +-/** + * find_process_by_pid - find a process with a matching PID value. + * @pid: the pid in question. + */ + static inline task_t *find_process_by_pid(pid_t pid) + { +- return pid ? find_task_by_pid(pid) : current; ++ return pid ? find_task_by_pid_ve(pid) : current; + } + + /* Actually do priority change: must hold rq lock. */ +@@ -2709,7 +3753,7 @@ static void __setscheduler(struct task_s + /* + * setscheduler - change the scheduling policy and/or RT priority of a thread. + */ +-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++int setscheduler(pid_t pid, int policy, struct sched_param __user *param) + { + struct sched_param lp; + int retval = -EINVAL; +@@ -2764,7 +3808,7 @@ static int setscheduler(pid_t pid, int p + + retval = -EPERM; + if ((policy == SCHED_FIFO || policy == SCHED_RR) && +- !capable(CAP_SYS_NICE)) ++ !capable(CAP_SYS_ADMIN)) + goto out_unlock; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_NICE)) +@@ -2802,6 +3846,7 @@ out_unlock_tasklist: + out_nounlock: + return retval; + } ++EXPORT_SYMBOL(setscheduler); + + /** + * sys_sched_setscheduler - set/change the scheduler policy and RT priority +@@ -3065,9 +4110,14 @@ EXPORT_SYMBOL(yield); + void __sched io_schedule(void) + { + struct runqueue *rq = this_rq(); ++ struct ve_struct *ve; ++ ++ ve = VE_TASK_INFO(current)->owner_env; + + atomic_inc(&rq->nr_iowait); ++ nr_iowait_inc(smp_processor_id(), task_cpu(current), ve); + schedule(); ++ nr_iowait_dec(smp_processor_id(), task_cpu(current), ve); + atomic_dec(&rq->nr_iowait); + } + +@@ -3077,9 +4127,14 @@ long __sched io_schedule_timeout(long ti + { + struct runqueue *rq = this_rq(); + long ret; ++ struct ve_struct *ve; ++ ++ ve = VE_TASK_INFO(current)->owner_env; + + atomic_inc(&rq->nr_iowait); ++ nr_iowait_inc(smp_processor_id(), task_cpu(current), ve); + ret = schedule_timeout(timeout); ++ nr_iowait_dec(smp_processor_id(), task_cpu(current), ve); + atomic_dec(&rq->nr_iowait); + return ret; + } +@@ -3199,16 +4254,13 @@ static void show_task(task_t * p) + printk(stat_nam[state]); + else + printk("?"); ++ if (state) ++ printk(" %012Lx", (unsigned long long) ++ (VE_TASK_INFO(p)->sleep_stamp >> 16)); + #if (BITS_PER_LONG == 32) +- if (state == TASK_RUNNING) +- printk(" running "); +- else +- printk(" %08lX ", thread_saved_pc(p)); ++ printk(" %08lX ", (unsigned long)p); + #else +- if (state == TASK_RUNNING) +- printk(" running task "); +- else +- printk(" %016lx ", thread_saved_pc(p)); ++ printk(" %016lx ", (unsigned long)p); + #endif + #ifdef CONFIG_DEBUG_STACK_USAGE + { +@@ -3247,39 +4299,82 @@ void show_state(void) + #if (BITS_PER_LONG == 32) + printk("\n" + " sibling\n"); +- 
printk(" task PC pid father child younger older\n"); ++ printk(" task taskaddr pid father child younger older\n"); + #else + printk("\n" + " sibling\n"); +- printk(" task PC pid father child younger older\n"); ++ printk(" task taskaddr pid father child younger older\n"); + #endif + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_all(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow + * console might take alot of time: + */ + touch_nmi_watchdog(); + show_task(p); +- } while_each_thread(g, p); ++ } while_each_thread_all(g, p); + + read_unlock(&tasklist_lock); + } + ++static void init_rq(struct runqueue *rq); ++ ++static void init_vcpu(vcpu_t vcpu, int id) ++{ ++ memset(vcpu, 0, sizeof(struct vcpu_info)); ++ vcpu->id = id; ++#ifdef CONFIG_SCHED_VCPU ++ vcpu->last_pcpu = id; ++#endif ++ init_rq(vcpu_rq(vcpu)); ++} ++ + void __devinit init_idle(task_t *idle, int cpu) + { +- runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle)); ++ struct vcpu_scheduler *vsched; ++ vcpu_t vcpu; ++ runqueue_t *idle_rq, *rq; + unsigned long flags; + ++#ifdef CONFIG_SCHED_VCPU ++ if (__add_vcpu(&idle_vsched, cpu)) ++ panic("Can't create idle vcpu %d\n", cpu); ++ ++ /* Also create vcpu for default_vsched */ ++ if (cpu > 0 && __add_vcpu(&default_vsched, cpu) != 0) ++ panic("Can't create default vcpu %d\n", cpu); ++ cpu_set(cpu, idle_vsched.pcpu_running_map); ++#endif ++ vsched = &idle_vsched; ++ vcpu = vsched_vcpu(vsched, cpu); ++ ++ idle_rq = vcpu_rq(vcpu); ++ rq = vcpu_rq(task_vcpu(idle)); ++ + local_irq_save(flags); + double_rq_lock(idle_rq, rq); + +- idle_rq->curr = idle_rq->idle = idle; ++ pcpu(cpu)->idle = idle; ++ idle_rq->curr = idle; + deactivate_task(idle, rq); + idle->array = NULL; + idle->prio = MAX_PRIO; + idle->state = TASK_RUNNING; +- set_task_cpu(idle, cpu); ++ set_task_pcpu(idle, cpu); ++#ifdef CONFIG_SCHED_VCPU ++ /* the following code is very close to vcpu_get */ ++ spin_lock(&fairsched_lock); ++ pcpu(cpu)->vcpu = vcpu; ++ pcpu(cpu)->vsched = vcpu->vsched; ++ list_move_tail(&vcpu->list, &vsched->running_list); ++ __set_bit(cpu, vsched->vcpu_running_map.bits); ++ __set_bit(cpu, vsched->pcpu_running_map.bits); ++ vcpu->running = 1; ++ spin_unlock(&fairsched_lock); ++#endif ++ set_task_vsched(idle, vsched); ++ set_task_vcpu(idle, vcpu); + double_rq_unlock(idle_rq, rq); + set_tsk_need_resched(idle); + local_irq_restore(flags); +@@ -3301,7 +4396,7 @@ void __devinit init_idle(task_t *idle, i + */ + cpumask_t nohz_cpu_mask = CPU_MASK_NONE; + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_VCPU) + /* + * This is how migration works: + * +@@ -3327,15 +4422,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; + * task must not exit() & deallocate itself prematurely. The + * call is not atomic; no spinlocks may be held. + */ ++#ifdef CONFIG_SMP + int set_cpus_allowed(task_t *p, cpumask_t new_mask) + { + unsigned long flags; + int ret = 0; + migration_req_t req; + runqueue_t *rq; ++ struct vcpu_scheduler *vsched; + ++ vsched = task_vsched(p); + rq = task_rq_lock(p, &flags); +- if (!cpus_intersects(new_mask, cpu_online_map)) { ++ if (!cpus_intersects(new_mask, vsched_vcpu_online_map(vsched))) { + ret = -EINVAL; + goto out; + } +@@ -3345,7 +4443,8 @@ int set_cpus_allowed(task_t *p, cpumask_ + if (cpu_isset(task_cpu(p), new_mask)) + goto out; + +- if (migrate_task(p, any_online_cpu(new_mask), &req)) { ++ if (migrate_task(p, vsched_vcpu(vsched, any_online_cpu(new_mask)), ++ &req)) { + /* Need help from migration thread: drop lock and wait. 
*/ + task_rq_unlock(rq, &flags); + wake_up_process(rq->migration_thread); +@@ -3359,6 +4458,7 @@ out: + } + + EXPORT_SYMBOL_GPL(set_cpus_allowed); ++#endif + + /* + * Move (not current) task off this cpu, onto dest cpu. We're doing +@@ -3369,25 +4469,30 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed); + * So we race with normal scheduler movements, but that's OK, as long + * as the task is no longer on this CPU. + */ +-static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) ++static void __migrate_task(struct task_struct *p, vcpu_t src_cpu, vcpu_t dest_cpu) + { + runqueue_t *rq_dest, *rq_src; + +- if (unlikely(cpu_is_offline(dest_cpu))) ++ if (unlikely(vcpu_is_offline(dest_cpu))) + return; + +- rq_src = cpu_rq(src_cpu); +- rq_dest = cpu_rq(dest_cpu); ++#ifdef CONFIG_SCHED_VCPU ++ BUG_ON(vcpu_vsched(src_cpu) == &idle_vsched); ++#endif ++ rq_src = vcpu_rq(src_cpu); ++ rq_dest = vcpu_rq(dest_cpu); + + double_rq_lock(rq_src, rq_dest); + /* Already moved. */ +- if (task_cpu(p) != src_cpu) ++ if (task_vcpu(p) != src_cpu) + goto out; + /* Affinity changed (again). */ +- if (!cpu_isset(dest_cpu, p->cpus_allowed)) ++ if (!vcpu_isset(dest_cpu, p->cpus_allowed)) + goto out; + +- set_task_cpu(p, dest_cpu); ++ BUG_ON(task_running(rq_src, p)); ++ set_task_vsched(p, vcpu_vsched(dest_cpu)); ++ set_task_vcpu(p, dest_cpu); + if (p->array) { + /* + * Sync timestamp with rq_dest's before activating. +@@ -3415,9 +4520,9 @@ out: + static int migration_thread(void * data) + { + runqueue_t *rq; +- int cpu = (long)data; ++ vcpu_t cpu = (vcpu_t)data; + +- rq = cpu_rq(cpu); ++ rq = vcpu_rq(cpu); + BUG_ON(rq->migration_thread != current); + + set_current_state(TASK_INTERRUPTIBLE); +@@ -3425,21 +4530,21 @@ static int migration_thread(void * data) + struct list_head *head; + migration_req_t *req; + +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + spin_lock_irq(&rq->lock); + +- if (cpu_is_offline(cpu)) { ++ if (vcpu_is_offline(cpu)) { + spin_unlock_irq(&rq->lock); + goto wait_to_die; + } +- ++#ifdef CONFIG_SMP + if (rq->active_balance) { + active_load_balance(rq, cpu); + rq->active_balance = 0; + } +- ++#endif + head = &rq->migration_queue; + + if (list_empty(head)) { +@@ -3453,12 +4558,14 @@ static int migration_thread(void * data) + + if (req->type == REQ_MOVE_TASK) { + spin_unlock(&rq->lock); +- __migrate_task(req->task, smp_processor_id(), ++ __migrate_task(req->task, this_vcpu(), + req->dest_cpu); + local_irq_enable(); ++#ifdef CONFIG_SMP + } else if (req->type == REQ_SET_DOMAIN) { + rq->sd = req->sd; + spin_unlock_irq(&rq->lock); ++#endif + } else { + spin_unlock_irq(&rq->lock); + WARN_ON(1); +@@ -3480,10 +4587,10 @@ wait_to_die: + return 0; + } + +-#ifdef CONFIG_HOTPLUG_CPU + /* migrate_all_tasks - function to migrate all tasks from the dead cpu. 
*/
+-static void migrate_all_tasks(int src_cpu)
++static void migrate_all_tasks(vcpu_t src_vcpu)
+ {
++#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_SCHED_VCPU)
+ struct task_struct *tsk, *t;
+ int dest_cpu;
+ unsigned int node;
+@@ -3491,14 +4598,14 @@ static void migrate_all_tasks(int src_cp
+ write_lock_irq(&tasklist_lock);
+
+ /* watch out for per node tasks, let's stay on this node */
+- node = cpu_to_node(src_cpu);
++ node = cpu_to_node(src_vcpu);
+
+- do_each_thread(t, tsk) {
++ do_each_thread_all(t, tsk) {
+ cpumask_t mask;
+ if (tsk == current)
+ continue;
+
+- if (task_cpu(tsk) != src_cpu)
++ if (task_vcpu(tsk) != src_vcpu)
+ continue;
+
+ /* Figure out where this task should go (attempting to
+@@ -3520,22 +4627,43 @@ static void migrate_all_tasks(int src_cp
+ if (tsk->mm && printk_ratelimit())
+ printk(KERN_INFO "process %d (%s) no "
+ "longer affine to cpu%d\n",
+- tsk->pid, tsk->comm, src_cpu);
++ tsk->pid, tsk->comm, src_vcpu->id);
+ }
+-
+- __migrate_task(tsk, src_cpu, dest_cpu);
+- } while_each_thread(t, tsk);
++ __migrate_task(tsk, src_vcpu,
++ vsched_vcpu(vcpu_vsched(src_vcpu), dest_cpu));
++ } while_each_thread_all(t, tsk);
+
+ write_unlock_irq(&tasklist_lock);
++#elif defined(CONFIG_SCHED_VCPU)
++ struct task_struct *tsk, *t;
++
++ /*
++ * FIXME: should migrate tasks from src_vcpu to others if dynamic
++ * VCPU add/del is implemented. Right now just does sanity checks.
++ */
++ read_lock(&tasklist_lock);
++ do_each_thread_all(t, tsk) {
++ if (task_vcpu(tsk) != src_vcpu)
++ continue;
++ if (tsk == vcpu_rq(src_vcpu)->migration_thread)
++ continue;
++
++ printk("VSCHED: task %s (%d) was left on src VCPU %d:%d\n",
++ tsk->comm, tsk->pid,
++ vcpu_vsched(src_vcpu)->id, src_vcpu->id);
++ } while_each_thread_all(t, tsk);
++ read_unlock(&tasklist_lock);
++#endif
+ }
+
++#ifdef CONFIG_HOTPLUG_CPU
+ /* Schedules idle task to be the next runnable task on current CPU.
+ * It does so by boosting its priority to highest possible and adding it to
+ * the _front_ of runqueue. Used by CPU offline code.
+ */
+ void sched_idle_next(void)
+ {
+- int cpu = smp_processor_id();
++ int cpu = this_vcpu();
+ runqueue_t *rq = this_rq();
+ struct task_struct *p = rq->idle;
+ unsigned long flags;
+@@ -3550,60 +4678,100 @@ void sched_idle_next(void)
+
+ __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
+ /* Add idle task to _front_ of its priority queue */
++#ifdef CONFIG_SCHED_VCPU
++#error "FIXME: VCPU vs. HOTPLUG: fix the code below"
++#endif
+ __activate_idle_task(p, rq);
+
+ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ #endif /* CONFIG_HOTPLUG_CPU */
+
++static void migration_thread_bind(struct task_struct *k, vcpu_t cpu)
++{
++ BUG_ON(k->state != TASK_INTERRUPTIBLE);
++ /* Must have done schedule() in kthread() before we set_task_cpu */
++ wait_task_inactive(k);
++
++ set_task_vsched(k, vcpu_vsched(cpu));
++ set_task_vcpu(k, cpu);
++ k->cpus_allowed = cpumask_of_cpu(cpu->id);
++}
++
++static void migration_thread_stop(runqueue_t *rq)
++{
++ struct task_struct *thread;
++
++ thread = rq->migration_thread;
++ if (thread == NULL)
++ return;
++
++ get_task_struct(thread);
++ kthread_stop(thread);
++
++ /* We MUST ensure that the do_exit of the migration thread is
++ * completed and it will never be scheduled again before vsched_destroy.
++ * The task with flag PF_DEAD, if unscheduled, will never receive
++ * CPU again. 
*/ ++ while (!(thread->flags & PF_DEAD) || task_running(rq, thread)) ++ yield(); ++ put_task_struct(thread); ++ ++ rq->migration_thread = NULL; ++} ++ + /* + * migration_call - callback that gets triggered when a CPU is added. + * Here we can start up the necessary migration thread for the new CPU. + */ +-static int migration_call(struct notifier_block *nfb, unsigned long action, ++static int vmigration_call(struct notifier_block *nfb, unsigned long action, + void *hcpu) + { +- int cpu = (long)hcpu; ++ vcpu_t cpu = (vcpu_t)hcpu; + struct task_struct *p; + struct runqueue *rq; + unsigned long flags; + + switch (action) { + case CPU_UP_PREPARE: +- p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); ++ p = kthread_create(migration_thread, hcpu, "migration/%d/%d", ++ vsched_id(vcpu_vsched(cpu)), cpu->id); + if (IS_ERR(p)) + return NOTIFY_BAD; + p->flags |= PF_NOFREEZE; +- kthread_bind(p, cpu); +- /* Must be high prio: stop_machine expects to yield to it. */ ++ ++ migration_thread_bind(p, cpu); + rq = task_rq_lock(p, &flags); ++ /* Must be high prio: stop_machine expects to yield to it. */ + __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); + task_rq_unlock(rq, &flags); +- cpu_rq(cpu)->migration_thread = p; ++ vcpu_rq(cpu)->migration_thread = p; + break; + case CPU_ONLINE: + /* Strictly unneccessary, as first user will wake it. */ +- wake_up_process(cpu_rq(cpu)->migration_thread); ++ wake_up_process(vcpu_rq(cpu)->migration_thread); + break; +-#ifdef CONFIG_HOTPLUG_CPU ++ ++#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SCHED_VCPU) ++#error "FIXME: CPU down code doesn't work yet with VCPUs" ++#endif + case CPU_UP_CANCELED: + /* Unbind it from offline cpu so it can run. Fall thru. */ +- kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); +- kthread_stop(cpu_rq(cpu)->migration_thread); +- cpu_rq(cpu)->migration_thread = NULL; ++ migration_thread_bind(vcpu_rq(cpu)->migration_thread, this_vcpu()); ++ migration_thread_stop(vcpu_rq(cpu)); + break; + case CPU_DEAD: + migrate_all_tasks(cpu); +- rq = cpu_rq(cpu); +- kthread_stop(rq->migration_thread); +- rq->migration_thread = NULL; ++ rq = vcpu_rq(cpu); ++ migration_thread_stop(rq); ++#ifdef CONFIG_HOTPLUG_CPU + /* Idle task back to normal (off runqueue, low prio) */ + rq = task_rq_lock(rq->idle, &flags); + deactivate_task(rq->idle, rq); + rq->idle->static_prio = MAX_PRIO; + __setscheduler(rq->idle, SCHED_NORMAL, 0); + task_rq_unlock(rq, &flags); +- BUG_ON(rq->nr_running != 0); ++#endif + + /* No need to migrate the tasks: it was best-effort if + * they didn't do lock_cpu_hotplug(). Just wake up +@@ -3619,11 +4787,17 @@ static int migration_call(struct notifie + } + spin_unlock_irq(&rq->lock); + break; +-#endif + } + return NOTIFY_OK; + } + ++static int migration_call(struct notifier_block *nfb, unsigned long action, ++ void *hcpu) ++{ ++ /* we need to translate pcpu to vcpu */ ++ return vmigration_call(nfb, action, vsched_default_vcpu((long)hcpu)); ++} ++ + /* Register at highest priority so that task migration (migrate_all_tasks) + * happens before everything else. 
+ */ +@@ -3664,13 +4838,14 @@ void cpu_attach_domain(struct sched_doma + { + migration_req_t req; + unsigned long flags; +- runqueue_t *rq = cpu_rq(cpu); ++ runqueue_t *rq = vcpu_rq(vsched_default_vcpu(cpu)); + int local = 1; + + lock_cpu_hotplug(); + + spin_lock_irqsave(&rq->lock, flags); + ++ pcpu(cpu)->sd = sd; + if (cpu == smp_processor_id() || !cpu_online(cpu)) { + rq->sd = sd; + } else { +@@ -3815,11 +4990,10 @@ void sched_domain_debug(void) + int i; + + for_each_cpu(i) { +- runqueue_t *rq = cpu_rq(i); + struct sched_domain *sd; + int level = 0; + +- sd = rq->sd; ++ sd = pcpu(i)->sd; + + printk(KERN_DEBUG "CPU%d: %s\n", + i, (cpu_online(i) ? " online" : "offline")); +@@ -3836,7 +5010,8 @@ void sched_domain_debug(void) + printk(KERN_DEBUG); + for (j = 0; j < level + 1; j++) + printk(" "); +- printk("domain %d: span %s\n", level, str); ++ printk("domain %d: span %s flags 0x%x\n", ++ level, str, sd->flags); + + if (!cpu_isset(i, sd->span)) + printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i); +@@ -3907,16 +5082,13 @@ int in_sched_functions(unsigned long add + && addr < (unsigned long)__sched_text_end; + } + +-void __init sched_init(void) +-{ +- runqueue_t *rq; +- int i, j, k; +- + #ifdef CONFIG_SMP +- /* Set up an initial dummy domain for early boot */ +- static struct sched_domain sched_domain_init; +- static struct sched_group sched_group_init; ++static struct sched_domain sched_domain_init; ++static struct sched_group sched_group_init; + ++/* Set up an initial dummy domain for early boot */ ++static void init_sd(void) ++{ + memset(&sched_domain_init, 0, sizeof(struct sched_domain)); + sched_domain_init.span = CPU_MASK_ALL; + sched_domain_init.groups = &sched_group_init; +@@ -3928,45 +5100,570 @@ void __init sched_init(void) + sched_group_init.cpumask = CPU_MASK_ALL; + sched_group_init.next = &sched_group_init; + sched_group_init.cpu_power = SCHED_LOAD_SCALE; ++} ++#else ++static void inline init_sd(void) ++{ ++} + #endif + +- for (i = 0; i < NR_CPUS; i++) { +- prio_array_t *array; ++static void init_rq(struct runqueue *rq) ++{ ++ int j, k; ++ prio_array_t *array; + +- rq = cpu_rq(i); +- spin_lock_init(&rq->lock); +- rq->active = rq->arrays; +- rq->expired = rq->arrays + 1; +- rq->best_expired_prio = MAX_PRIO; ++ spin_lock_init(&rq->lock); ++ rq->active = &rq->arrays[0]; ++ rq->expired = &rq->arrays[1]; ++ rq->best_expired_prio = MAX_PRIO; + + #ifdef CONFIG_SMP +- rq->sd = &sched_domain_init; +- rq->cpu_load = 0; +- rq->active_balance = 0; +- rq->push_cpu = 0; +- rq->migration_thread = NULL; +- INIT_LIST_HEAD(&rq->migration_queue); +-#endif +- atomic_set(&rq->nr_iowait, 0); +- +- for (j = 0; j < 2; j++) { +- array = rq->arrays + j; +- for (k = 0; k < MAX_PRIO; k++) { +- INIT_LIST_HEAD(array->queue + k); +- __clear_bit(k, array->bitmap); +- } +- // delimiter for bitsearch +- __set_bit(MAX_PRIO, array->bitmap); ++ rq->sd = &sched_domain_init; ++ rq->cpu_load = 0; ++ rq->active_balance = 0; ++#endif ++ rq->push_cpu = 0; ++ rq->migration_thread = NULL; ++ INIT_LIST_HEAD(&rq->migration_queue); ++ atomic_set(&rq->nr_iowait, 0); ++ ++ for (j = 0; j < 2; j++) { ++ array = rq->arrays + j; ++ for (k = 0; k < MAX_PRIO; k++) { ++ INIT_LIST_HEAD(array->queue + k); ++ __clear_bit(k, array->bitmap); ++ } ++ // delimiter for bitsearch ++ __set_bit(MAX_PRIO, array->bitmap); ++ } ++} ++ ++#if defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) ++/* both rq and vsched lock should be taken */ ++static void __install_vcpu(struct vcpu_scheduler *vsched, vcpu_t vcpu) ++{ ++ int id; ++ 
++ id = vcpu->id; ++ vcpu->vsched = vsched; ++ vsched->vcpu[id] = vcpu; ++ vcpu->last_pcpu = id; ++ wmb(); ++ /* FIXME: probably locking should be reworked, e.g. ++ we don't have corresponding rmb(), so we need to update mask ++ only after quiscent state */ ++ /* init_boot_vcpu() should be remade if RCU is used here */ ++ list_add(&vcpu->list, &vsched->idle_list); ++ cpu_set(id, vsched->vcpu_online_map); ++ vsched->num_online_vcpus++; ++} ++ ++static int install_vcpu(vcpu_t vcpu, struct vcpu_scheduler *vsched) ++{ ++ runqueue_t *rq; ++ unsigned long flags; ++ int res = 0; ++ ++ rq = vcpu_rq(vcpu); ++ spin_lock_irqsave(&rq->lock, flags); ++ spin_lock(&fairsched_lock); ++ ++ if (vsched->vcpu[vcpu->id] != NULL) ++ res = -EBUSY; ++ else ++ __install_vcpu(vsched, vcpu); ++ ++ spin_unlock(&fairsched_lock); ++ spin_unlock_irqrestore(&rq->lock, flags); ++ return res; ++} ++ ++static int __add_vcpu(struct vcpu_scheduler *vsched, int id) ++{ ++ vcpu_t vcpu; ++ int res; ++ ++ res = -ENOMEM; ++ vcpu = kmalloc(sizeof(struct vcpu_info), GFP_KERNEL); ++ if (vcpu == NULL) ++ goto out; ++ ++ init_vcpu(vcpu, id); ++ vcpu_rq(vcpu)->curr = this_pcpu()->idle; ++ res = install_vcpu(vcpu, vsched); ++ if (res < 0) ++ goto out_free; ++ return 0; ++ ++out_free: ++ kfree(vcpu); ++out: ++ return res; ++} ++ ++void vsched_init(struct vcpu_scheduler *vsched, int id) ++{ ++ memset(vsched, 0, sizeof(*vsched)); ++ ++ INIT_LIST_HEAD(&vsched->idle_list); ++ INIT_LIST_HEAD(&vsched->active_list); ++ INIT_LIST_HEAD(&vsched->running_list); ++ vsched->num_online_vcpus = 0; ++ vsched->vcpu_online_map = CPU_MASK_NONE; ++ vsched->vcpu_running_map = CPU_MASK_NONE; ++ vsched->pcpu_running_map = CPU_MASK_NONE; ++ vsched->id = id; ++} ++ ++#ifdef CONFIG_FAIRSCHED ++ ++/* No locks supposed to be held */ ++static void vsched_del_vcpu(vcpu_t vcpu); ++static int vsched_add_vcpu(struct vcpu_scheduler *vsched) ++{ ++ int res, err; ++ vcpu_t vcpu; ++ int id; ++ static DECLARE_MUTEX(id_mutex); ++ ++ down(&id_mutex); ++ id = find_first_zero_bit(vsched->vcpu_online_map.bits, NR_CPUS); ++ if (id >= NR_CPUS) { ++ err = -EBUSY; ++ goto out_up; ++ } ++ ++ err = __add_vcpu(vsched, id); ++ if (err < 0) ++ goto out_up; ++ ++ vcpu = vsched_vcpu(vsched, id); ++ err = -ENOMEM; ++ ++ res = vmigration_call(&migration_notifier, CPU_UP_PREPARE, vcpu); ++ if (res != NOTIFY_OK) ++ goto out_del_up; ++ ++ res = vmigration_call(&migration_notifier, CPU_ONLINE, vcpu); ++ if (res != NOTIFY_OK) ++ goto out_cancel_del_up; ++ ++ err = 0; ++ ++out_up: ++ up(&id_mutex); ++ return err; ++ ++out_cancel_del_up: ++ vmigration_call(&migration_notifier, CPU_UP_CANCELED, vcpu); ++out_del_up: ++ vsched_del_vcpu(vcpu); ++ goto out_up; ++} ++ ++static void vsched_del_vcpu(vcpu_t vcpu) ++{ ++ struct vcpu_scheduler *vsched; ++ runqueue_t *rq; ++ ++ vsched = vcpu_vsched(vcpu); ++ rq = vcpu_rq(vcpu); ++ ++ spin_lock_irq(&rq->lock); ++ spin_lock(&fairsched_lock); ++ cpu_clear(vcpu->id, vsched->vcpu_online_map); ++ vsched->num_online_vcpus--; ++ spin_unlock(&fairsched_lock); ++ spin_unlock_irq(&rq->lock); ++ ++ /* ++ * all tasks should migrate from this VCPU somewhere, ++ * also, since this moment VCPU is offline, so migration_thread ++ * won't accept any new tasks... ++ */ ++ vmigration_call(&migration_notifier, CPU_DEAD, vcpu); ++ BUG_ON(rq->nr_running != 0); ++ ++ /* vcpu_put() is called after deactivate_task. 
This loop makes sure ++ * that vcpu_put() was finished and vcpu can be freed */ ++ while ((volatile int)vcpu->running) ++ cpu_relax(); ++ ++ BUG_ON(vcpu->active); /* should be in idle_list */ ++ ++ spin_lock_irq(&fairsched_lock); ++ list_del(&vcpu->list); ++ vsched_vcpu(vsched, vcpu->id) = NULL; ++ spin_unlock_irq(&fairsched_lock); ++ ++ kfree(vcpu); ++} ++ ++int vsched_mvpr(struct task_struct *p, struct vcpu_scheduler *vsched) ++{ ++ vcpu_t dest_vcpu; ++ int id; ++ int res; ++ ++ res = 0; ++ while(1) { ++ /* FIXME: we suppose here that vcpu can't dissapear on the fly */ ++ for(id = first_cpu(vsched->vcpu_online_map); id < NR_CPUS; ++ id++) { ++ if ((vsched->vcpu[id] != NULL) && ++ !vcpu_isset(vsched->vcpu[id], p->cpus_allowed)) ++ continue; ++ else ++ break; ++ } ++ if (id >= NR_CPUS) { ++ res = -EINVAL; ++ goto out; ++ } ++ ++ dest_vcpu = vsched_vcpu(vsched, id); ++ while(1) { ++ sched_migrate_task(p, dest_vcpu); ++ if (task_vsched_id(p) == vsched_id(vsched)) ++ goto out; ++ if (!vcpu_isset(vsched->vcpu[id], p->cpus_allowed)) ++ break; ++ } ++ } ++out: ++ return res; ++} ++ ++void vsched_fairsched_link(struct vcpu_scheduler *vsched, ++ struct fairsched_node *node) ++{ ++ vsched->node = node; ++ node->vsched = vsched; ++} ++ ++void vsched_fairsched_unlink(struct vcpu_scheduler *vsched, ++ struct fairsched_node *node) ++{ ++ vsched->node = NULL; ++ node->vsched = NULL; ++} ++ ++int vsched_create(int id, struct fairsched_node *node) ++{ ++ struct vcpu_scheduler *vsched; ++ int i, res; ++ ++ vsched = kmalloc(sizeof(*vsched), GFP_KERNEL); ++ if (vsched == NULL) ++ return -ENOMEM; ++ ++ vsched_init(vsched, node->id); ++ vsched_fairsched_link(vsched, node); ++ ++ for(i = 0; i < num_online_cpus(); i++) { ++ res = vsched_add_vcpu(vsched); ++ if (res < 0) ++ goto err_add; ++ } ++ return 0; ++ ++err_add: ++ vsched_destroy(vsched); ++ return res; ++} ++ ++int vsched_destroy(struct vcpu_scheduler *vsched) ++{ ++ vcpu_t vcpu; ++ ++ if (vsched == NULL) ++ return 0; ++ ++ spin_lock_irq(&fairsched_lock); ++ while(1) { ++ if (!list_empty(&vsched->running_list)) ++ vcpu = list_entry(vsched->running_list.next, ++ struct vcpu_info, list); ++ else if (!list_empty(&vsched->active_list)) ++ vcpu = list_entry(vsched->active_list.next, ++ struct vcpu_info, list); ++ else if (!list_empty(&vsched->idle_list)) ++ vcpu = list_entry(vsched->idle_list.next, ++ struct vcpu_info, list); ++ else ++ break; ++ spin_unlock_irq(&fairsched_lock); ++ vsched_del_vcpu(vcpu); ++ spin_lock_irq(&fairsched_lock); ++ } ++ if (vsched->num_online_vcpus) ++ goto err_busy; ++ spin_unlock_irq(&fairsched_lock); ++ ++ vsched_fairsched_unlink(vsched, vsched->node); ++ kfree(vsched); ++ return 0; ++ ++err_busy: ++ printk(KERN_ERR "BUG in vsched_destroy, vsched id %d\n", ++ vsched->id); ++ spin_unlock_irq(&fairsched_lock); ++ return -EBUSY; ++ ++} ++#endif /* defined(CONFIG_FAIRSCHED) */ ++#endif /* defined(CONFIG_SCHED_VCPU) || defined(CONFIG_FAIRSCHED) */ ++ ++#ifdef CONFIG_VE ++/* ++ * This function is used to show fake CPU information. ++ * ++ * I'm still quite unsure that faking CPU speed is such a good idea, ++ * but someone (Kirill?) has made this decision. ++ * What I'm absolutely sure is that it's a part of virtualization, ++ * not a scheduler. 
20050727 SAW ++ */ ++#ifdef CONFIG_FAIRSCHED ++unsigned long ve_scale_khz(unsigned long khz) ++{ ++ struct fairsched_node *node; ++ int cpus; ++ unsigned long rate; ++ ++ cpus = fairsched_nr_cpus; ++ rate = cpus << FSCHRATE_SHIFT; ++ ++ /* ++ * Ideally fairsched node should be taken from the current ve_struct. ++ * However, to simplify the code and locking, it is taken from current ++ * (currently fairsched_node can be changed only for a sleeping task). ++ * That means that VE0 processes moved to some special node will get ++ * fake CPU speed, but that shouldn't be a big problem. ++ */ ++ preempt_disable(); ++ node = current->vsched->node; ++ if (node->rate_limited) ++ rate = node->rate; ++ preempt_enable(); ++ ++ return ((unsigned long long)khz * (rate / cpus)) >> FSCHRATE_SHIFT; ++} ++#endif ++#endif /* CONFIG_VE */ ++ ++static void init_boot_vcpu(void) ++{ ++ int res; ++ ++ /* ++ * We setup boot_vcpu and it's runqueue until init_idle() happens ++ * on cpu0. This is required since timer interrupts can happen ++ * between sched_init() and init_idle(). ++ */ ++ init_vcpu(&boot_vcpu, 0); ++ vcpu_rq(&boot_vcpu)->curr = current; ++ res = install_vcpu(&boot_vcpu, &default_vsched); ++ if (res < 0) ++ panic("Can't install boot vcpu"); ++ ++ this_pcpu()->vcpu = &boot_vcpu; ++ this_pcpu()->vsched = boot_vcpu.vsched; ++} ++ ++static void init_pcpu(int id) ++{ ++ struct pcpu_info *pcpu; ++ ++ pcpu = pcpu(id); ++ pcpu->id = id; ++#ifdef CONFIG_SMP ++ pcpu->sd = &sched_domain_init; ++#endif ++ ++#ifndef CONFIG_SCHED_VCPU ++ init_vcpu(vcpu(id), id); ++#endif ++} ++ ++static void init_pcpus(void) ++{ ++ int i; ++ for (i = 0; i < NR_CPUS; i++) ++ init_pcpu(i); ++} ++ ++#ifdef CONFIG_SCHED_VCPU ++static void show_vcpu_list(struct vcpu_scheduler *vsched, struct list_head *lh) ++{ ++ cpumask_t m; ++ vcpu_t vcpu; ++ int i; ++ ++ cpus_clear(m); ++ list_for_each_entry(vcpu, lh, list) ++ cpu_set(vcpu->id, m); ++ ++ for (i = 0; i < NR_CPUS; i++) ++ if (cpu_isset(i, m)) ++ printk("%d ", i); ++} ++ ++#define PRINT(s, sz, fmt...) \ ++ do { \ ++ int __out; \ ++ __out = scnprintf(*s, *sz, fmt); \ ++ *s += __out; \ ++ *sz -= __out; \ ++ } while(0) ++ ++static void show_rq_array(prio_array_t *array, char *header, char **s, int *sz) ++{ ++ struct list_head *list; ++ task_t *p; ++ int k, h; ++ ++ h = 0; ++ for (k = 0; k < MAX_PRIO; k++) { ++ list = array->queue + k; ++ if (list_empty(list)) ++ continue; ++ ++ if (!h) { ++ PRINT(s, sz, header); ++ h = 1; + } ++ ++ PRINT(s, sz, " prio %d (", k); ++ list_for_each_entry(p, list, run_list) ++ PRINT(s, sz, "%s[%d] ", p->comm, p->pid); ++ PRINT(s, sz, ")"); + } ++ if (h) ++ PRINT(s, sz, "\n"); ++} ++ ++static void show_vcpu(vcpu_t vcpu) ++{ ++ runqueue_t *rq; ++ char buf[1024], *s; ++ unsigned long flags; ++ int sz; ++ ++ if (vcpu == NULL) ++ return; ++ ++ rq = vcpu_rq(vcpu); ++ spin_lock_irqsave(&rq->lock, flags); ++ printk(" vcpu %d: last_pcpu %d, state %s%s\n", ++ vcpu->id, vcpu->last_pcpu, ++ vcpu->active ? "A" : "", ++ vcpu->running ? 
"R" : ""); ++ ++ printk(" rq: running %lu, load %lu, sw %Lu, sd %p\n", ++ rq->nr_running, ++#ifdef CONFIG_SMP ++ rq->cpu_load, ++#else ++ 0LU, ++#endif ++ rq->nr_switches, ++#ifdef CONFIG_SMP ++ rq->sd ++#else ++ NULL ++#endif ++ ); ++ ++ s = buf; ++ sz = sizeof(buf) - 1; ++ ++ show_rq_array(rq->active, " active:", &s, &sz); ++ show_rq_array(rq->expired, " expired:", &s, &sz); ++ spin_unlock_irqrestore(&rq->lock, flags); ++ ++ *s = 0; ++ printk(buf); ++} ++ ++static inline void fairsched_show_node(struct vcpu_scheduler *vsched) ++{ ++#ifdef CONFIG_FAIRSCHED ++ struct fairsched_node *node; ++ ++ node = vsched->node; ++ printk("fsnode: ready %d run %d cpu %d vsched %p, pcpu %d\n", ++ node->nr_ready, node->nr_runnable, node->nr_pcpu, ++ node->vsched, smp_processor_id()); ++#endif ++} ++ ++static void __show_vsched(struct vcpu_scheduler *vsched) ++{ ++ char mask[NR_CPUS + 1]; ++ int i; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&fairsched_lock, flags); ++ printk("vsched id=%d\n", vsched_id(vsched)); ++ fairsched_show_node(vsched); ++ ++ printk(" idle cpus "); ++ show_vcpu_list(vsched, &vsched->idle_list); ++ printk("; active cpus "); ++ show_vcpu_list(vsched, &vsched->active_list); ++ printk("; running cpus "); ++ show_vcpu_list(vsched, &vsched->running_list); ++ printk("\n"); ++ ++ cpumask_scnprintf(mask, NR_CPUS, vsched->vcpu_online_map); ++ printk(" num_online_cpus=%d, mask=%s (w=%d)\n", ++ vsched->num_online_vcpus, mask, ++ cpus_weight(vsched->vcpu_online_map)); ++ spin_unlock_irqrestore(&fairsched_lock, flags); ++ ++ for (i = 0; i < NR_CPUS; i++) ++ show_vcpu(vsched->vcpu[i]); ++} ++ ++void show_vsched(void) ++{ ++ oops_in_progress = 1; ++ __show_vsched(&idle_vsched); ++ __show_vsched(&default_vsched); ++ oops_in_progress = 0; ++} ++#endif /* CONFIG_SCHED_VCPU */ ++ ++void __init sched_init(void) ++{ ++ runqueue_t *rq; ++ ++ init_sd(); ++ init_pcpus(); ++#if defined(CONFIG_SCHED_VCPU) ++ vsched_init(&idle_vsched, -1); ++ vsched_init(&default_vsched, 0); ++#if defined(CONFIG_FAIRSCHED) ++ fairsched_init_early(); ++ vsched_fairsched_link(&idle_vsched, &fairsched_idle_node); ++ vsched_fairsched_link(&default_vsched, &fairsched_init_node); ++#endif ++ init_boot_vcpu(); ++#else ++#if defined(CONFIG_FAIRSCHED) ++ fairsched_init_early(); ++#endif ++#endif + /* + * We have to do a little magic to get the first + * thread right in SMP mode. + */ ++ set_task_vsched(current, &default_vsched); ++ set_task_cpu(current, smp_processor_id()); ++ /* FIXME: remove or is it required for UP? 
--set in vsched_init() */ + rq = this_rq(); + rq->curr = current; +- rq->idle = current; +- set_task_cpu(current, smp_processor_id()); ++ this_pcpu()->idle = current; + wake_up_forked_process(current); + + /* +@@ -4043,3 +5740,7 @@ void __sched __preempt_write_lock(rwlock + + EXPORT_SYMBOL(__preempt_write_lock); + #endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */ ++ ++EXPORT_SYMBOL(ve_sched_get_idle_time); ++EXPORT_SYMBOL(nr_running_ve); ++EXPORT_SYMBOL(nr_uninterruptible_ve); +diff -uprN linux-2.6.8.1.orig/kernel/signal.c linux-2.6.8.1-ve022stab078/kernel/signal.c +--- linux-2.6.8.1.orig/kernel/signal.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/signal.c 2006-05-11 13:05:45.000000000 +0400 +@@ -12,6 +12,7 @@ + + #include <linux/config.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/module.h> + #include <linux/smp_lock.h> + #include <linux/init.h> +@@ -26,6 +27,9 @@ + #include <asm/unistd.h> + #include <asm/siginfo.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_misc.h> ++ + /* + * SLAB caches for signal bits. + */ +@@ -214,6 +218,7 @@ static inline int has_pending_signals(si + fastcall void recalc_sigpending_tsk(struct task_struct *t) + { + if (t->signal->group_stop_count > 0 || ++ test_tsk_thread_flag(t,TIF_FREEZE) || + PENDING(&t->pending, &t->blocked) || + PENDING(&t->signal->shared_pending, &t->blocked)) + set_tsk_thread_flag(t, TIF_SIGPENDING); +@@ -267,13 +272,26 @@ static struct sigqueue *__sigqueue_alloc + struct sigqueue *q = NULL; + + if (atomic_read(¤t->user->sigpending) < +- current->rlim[RLIMIT_SIGPENDING].rlim_cur) ++ current->rlim[RLIMIT_SIGPENDING].rlim_cur) { + q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); ++ if (q != NULL) { ++ /* ++ * Note: use of get_exec_ub() here vs get_task_ub() ++ * in send_signal() is not intentional. SAW 2005/03/09 ++ */ ++ if (ub_siginfo_charge(get_exec_ub(), ++ kmem_cache_memusage(sigqueue_cachep))) { ++ kfree(q); ++ q = NULL; ++ } ++ } ++ } + if (q) { + INIT_LIST_HEAD(&q->list); + q->flags = 0; + q->lock = NULL; + q->user = get_uid(current->user); ++ sig_ub(q) = get_beancounter(get_exec_ub()); + atomic_inc(&q->user->sigpending); + } + return(q); +@@ -283,6 +301,8 @@ static inline void __sigqueue_free(struc + { + if (q->flags & SIGQUEUE_PREALLOC) + return; ++ ub_siginfo_uncharge(sig_ub(q), kmem_cache_memusage(sigqueue_cachep)); ++ put_beancounter(sig_ub(q)); + atomic_dec(&q->user->sigpending); + free_uid(q->user); + kmem_cache_free(sigqueue_cachep, q); +@@ -500,7 +520,16 @@ static int __dequeue_signal(struct sigpe + { + int sig = 0; + +- sig = next_signal(pending, mask); ++ /* SIGKILL must have priority, otherwise it is quite easy ++ * to create an unkillable process, sending sig < SIGKILL ++ * to self */ ++ if (unlikely(sigismember(&pending->signal, SIGKILL))) { ++ if (!sigismember(mask, SIGKILL)) ++ sig = SIGKILL; ++ } ++ ++ if (likely(!sig)) ++ sig = next_signal(pending, mask); + if (sig) { + if (current->notifier) { + if (sigismember(current->notifier_mask, sig)) { +@@ -721,12 +750,21 @@ static int send_signal(int sig, struct s + pass on the info struct. 
*/ + + if (atomic_read(&t->user->sigpending) < +- t->rlim[RLIMIT_SIGPENDING].rlim_cur) ++ t->rlim[RLIMIT_SIGPENDING].rlim_cur) { + q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC); ++ if (q != NULL) { ++ if (ub_siginfo_charge(get_task_ub(t), ++ kmem_cache_memusage(sigqueue_cachep))) { ++ kfree(q); ++ q = NULL; ++ } ++ } ++ } + + if (q) { + q->flags = 0; + q->user = get_uid(t->user); ++ sig_ub(q) = get_beancounter(get_task_ub(t)); + atomic_inc(&q->user->sigpending); + list_add_tail(&q->list, &signals->list); + switch ((unsigned long) info) { +@@ -734,7 +772,7 @@ static int send_signal(int sig, struct s + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; +- q->info.si_pid = current->pid; ++ q->info.si_pid = virt_pid(current); + q->info.si_uid = current->uid; + break; + case 1: +@@ -855,7 +893,7 @@ force_sig_specific(int sig, struct task_ + */ + #define wants_signal(sig, p, mask) \ + (!sigismember(&(p)->blocked, sig) \ +- && !((p)->state & mask) \ ++ && !(((p)->state | (p)->exit_state) & mask) \ + && !((p)->flags & PF_EXITING) \ + && (task_curr(p) || !signal_pending(p))) + +@@ -993,7 +1031,7 @@ __group_send_sig_info(int sig, struct si + * Don't bother zombies and stopped tasks (but + * SIGKILL will punch through stopped state) + */ +- mask = TASK_DEAD | TASK_ZOMBIE; ++ mask = EXIT_DEAD | EXIT_ZOMBIE; + if (sig != SIGKILL) + mask |= TASK_STOPPED; + +@@ -1026,7 +1064,7 @@ void zap_other_threads(struct task_struc + /* + * Don't bother with already dead threads + */ +- if (t->state & (TASK_ZOMBIE|TASK_DEAD)) ++ if (t->exit_state & (EXIT_ZOMBIE|EXIT_DEAD)) + continue; + + /* +@@ -1072,20 +1110,23 @@ int group_send_sig_info(int sig, struct + int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) + { + struct task_struct *p; +- struct list_head *l; +- struct pid *pid; + int retval, success; + + if (pgrp <= 0) + return -EINVAL; + ++ /* Use __vpid_to_pid(). This function is used under write_lock ++ * tasklist_lock. */ ++ if (is_virtual_pid(pgrp)) ++ pgrp = __vpid_to_pid(pgrp); ++ + success = 0; + retval = -ESRCH; +- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { ++ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) { + int err = group_send_sig_info(sig, info, p); + success |= !err; + retval = err; +- } ++ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p); + return success ? 
0 : retval; + } + +@@ -1112,22 +1153,22 @@ int + kill_sl_info(int sig, struct siginfo *info, pid_t sid) + { + int err, retval = -EINVAL; +- struct pid *pid; +- struct list_head *l; + struct task_struct *p; + + if (sid <= 0) + goto out; + ++ sid = vpid_to_pid(sid); ++ + retval = -ESRCH; + read_lock(&tasklist_lock); +- for_each_task_pid(sid, PIDTYPE_SID, p, l, pid) { ++ do_each_task_pid_ve(sid, PIDTYPE_SID, p) { + if (!p->signal->leader) + continue; + err = group_send_sig_info(sig, info, p); + if (retval) + retval = err; +- } ++ } while_each_task_pid_ve(sid, PIDTYPE_SID, p); + read_unlock(&tasklist_lock); + out: + return retval; +@@ -1140,7 +1181,7 @@ kill_proc_info(int sig, struct siginfo * + struct task_struct *p; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + error = -ESRCH; + if (p) + error = group_send_sig_info(sig, info, p); +@@ -1165,8 +1206,8 @@ static int kill_something_info(int sig, + struct task_struct * p; + + read_lock(&tasklist_lock); +- for_each_process(p) { +- if (p->pid > 1 && p->tgid != current->tgid) { ++ for_each_process_ve(p) { ++ if (virt_pid(p) > 1 && p->tgid != current->tgid) { + int err = group_send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) +@@ -1377,7 +1418,7 @@ send_group_sigqueue(int sig, struct sigq + * Don't bother zombies and stopped tasks (but + * SIGKILL will punch through stopped state) + */ +- mask = TASK_DEAD | TASK_ZOMBIE; ++ mask = EXIT_DEAD | EXIT_ZOMBIE; + if (sig != SIGKILL) + mask |= TASK_STOPPED; + +@@ -1436,12 +1477,22 @@ void do_notify_parent(struct task_struct + if (sig == -1) + BUG(); + +- BUG_ON(tsk->group_leader != tsk && tsk->group_leader->state != TASK_ZOMBIE && !tsk->ptrace); ++ BUG_ON(tsk->group_leader != tsk && ++ tsk->group_leader->exit_state != EXIT_ZOMBIE && ++ tsk->group_leader->exit_state != EXIT_DEAD && ++ !tsk->ptrace); + BUG_ON(tsk->group_leader == tsk && !thread_group_empty(tsk) && !tsk->ptrace); + ++#ifdef CONFIG_VE ++ /* Allow to send only SIGCHLD from VE */ ++ if (sig != SIGCHLD && ++ VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(tsk->parent)->owner_env) ++ sig = SIGCHLD; ++#endif ++ + info.si_signo = sig; + info.si_errno = 0; +- info.si_pid = tsk->pid; ++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(tsk->parent)->owner_env); + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ +@@ -1475,7 +1526,7 @@ void do_notify_parent(struct task_struct + + psig = tsk->parent->sighand; + spin_lock_irqsave(&psig->siglock, flags); +- if (sig == SIGCHLD && tsk->state != TASK_STOPPED && ++ if (!tsk->ptrace && sig == SIGCHLD && tsk->state != TASK_STOPPED && + (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { + /* +@@ -1530,7 +1581,7 @@ do_notify_parent_cldstop(struct task_str + + info.si_signo = SIGCHLD; + info.si_errno = 0; +- info.si_pid = tsk->pid; ++ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env); + info.si_uid = tsk->uid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ +@@ -1575,7 +1626,9 @@ finish_stop(int stop_count) + read_unlock(&tasklist_lock); + } + ++ set_stop_state(current); + schedule(); ++ clear_stop_state(current); + /* + * Now we don't run again until continued. + */ +@@ -1756,10 +1809,12 @@ relock: + /* Let the debugger run. 
*/ + current->exit_code = signr; + current->last_siginfo = info; ++ set_pn_state(current, PN_STOP_SIGNAL); + set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + notify_parent(current, SIGCHLD); + schedule(); ++ clear_pn_state(current); + + current->last_siginfo = NULL; + +@@ -1779,7 +1834,7 @@ relock: + info->si_signo = signr; + info->si_errno = 0; + info->si_code = SI_USER; +- info->si_pid = current->parent->pid; ++ info->si_pid = virt_pid(current->parent); + info->si_uid = current->parent->uid; + } + +@@ -1803,8 +1858,14 @@ relock: + continue; + + /* Init gets no signals it doesn't want. */ +- if (current->pid == 1) ++ if (virt_pid(current) == 1) { ++ /* Allow SIGKILL for non-root VE */ ++#ifdef CONFIG_VE ++ if (current->pid == 1 || ++ signr != SIGKILL) ++#endif + continue; ++ } + + if (sig_kernel_stop(signr)) { + /* +@@ -2174,7 +2235,7 @@ sys_kill(int pid, int sig) + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_USER; +- info.si_pid = current->tgid; ++ info.si_pid = virt_tgid(current); + info.si_uid = current->uid; + + return kill_something_info(sig, &info, pid); +@@ -2203,13 +2264,13 @@ asmlinkage long sys_tgkill(int tgid, int + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; +- info.si_pid = current->tgid; ++ info.si_pid = virt_tgid(current); + info.si_uid = current->uid; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + error = -ESRCH; +- if (p && (p->tgid == tgid)) { ++ if (p && (virt_tgid(p) == tgid)) { + error = check_kill_permission(sig, &info, p); + /* + * The null signal is a permissions and process existence +@@ -2243,11 +2304,11 @@ sys_tkill(int pid, int sig) + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; +- info.si_pid = current->tgid; ++ info.si_pid = virt_tgid(current); + info.si_uid = current->uid; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + error = -ESRCH; + if (p) { + error = check_kill_permission(sig, &info, p); +@@ -2285,7 +2346,7 @@ sys_rt_sigqueueinfo(int pid, int sig, si + } + + int +-do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) ++do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) + { + struct k_sigaction *k; + +@@ -2308,6 +2369,8 @@ do_sigaction(int sig, const struct k_sig + *oact = *k; + + if (act) { ++ sigdelsetmask(&act->sa.sa_mask, ++ sigmask(SIGKILL) | sigmask(SIGSTOP)); + /* + * POSIX 3.3.1.3: + * "Setting a signal action to SIG_IGN for a signal that is +@@ -2333,8 +2396,6 @@ do_sigaction(int sig, const struct k_sig + read_lock(&tasklist_lock); + spin_lock_irq(&t->sighand->siglock); + *k = *act; +- sigdelsetmask(&k->sa.sa_mask, +- sigmask(SIGKILL) | sigmask(SIGSTOP)); + rm_from_queue(sigmask(sig), &t->signal->shared_pending); + do { + rm_from_queue(sigmask(sig), &t->pending); +@@ -2347,8 +2408,6 @@ do_sigaction(int sig, const struct k_sig + } + + *k = *act; +- sigdelsetmask(&k->sa.sa_mask, +- sigmask(SIGKILL) | sigmask(SIGSTOP)); + } + + spin_unlock_irq(¤t->sighand->siglock); +@@ -2554,6 +2613,7 @@ sys_signal(int sig, __sighandler_t handl + + new_sa.sa.sa_handler = handler; + new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; ++ sigemptyset(&new_sa.sa.sa_mask); + + ret = do_sigaction(sig, &new_sa, &old_sa); + +@@ -2579,5 +2639,5 @@ void __init signals_init(void) + kmem_cache_create("sigqueue", + sizeof(struct sigqueue), + __alignof__(struct sigqueue), +- SLAB_PANIC, NULL, NULL); ++ SLAB_PANIC|SLAB_UBC, NULL, NULL); + } +diff 
-uprN linux-2.6.8.1.orig/kernel/softirq.c linux-2.6.8.1-ve022stab078/kernel/softirq.c +--- linux-2.6.8.1.orig/kernel/softirq.c 2004-08-14 14:54:52.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/softirq.c 2006-05-11 13:05:40.000000000 +0400 +@@ -15,8 +15,10 @@ + #include <linux/percpu.h> + #include <linux/cpu.h> + #include <linux/kthread.h> ++#include <linux/sysctl.h> + + #include <asm/irq.h> ++#include <ub/beancounter.h> + /* + - No shared variables, all the data are CPU local. + - If a softirq needs serialization, let it serialize itself +@@ -43,6 +45,8 @@ EXPORT_SYMBOL(irq_stat); + static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; + + static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); ++static DEFINE_PER_CPU(struct task_struct *, ksoftirqd_wakeup); ++static int ksoftirqd_stat[NR_CPUS]; + + /* + * we cannot loop indefinitely here to avoid userspace starvation, +@@ -53,7 +57,7 @@ static DEFINE_PER_CPU(struct task_struct + static inline void wakeup_softirqd(void) + { + /* Interrupts are disabled: no need to stop preemption */ +- struct task_struct *tsk = __get_cpu_var(ksoftirqd); ++ struct task_struct *tsk = __get_cpu_var(ksoftirqd_wakeup); + + if (tsk && tsk->state != TASK_RUNNING) + wake_up_process(tsk); +@@ -75,10 +79,13 @@ asmlinkage void __do_softirq(void) + struct softirq_action *h; + __u32 pending; + int max_restart = MAX_SOFTIRQ_RESTART; ++ struct user_beancounter *old_exec_ub; ++ struct ve_struct *envid; + + pending = local_softirq_pending(); + + local_bh_disable(); ++ envid = set_exec_env(get_ve0()); + restart: + /* Reset the pending bitmask before enabling irqs */ + local_softirq_pending() = 0; +@@ -87,6 +94,8 @@ restart: + + h = softirq_vec; + ++ old_exec_ub = set_exec_ub(get_ub0()); ++ + do { + if (pending & 1) + h->action(h); +@@ -94,6 +103,8 @@ restart: + pending >>= 1; + } while (pending); + ++ (void)set_exec_ub(old_exec_ub); ++ + local_irq_disable(); + + pending = local_softirq_pending(); +@@ -103,6 +114,7 @@ restart: + if (pending) + wakeup_softirqd(); + ++ (void)set_exec_env(envid); + __local_bh_enable(); + } + +@@ -451,6 +463,52 @@ static int __devinit cpu_callback(struct + return NOTIFY_OK; + } + ++static int proc_ksoftirqd(ctl_table *ctl, int write, struct file *filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ int ret, cpu; ++ ++ ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); ++ if (!write) ++ return ret; ++ ++ for_each_online_cpu(cpu) { ++ per_cpu(ksoftirqd_wakeup, cpu) = ++ ksoftirqd_stat[cpu] ? 
per_cpu(ksoftirqd, cpu) : NULL; ++ } ++ return ret; ++} ++ ++static int sysctl_ksoftirqd(ctl_table *table, int *name, int nlen, ++ void *oldval, size_t *oldlenp, void *newval, size_t newlen, ++ void **context) ++{ ++ return -EINVAL; ++} ++ ++static ctl_table debug_table[] = { ++ { ++ .ctl_name = 1246, ++ .procname = "ksoftirqd", ++ .data = ksoftirqd_stat, ++ .maxlen = sizeof(ksoftirqd_stat), ++ .mode = 0644, ++ .proc_handler = &proc_ksoftirqd, ++ .strategy = &sysctl_ksoftirqd ++ }, ++ {0} ++}; ++ ++static ctl_table root_table[] = { ++ { ++ .ctl_name = CTL_DEBUG, ++ .procname = "debug", ++ .mode = 0555, ++ .child = debug_table ++ }, ++ {0} ++}; ++ + static struct notifier_block __devinitdata cpu_nfb = { + .notifier_call = cpu_callback + }; +@@ -461,5 +519,6 @@ __init int spawn_ksoftirqd(void) + cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); + register_cpu_notifier(&cpu_nfb); ++ register_sysctl_table(root_table, 0); + return 0; + } +diff -uprN linux-2.6.8.1.orig/kernel/stop_machine.c linux-2.6.8.1-ve022stab078/kernel/stop_machine.c +--- linux-2.6.8.1.orig/kernel/stop_machine.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/stop_machine.c 2006-05-11 13:05:39.000000000 +0400 +@@ -6,6 +6,7 @@ + #include <linux/syscalls.h> + #include <asm/atomic.h> + #include <asm/semaphore.h> ++#include <asm/uaccess.h> + + /* Since we effect priority and affinity (both of which are visible + * to, and settable by outside processes) we do indirection via a +@@ -81,16 +82,20 @@ static int stop_machine(void) + { + int i, ret = 0; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; ++ mm_segment_t old_fs = get_fs(); + + /* One high-prio thread per cpu. We'll do this one. */ +- sys_sched_setscheduler(current->pid, SCHED_FIFO, ¶m); ++ set_fs(KERNEL_DS); ++ sys_sched_setscheduler(current->pid, SCHED_FIFO, ++ (struct sched_param __user *)¶m); ++ set_fs(old_fs); + + atomic_set(&stopmachine_thread_ack, 0); + stopmachine_num_threads = 0; + stopmachine_state = STOPMACHINE_WAIT; + + for_each_online_cpu(i) { +- if (i == smp_processor_id()) ++ if (i == task_cpu(current)) + continue; + ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); + if (ret < 0) +@@ -109,13 +114,12 @@ static int stop_machine(void) + return ret; + } + +- /* Don't schedule us away at this point, please. */ +- local_irq_disable(); +- + /* Now they are all started, make them hold the CPUs, ready. */ ++ preempt_disable(); + stopmachine_set_state(STOPMACHINE_PREPARE); + + /* Make them disable irqs. 
*/ ++ local_irq_disable(); + stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); + + return 0; +@@ -125,6 +129,7 @@ static void restart_machine(void) + { + stopmachine_set_state(STOPMACHINE_EXIT); + local_irq_enable(); ++ preempt_enable_no_resched(); + } + + struct stop_machine_data +diff -uprN linux-2.6.8.1.orig/kernel/sys.c linux-2.6.8.1-ve022stab078/kernel/sys.c +--- linux-2.6.8.1.orig/kernel/sys.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/sys.c 2006-05-11 13:05:47.000000000 +0400 +@@ -12,6 +12,7 @@ + #include <linux/mman.h> + #include <linux/smp_lock.h> + #include <linux/notifier.h> ++#include <linux/virtinfo.h> + #include <linux/reboot.h> + #include <linux/prctl.h> + #include <linux/init.h> +@@ -23,6 +24,7 @@ + #include <linux/security.h> + #include <linux/dcookies.h> + #include <linux/suspend.h> ++#include <linux/tty.h> + + #include <asm/uaccess.h> + #include <asm/io.h> +@@ -213,6 +215,102 @@ int unregister_reboot_notifier(struct no + + EXPORT_SYMBOL(unregister_reboot_notifier); + ++DECLARE_MUTEX(virtinfo_sem); ++EXPORT_SYMBOL(virtinfo_sem); ++static struct vnotifier_block *virtinfo_chain[VIRT_TYPES]; ++ ++void __virtinfo_notifier_register(int type, struct vnotifier_block *nb) ++{ ++ struct vnotifier_block **p; ++ ++ for (p = &virtinfo_chain[type]; ++ *p != NULL && nb->priority < (*p)->priority; ++ p = &(*p)->next); ++ nb->next = *p; ++ smp_wmb(); ++ *p = nb; ++} ++ ++EXPORT_SYMBOL(__virtinfo_notifier_register); ++ ++void virtinfo_notifier_register(int type, struct vnotifier_block *nb) ++{ ++ down(&virtinfo_sem); ++ __virtinfo_notifier_register(type, nb); ++ up(&virtinfo_sem); ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_register); ++ ++struct virtinfo_cnt_struct { ++ volatile unsigned long exit[NR_CPUS]; ++ volatile unsigned long entry; ++}; ++static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt); ++ ++void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb) ++{ ++ struct vnotifier_block **p; ++ int entry_cpu, exit_cpu; ++ unsigned long cnt, ent; ++ ++ down(&virtinfo_sem); ++ for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next); ++ *p = nb->next; ++ smp_mb(); ++ ++ for_each_cpu_mask(entry_cpu, cpu_possible_map) { ++ while (1) { ++ cnt = 0; ++ for_each_cpu_mask(exit_cpu, cpu_possible_map) ++ cnt += ++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]; ++ smp_rmb(); ++ ent = per_cpu(virtcnt, entry_cpu).entry; ++ if (cnt == ent) ++ break; ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(HZ / 100); ++ } ++ } ++ up(&virtinfo_sem); ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_unregister); ++ ++int virtinfo_notifier_call(int type, unsigned long n, void *data) ++{ ++ int ret; ++ int entry_cpu, exit_cpu; ++ struct vnotifier_block *nb; ++ ++ entry_cpu = get_cpu(); ++ per_cpu(virtcnt, entry_cpu).entry++; ++ smp_wmb(); ++ put_cpu(); ++ ++ nb = virtinfo_chain[type]; ++ ret = NOTIFY_DONE; ++ while (nb) ++ { ++ ret = nb->notifier_call(nb, n, data, ret); ++ if(ret & NOTIFY_STOP_MASK) { ++ ret &= ~NOTIFY_STOP_MASK; ++ break; ++ } ++ nb = nb->next; ++ } ++ ++ exit_cpu = get_cpu(); ++ smp_wmb(); ++ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++; ++ put_cpu(); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL(virtinfo_notifier_call); ++ + asmlinkage long sys_ni_syscall(void) + { + return -ENOSYS; +@@ -310,8 +408,6 @@ asmlinkage long sys_setpriority(int whic + { + struct task_struct *g, *p; + struct user_struct *user; +- struct pid *pid; +- struct list_head *l; + int error = -EINVAL; + + if (which > 2 || which < 0) +@@ -328,16 +424,19 @@ asmlinkage long 
sys_setpriority(int whic + switch (which) { + case PRIO_PROCESS: + if (!who) +- who = current->pid; +- p = find_task_by_pid(who); ++ who = virt_pid(current); ++ p = find_task_by_pid_ve(who); + if (p) + error = set_one_prio(p, niceval, error); + break; + case PRIO_PGRP: + if (!who) + who = process_group(current); +- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) ++ else ++ who = vpid_to_pid(who); ++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) { + error = set_one_prio(p, niceval, error); ++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p); + break; + case PRIO_USER: + if (!who) +@@ -348,10 +447,10 @@ asmlinkage long sys_setpriority(int whic + if (!user) + goto out_unlock; + +- do_each_thread(g, p) ++ do_each_thread_ve(g, p) { + if (p->uid == who) + error = set_one_prio(p, niceval, error); +- while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + if (who) + free_uid(user); /* For find_user() */ + break; +@@ -371,8 +470,6 @@ out: + asmlinkage long sys_getpriority(int which, int who) + { + struct task_struct *g, *p; +- struct list_head *l; +- struct pid *pid; + struct user_struct *user; + long niceval, retval = -ESRCH; + +@@ -383,8 +480,8 @@ asmlinkage long sys_getpriority(int whic + switch (which) { + case PRIO_PROCESS: + if (!who) +- who = current->pid; +- p = find_task_by_pid(who); ++ who = virt_pid(current); ++ p = find_task_by_pid_ve(who); + if (p) { + niceval = 20 - task_nice(p); + if (niceval > retval) +@@ -394,11 +491,13 @@ asmlinkage long sys_getpriority(int whic + case PRIO_PGRP: + if (!who) + who = process_group(current); +- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) { ++ else ++ who = vpid_to_pid(who); ++ do_each_task_pid_ve(who, PIDTYPE_PGID, p) { + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; +- } ++ } while_each_task_pid_ve(who, PIDTYPE_PGID, p); + break; + case PRIO_USER: + if (!who) +@@ -409,13 +508,13 @@ asmlinkage long sys_getpriority(int whic + if (!user) + goto out_unlock; + +- do_each_thread(g, p) ++ do_each_thread_ve(g, p) { + if (p->uid == who) { + niceval = 20 - task_nice(p); + if (niceval > retval) + retval = niceval; + } +- while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + if (who) + free_uid(user); /* for find_user() */ + break; +@@ -451,6 +550,35 @@ asmlinkage long sys_reboot(int magic1, i + magic2 != LINUX_REBOOT_MAGIC2C)) + return -EINVAL; + ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) ++ switch (cmd) { ++ case LINUX_REBOOT_CMD_RESTART: ++ case LINUX_REBOOT_CMD_HALT: ++ case LINUX_REBOOT_CMD_POWER_OFF: ++ case LINUX_REBOOT_CMD_RESTART2: { ++ struct siginfo info; ++ ++ info.si_errno = 0; ++ info.si_code = SI_KERNEL; ++ info.si_pid = virt_pid(current); ++ info.si_uid = current->uid; ++ info.si_signo = SIGKILL; ++ ++ /* Sending to real init is safe */ ++ send_sig_info(SIGKILL, &info, ++ get_exec_env()->init_entry); ++ } ++ ++ case LINUX_REBOOT_CMD_CAD_ON: ++ case LINUX_REBOOT_CMD_CAD_OFF: ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++#endif ++ + lock_kernel(); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: +@@ -641,7 +769,7 @@ asmlinkage long sys_setgid(gid_t gid) + return 0; + } + +-static int set_user(uid_t new_ruid, int dumpclear) ++int set_user(uid_t new_ruid, int dumpclear) + { + struct user_struct *new_user; + +@@ -666,6 +794,7 @@ static int set_user(uid_t new_ruid, int + current->uid = new_ruid; + return 0; + } ++EXPORT_SYMBOL(set_user); + + /* + * Unprivileged users may change the real uid to the effective uid +@@ -954,7 +1083,12 @@ asmlinkage long sys_times(struct tms __u + if 
(copy_to_user(tbuf, &tmp, sizeof(struct tms))) + return -EFAULT; + } ++#ifndef CONFIG_VE + return (long) jiffies_64_to_clock_t(get_jiffies_64()); ++#else ++ return (long) jiffies_64_to_clock_t(get_jiffies_64() - ++ get_exec_env()->init_entry->start_time); ++#endif + } + + /* +@@ -974,21 +1108,24 @@ asmlinkage long sys_setpgid(pid_t pid, p + { + struct task_struct *p; + int err = -EINVAL; ++ pid_t _pgid; + + if (!pid) +- pid = current->pid; ++ pid = virt_pid(current); + if (!pgid) + pgid = pid; + if (pgid < 0) + return -EINVAL; + ++ _pgid = vpid_to_pid(pgid); ++ + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. -DaveM + */ + write_lock_irq(&tasklist_lock); + + err = -ESRCH; +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (!p) + goto out; + +@@ -1013,26 +1150,35 @@ asmlinkage long sys_setpgid(pid_t pid, p + if (p->signal->leader) + goto out; + +- if (pgid != pid) { ++ pgid = virt_pid(p); ++ if (_pgid != p->pid) { + struct task_struct *p; +- struct pid *pid; +- struct list_head *l; + +- for_each_task_pid(pgid, PIDTYPE_PGID, p, l, pid) +- if (p->signal->session == current->signal->session) ++ do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) { ++ if (p->signal->session == current->signal->session) { ++ pgid = virt_pgid(p); + goto ok_pgid; ++ } ++ } while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p); + goto out; + } + + ok_pgid: +- err = security_task_setpgid(p, pgid); ++ err = security_task_setpgid(p, _pgid); + if (err) + goto out; + +- if (process_group(p) != pgid) { ++ if (process_group(p) != _pgid) { + detach_pid(p, PIDTYPE_PGID); +- p->signal->pgrp = pgid; +- attach_pid(p, PIDTYPE_PGID, pgid); ++ p->signal->pgrp = _pgid; ++ set_virt_pgid(p, pgid); ++ attach_pid(p, PIDTYPE_PGID, _pgid); ++ if (atomic_read(&p->signal->count) != 1) { ++ task_t *t; ++ for (t = next_thread(p); t != p; t = next_thread(t)) { ++ set_virt_pgid(t, pgid); ++ } ++ } + } + + err = 0; +@@ -1045,19 +1191,19 @@ out: + asmlinkage long sys_getpgid(pid_t pid) + { + if (!pid) { +- return process_group(current); ++ return virt_pgid(current); + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + + retval = -ESRCH; + if (p) { + retval = security_task_getpgid(p); + if (!retval) +- retval = process_group(p); ++ retval = virt_pgid(p); + } + read_unlock(&tasklist_lock); + return retval; +@@ -1069,7 +1215,7 @@ asmlinkage long sys_getpgid(pid_t pid) + asmlinkage long sys_getpgrp(void) + { + /* SMP - assuming writes are word atomic this is fine */ +- return process_group(current); ++ return virt_pgid(current); + } + + #endif +@@ -1077,19 +1223,19 @@ asmlinkage long sys_getpgrp(void) + asmlinkage long sys_getsid(pid_t pid) + { + if (!pid) { +- return current->signal->session; ++ return virt_sid(current); + } else { + int retval; + struct task_struct *p; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + + retval = -ESRCH; + if(p) { + retval = security_task_getsid(p); + if (!retval) +- retval = p->signal->session; ++ retval = virt_sid(p); + } + read_unlock(&tasklist_lock); + return retval; +@@ -1104,6 +1250,7 @@ asmlinkage long sys_setsid(void) + if (!thread_group_leader(current)) + return -EINVAL; + ++ down(&tty_sem); + write_lock_irq(&tasklist_lock); + + pid = find_pid(PIDTYPE_PGID, current->pid); +@@ -1112,11 +1259,22 @@ asmlinkage long sys_setsid(void) + + current->signal->leader = 1; + __set_special_pids(current->pid, 
current->pid);
++ set_virt_pgid(current, virt_pid(current));
++ set_virt_sid(current, virt_pid(current));
+ current->signal->tty = NULL;
+ current->signal->tty_old_pgrp = 0;
+- err = process_group(current);
++ if (atomic_read(&current->signal->count) != 1) {
++ task_t *t;
++ for (t = next_thread(current); t != current; t = next_thread(t)) {
++ set_virt_pgid(t, virt_pid(current));
++ set_virt_sid(t, virt_pid(current));
++ }
++ }
++
++ err = virt_pgid(current);
+ out:
+ write_unlock_irq(&tasklist_lock);
++ up(&tty_sem);
+ return err;
+ }
+
+@@ -1393,7 +1551,7 @@ asmlinkage long sys_newuname(struct new_
+ int errno = 0;
+
+ down_read(&uts_sem);
+- if (copy_to_user(name,&system_utsname,sizeof *name))
++ if (copy_to_user(name,&ve_utsname,sizeof *name))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1404,15 +1562,15 @@ asmlinkage long sys_sethostname(char __u
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.nodename, tmp, len);
+- system_utsname.nodename[len] = 0;
++ memcpy(ve_utsname.nodename, tmp, len);
++ ve_utsname.nodename[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+@@ -1428,11 +1586,11 @@ asmlinkage long sys_gethostname(char __u
+ if (len < 0)
+ return -EINVAL;
+ down_read(&uts_sem);
+- i = 1 + strlen(system_utsname.nodename);
++ i = 1 + strlen(ve_utsname.nodename);
+ if (i > len)
+ i = len;
+ errno = 0;
+- if (copy_to_user(name, system_utsname.nodename, i))
++ if (copy_to_user(name, ve_utsname.nodename, i))
+ errno = -EFAULT;
+ up_read(&uts_sem);
+ return errno;
+@@ -1449,7 +1607,7 @@ asmlinkage long sys_setdomainname(char _
+ int errno;
+ char tmp[__NEW_UTS_LEN];
+
+- if (!capable(CAP_SYS_ADMIN))
++ if (!capable(CAP_VE_SYS_ADMIN))
+ return -EPERM;
+ if (len < 0 || len > __NEW_UTS_LEN)
+ return -EINVAL;
+@@ -1457,8 +1615,8 @@ asmlinkage long sys_setdomainname(char _
+ down_write(&uts_sem);
+ errno = -EFAULT;
+ if (!copy_from_user(tmp, name, len)) {
+- memcpy(system_utsname.domainname, tmp, len);
+- system_utsname.domainname[len] = 0;
++ memcpy(ve_utsname.domainname, tmp, len);
++ ve_utsname.domainname[len] = 0;
+ errno = 0;
+ }
+ up_write(&uts_sem);
+diff -uprN linux-2.6.8.1.orig/kernel/sysctl.c linux-2.6.8.1-ve022stab078/kernel/sysctl.c
+--- linux-2.6.8.1.orig/kernel/sysctl.c 2004-08-14 14:54:49.000000000 +0400
++++ linux-2.6.8.1-ve022stab078/kernel/sysctl.c 2006-05-11 13:05:49.000000000 +0400
+@@ -25,6 +25,8 @@
+ #include <linux/slab.h>
+ #include <linux/sysctl.h>
+ #include <linux/proc_fs.h>
++#include <linux/ve_owner.h>
++#include <linux/ve.h>
+ #include <linux/ctype.h>
+ #include <linux/utsname.h>
+ #include <linux/capability.h>
+@@ -57,6 +59,7 @@ extern int sysctl_overcommit_ratio;
+ extern int max_threads;
+ extern int sysrq_enabled;
+ extern int core_uses_pid;
++extern int sysctl_at_vsyscall;
+ extern char core_pattern[];
+ extern int cad_pid;
+ extern int pid_max;
+@@ -64,6 +67,10 @@ extern int sysctl_lower_zone_protection;
+ extern int min_free_kbytes;
+ extern int printk_ratelimit_jiffies;
+ extern int printk_ratelimit_burst;
++#ifdef CONFIG_VE
++int glob_virt_pids = 1;
++EXPORT_SYMBOL(glob_virt_pids);
++#endif
+
+ /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+ static int maxolduid = 65535;
+@@ -89,6 +96,10 @@ extern int msg_ctlmnb;
+ extern int msg_ctlmni;
+ extern int sem_ctls[];
+ #endif
++#ifdef CONFIG_SCHED_VCPU ++extern u32 vcpu_sched_timeslice; ++extern u32 vcpu_timeslice; ++#endif + + #ifdef __sparc__ + extern char reboot_command []; +@@ -109,6 +120,7 @@ extern int sysctl_userprocess_debug; + #endif + + extern int sysctl_hz_timer; ++int decode_call_traces = 1; + + #if defined(CONFIG_PPC32) && defined(CONFIG_6xx) + extern unsigned long powersave_nap; +@@ -120,10 +132,14 @@ int proc_dol2crvec(ctl_table *table, int + extern int acct_parm[]; + #endif + ++#ifdef CONFIG_FAIRSCHED ++extern int fairsched_max_latency; ++int fsch_sysctl_latency(ctl_table *ctl, int write, struct file *filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++#endif ++ + static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, + ctl_table *, void **); +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, +- void __user *buffer, size_t *lenp, loff_t *ppos); + + static ctl_table root_table[]; + static struct ctl_table_header root_table_header = +@@ -143,6 +159,8 @@ extern ctl_table random_table[]; + extern ctl_table pty_table[]; + #endif + ++extern int ve_area_access_check; /* fs/namei.c */ ++ + /* /proc declarations: */ + + #ifdef CONFIG_PROC_FS +@@ -159,8 +177,10 @@ struct file_operations proc_sys_file_ope + + extern struct proc_dir_entry *proc_sys_root; + +-static void register_proc_table(ctl_table *, struct proc_dir_entry *); ++static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); + static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); ++ ++extern struct new_utsname virt_utsname; + #endif + + /* The default sysctl tables: */ +@@ -260,6 +280,15 @@ static ctl_table kern_table[] = { + .strategy = &sysctl_string, + }, + { ++ .ctl_name = KERN_VIRT_OSRELEASE, ++ .procname = "virt_osrelease", ++ .data = virt_utsname.release, ++ .maxlen = sizeof(virt_utsname.release), ++ .mode = 0644, ++ .proc_handler = &proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { + .ctl_name = KERN_PANIC, + .procname = "panic", + .data = &panic_timeout, +@@ -579,6 +608,24 @@ static ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif ++#ifdef CONFIG_SCHED_VCPU ++ { ++ .ctl_name = KERN_VCPU_SCHED_TIMESLICE, ++ .procname = "vcpu_sched_timeslice", ++ .data = &vcpu_sched_timeslice, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_VCPU_TIMESLICE, ++ .procname = "vcpu_timeslice", ++ .data = &vcpu_timeslice, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif + { + .ctl_name = KERN_PIDMAX, + .procname = "pid_max", +@@ -587,6 +634,16 @@ static ctl_table kern_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++#ifdef CONFIG_VE ++ { ++ .ctl_name = KERN_VIRT_PIDS, ++ .procname = "virt_pids", ++ .data = &glob_virt_pids, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++#endif + { + .ctl_name = KERN_PANIC_ON_OOPS, + .procname = "panic_on_oops", +@@ -620,6 +677,32 @@ static ctl_table kern_table[] = { + .mode = 0444, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = KERN_SILENCE_LEVEL, ++ .procname = "silence-level", ++ .data = &console_silence_loglevel, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ .ctl_name = KERN_ALLOC_FAIL_WARN, ++ .procname = "alloc_fail_warn", ++ .data = &alloc_fail_warn, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++#ifdef 
CONFIG_FAIRSCHED ++ { ++ .ctl_name = KERN_FAIRSCHED_MAX_LATENCY, ++ .procname = "fairsched-max-latency", ++ .data = &fairsched_max_latency, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &fsch_sysctl_latency ++ }, ++#endif + { .ctl_name = 0 } + }; + +@@ -899,10 +982,26 @@ static ctl_table fs_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = FS_AT_VSYSCALL, ++ .procname = "vsyscall", ++ .data = &sysctl_at_vsyscall, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, + { .ctl_name = 0 } + }; + + static ctl_table debug_table[] = { ++ { ++ .ctl_name = DBG_DECODE_CALLTRACES, ++ .procname = "decode_call_traces", ++ .data = &decode_call_traces, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, + { .ctl_name = 0 } + }; + +@@ -912,10 +1011,51 @@ static ctl_table dev_table[] = { + + extern void init_irq_proc (void); + ++static spinlock_t sysctl_lock = SPIN_LOCK_UNLOCKED; ++ ++/* called under sysctl_lock */ ++static int use_table(struct ctl_table_header *p) ++{ ++ if (unlikely(p->unregistering)) ++ return 0; ++ p->used++; ++ return 1; ++} ++ ++/* called under sysctl_lock */ ++static void unuse_table(struct ctl_table_header *p) ++{ ++ if (!--p->used) ++ if (unlikely(p->unregistering)) ++ complete(p->unregistering); ++} ++ ++/* called under sysctl_lock, will reacquire if has to wait */ ++static void start_unregistering(struct ctl_table_header *p) ++{ ++ /* ++ * if p->used is 0, nobody will ever touch that entry again; ++ * we'll eliminate all paths to it before dropping sysctl_lock ++ */ ++ if (unlikely(p->used)) { ++ struct completion wait; ++ init_completion(&wait); ++ p->unregistering = &wait; ++ spin_unlock(&sysctl_lock); ++ wait_for_completion(&wait); ++ spin_lock(&sysctl_lock); ++ } ++ /* ++ * do not remove from the list until nobody holds it; walking the ++ * list in do_sysctl() relies on that. 
++ */ ++ list_del_init(&p->ctl_entry); ++} ++ + void __init sysctl_init(void) + { + #ifdef CONFIG_PROC_FS +- register_proc_table(root_table, proc_sys_root); ++ register_proc_table(root_table, proc_sys_root, &root_table_header); + init_irq_proc(); + #endif + } +@@ -924,6 +1064,8 @@ int do_sysctl(int __user *name, int nlen + void __user *newval, size_t newlen) + { + struct list_head *tmp; ++ int error = -ENOTDIR; ++ struct ve_struct *ve; + + if (nlen <= 0 || nlen >= CTL_MAXNAME) + return -ENOTDIR; +@@ -932,21 +1074,35 @@ int do_sysctl(int __user *name, int nlen + if (!oldlenp || get_user(old_len, oldlenp)) + return -EFAULT; + } +- tmp = &root_table_header.ctl_entry; ++ ve = get_exec_env(); ++ spin_lock(&sysctl_lock); ++ tmp = ve->sysctl_lh.next; + do { +- struct ctl_table_header *head = +- list_entry(tmp, struct ctl_table_header, ctl_entry); ++ struct ctl_table_header *head; + void *context = NULL; +- int error = parse_table(name, nlen, oldval, oldlenp, ++ ++ if (tmp == &ve->sysctl_lh) ++ /* second pass over global variables */ ++ tmp = &root_table_header.ctl_entry; ++ ++ head = list_entry(tmp, struct ctl_table_header, ctl_entry); ++ if (!use_table(head)) ++ continue; ++ ++ spin_unlock(&sysctl_lock); ++ ++ error = parse_table(name, nlen, oldval, oldlenp, + newval, newlen, head->ctl_table, + &context); +- if (context) +- kfree(context); ++ kfree(context); ++ ++ spin_lock(&sysctl_lock); ++ unuse_table(head); + if (error != -ENOTDIR) +- return error; +- tmp = tmp->next; +- } while (tmp != &root_table_header.ctl_entry); +- return -ENOTDIR; ++ break; ++ } while ((tmp = tmp->next) != &root_table_header.ctl_entry); ++ spin_unlock(&sysctl_lock); ++ return error; + } + + asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +@@ -983,10 +1139,14 @@ static int test_perm(int mode, int op) + static inline int ctl_perm(ctl_table *table, int op) + { + int error; ++ int mode = table->mode; ++ + error = security_sysctl(table, op); + if (error) + return error; +- return test_perm(table->mode, op); ++ if (!ve_accessible(table->owner_env, get_exec_env())) ++ mode &= ~0222; /* disable write access */ ++ return test_perm(mode, op); + } + + static int parse_table(int __user *name, int nlen, +@@ -1152,21 +1312,62 @@ struct ctl_table_header *register_sysctl + int insert_at_head) + { + struct ctl_table_header *tmp; ++ struct list_head *lh; ++ + tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + if (!tmp) + return NULL; + tmp->ctl_table = table; + INIT_LIST_HEAD(&tmp->ctl_entry); ++ tmp->used = 0; ++ tmp->unregistering = NULL; ++ spin_lock(&sysctl_lock); ++#ifdef CONFIG_VE ++ lh = &get_exec_env()->sysctl_lh; ++#else ++ lh = &root_table_header.ctl_entry; ++#endif + if (insert_at_head) +- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); ++ list_add(&tmp->ctl_entry, lh); + else +- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); ++ list_add_tail(&tmp->ctl_entry, lh); ++ spin_unlock(&sysctl_lock); + #ifdef CONFIG_PROC_FS +- register_proc_table(table, proc_sys_root); ++#ifdef CONFIG_VE ++ register_proc_table(table, get_exec_env()->proc_sys_root, tmp); ++#else ++ register_proc_table(table, proc_sys_root, tmp); ++#endif + #endif + return tmp; + } + ++void free_sysctl_clone(ctl_table *clone) ++{ ++ kfree(clone); ++} ++ ++ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr) ++{ ++ int i; ++ ctl_table *clone; ++ ++ clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL); ++ if (clone == NULL) ++ return NULL; ++ ++ memcpy(clone, tmpl, nr * sizeof(ctl_table)); ++ for (i = 0; i < nr; i++) { 
++ if (tmpl[i].ctl_name == 0) ++ continue; ++ clone[i].owner_env = get_exec_env(); ++ if (tmpl[i].child == NULL) ++ continue; ++ clone[i].child = clone + (tmpl[i].child - tmpl); ++ } ++ return clone; ++} ++ + /** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table +@@ -1176,10 +1377,17 @@ struct ctl_table_header *register_sysctl + */ + void unregister_sysctl_table(struct ctl_table_header * header) + { +- list_del(&header->ctl_entry); ++ might_sleep(); ++ spin_lock(&sysctl_lock); ++ start_unregistering(header); + #ifdef CONFIG_PROC_FS ++#ifdef CONFIG_VE ++ unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root); ++#else + unregister_proc_table(header->ctl_table, proc_sys_root); + #endif ++#endif ++ spin_unlock(&sysctl_lock); + kfree(header); + } + +@@ -1190,7 +1398,7 @@ void unregister_sysctl_table(struct ctl_ + #ifdef CONFIG_PROC_FS + + /* Scan the sysctl entries in table and add them all into /proc */ +-static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) ++static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) + { + struct proc_dir_entry *de; + int len; +@@ -1226,13 +1434,14 @@ static void register_proc_table(ctl_tabl + de = create_proc_entry(table->procname, mode, root); + if (!de) + continue; ++ de->set = set; + de->data = (void *) table; + if (table->proc_handler) + de->proc_fops = &proc_sys_file_operations; + } + table->de = de; + if (de->mode & S_IFDIR) +- register_proc_table(table->child, de); ++ register_proc_table(table->child, de, set); + } + } + +@@ -1257,12 +1466,15 @@ static void unregister_proc_table(ctl_ta + continue; + } + +- /* Don't unregister proc entries that are still being used.. */ +- if (atomic_read(&de->count)) +- continue; +- ++ de->data = NULL; + table->de = NULL; ++ /* ++ * sys_sysctl can't find us, since we are removed from list. ++ * proc won't touch either, since de->data is NULL. ++ */ ++ spin_unlock(&sysctl_lock); + remove_proc_entry(table->procname, root); ++ spin_lock(&sysctl_lock); + } + } + +@@ -1270,27 +1482,38 @@ static ssize_t do_rw_proc(int write, str + size_t count, loff_t *ppos) + { + int op; +- struct proc_dir_entry *de; ++ struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); + struct ctl_table *table; + size_t res; +- ssize_t error; +- +- de = PDE(file->f_dentry->d_inode); +- if (!de || !de->data) +- return -ENOTDIR; +- table = (struct ctl_table *) de->data; +- if (!table || !table->proc_handler) +- return -ENOTDIR; +- op = (write ? 002 : 004); +- if (ctl_perm(table, op)) +- return -EPERM; ++ ssize_t error = -ENOTDIR; + +- res = count; +- +- error = (*table->proc_handler) (table, write, file, buf, &res, ppos); +- if (error) +- return error; +- return res; ++ spin_lock(&sysctl_lock); ++ if (de && de->data && use_table(de->set)) { ++ /* ++ * at that point we know that sysctl was not unregistered ++ * and won't be until we finish ++ */ ++ spin_unlock(&sysctl_lock); ++ table = (struct ctl_table *) de->data; ++ if (!table || !table->proc_handler) ++ goto out; ++ error = -EPERM; ++ op = (write ? 
002 : 004); ++ if (ctl_perm(table, op)) ++ goto out; ++ ++ /* careful: calling conventions are nasty here */ ++ res = count; ++ error = (*table->proc_handler)(table, write, file, ++ buf, &res, ppos); ++ if (!error) ++ error = res; ++ out: ++ spin_lock(&sysctl_lock); ++ unuse_table(de->set); ++ } ++ spin_unlock(&sysctl_lock); ++ return error; + } + + static int proc_opensys(struct inode *inode, struct file *file) +@@ -1390,7 +1613,7 @@ int proc_dostring(ctl_table *table, int + * to observe. Should this be in kernel/sys.c ???? + */ + +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, ++int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int r; +@@ -1914,7 +2137,7 @@ int proc_dostring(ctl_table *table, int + return -ENOSYS; + } + +-static int proc_doutsstring(ctl_table *table, int write, struct file *filp, ++int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + return -ENOSYS; +@@ -1967,7 +2190,6 @@ int proc_doulongvec_ms_jiffies_minmax(ct + + #endif /* CONFIG_PROC_FS */ + +- + /* + * General sysctl support routines + */ +@@ -2169,6 +2391,14 @@ void unregister_sysctl_table(struct ctl_ + { + } + ++ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr) ++{ ++ return NULL; ++} ++ ++void free_sysctl_clone(ctl_table *tmpl) ++{ ++} + #endif /* CONFIG_SYSCTL */ + + /* +@@ -2180,9 +2410,12 @@ EXPORT_SYMBOL(proc_dointvec_jiffies); + EXPORT_SYMBOL(proc_dointvec_minmax); + EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); + EXPORT_SYMBOL(proc_dostring); ++EXPORT_SYMBOL(proc_doutsstring); + EXPORT_SYMBOL(proc_doulongvec_minmax); + EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); + EXPORT_SYMBOL(register_sysctl_table); ++EXPORT_SYMBOL(clone_sysctl_template); ++EXPORT_SYMBOL(free_sysctl_clone); + EXPORT_SYMBOL(sysctl_intvec); + EXPORT_SYMBOL(sysctl_jiffies); + EXPORT_SYMBOL(sysctl_string); +diff -uprN linux-2.6.8.1.orig/kernel/time.c linux-2.6.8.1-ve022stab078/kernel/time.c +--- linux-2.6.8.1.orig/kernel/time.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/time.c 2006-05-11 13:05:32.000000000 +0400 +@@ -30,6 +30,7 @@ + #include <linux/smp_lock.h> + #include <asm/uaccess.h> + #include <asm/unistd.h> ++#include <linux/fs.h> + + /* + * The timezone where the local system is located. Used as a default by some +@@ -421,6 +422,50 @@ struct timespec current_kernel_time(void + + EXPORT_SYMBOL(current_kernel_time); + ++/** ++ * current_fs_time - Return FS time ++ * @sb: Superblock. ++ * ++ * Return the current time truncated to the time granuality supported by ++ * the fs. ++ */ ++struct timespec current_fs_time(struct super_block *sb) ++{ ++ struct timespec now = current_kernel_time(); ++ return timespec_trunc(now, get_sb_time_gran(sb)); ++} ++EXPORT_SYMBOL(current_fs_time); ++ ++/** ++ * timespec_trunc - Truncate timespec to a granuality ++ * @t: Timespec ++ * @gran: Granuality in ns. ++ * ++ * Truncate a timespec to a granuality. gran must be smaller than a second. ++ * Always rounds down. ++ * ++ * This function should be only used for timestamps returned by ++ * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because ++ * it doesn't handle the better resolution of the later. ++ */ ++struct timespec timespec_trunc(struct timespec t, unsigned gran) ++{ ++ /* ++ * Division is pretty slow so avoid it for common cases. 
++ * Currently current_kernel_time() never returns better than ++ * jiffies resolution. Exploit that. ++ */ ++ if (gran <= jiffies_to_usecs(1) * 1000) { ++ /* nothing */ ++ } else if (gran == 1000000000) { ++ t.tv_nsec = 0; ++ } else { ++ t.tv_nsec -= t.tv_nsec % gran; ++ } ++ return t; ++} ++EXPORT_SYMBOL(timespec_trunc); ++ + #if (BITS_PER_LONG < 64) + u64 get_jiffies_64(void) + { +diff -uprN linux-2.6.8.1.orig/kernel/timer.c linux-2.6.8.1-ve022stab078/kernel/timer.c +--- linux-2.6.8.1.orig/kernel/timer.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/timer.c 2006-05-11 13:05:49.000000000 +0400 +@@ -31,6 +31,7 @@ + #include <linux/time.h> + #include <linux/jiffies.h> + #include <linux/cpu.h> ++#include <linux/virtinfo.h> + + #include <asm/uaccess.h> + #include <asm/unistd.h> +@@ -299,6 +300,10 @@ repeat: + goto repeat; + } + list_del(&timer->entry); ++ smp_wmb(); /* the list del must have taken effect before timer->base ++ * change is visible to other CPUs, or a concurrent mod_timer ++ * would cause a race with list_add ++ */ + timer->base = NULL; + spin_unlock_irqrestore(&base->lock, flags); + +@@ -444,6 +449,7 @@ repeat: + if (!list_empty(head)) { + void (*fn)(unsigned long); + unsigned long data; ++ struct ve_struct *envid; + + timer = list_entry(head->next,struct timer_list,entry); + fn = timer->function; +@@ -451,11 +457,16 @@ repeat: + + list_del(&timer->entry); + set_running_timer(base, timer); +- smp_wmb(); ++ smp_wmb(); /* the list del must have taken effect before timer->base ++ * change is visible to other CPUs, or a concurrent mod_timer ++ * would cause a race with list_add ++ */ + timer->base = NULL; ++ envid = set_exec_env(get_ve0()); + spin_unlock_irq(&base->lock); + fn(data); + spin_lock_irq(&base->lock); ++ (void)set_exec_env(envid); + goto repeat; + } + } +@@ -776,13 +787,12 @@ static void update_wall_time(unsigned lo + do { + ticks--; + update_wall_time_one_tick(); ++ if (xtime.tv_nsec >= 1000000000) { ++ xtime.tv_nsec -= 1000000000; ++ xtime.tv_sec++; ++ second_overflow(); ++ } + } while (ticks); +- +- if (xtime.tv_nsec >= 1000000000) { +- xtime.tv_nsec -= 1000000000; +- xtime.tv_sec++; +- second_overflow(); +- } + } + + static inline void do_process_times(struct task_struct *p, +@@ -869,6 +879,22 @@ static unsigned long count_active_tasks( + */ + unsigned long avenrun[3]; + ++static void calc_load_ve(void) ++{ ++ unsigned long flags, nr_unint; ++ ++ nr_unint = nr_uninterruptible() * FIXED_1; ++ spin_lock_irqsave(&kstat_glb_lock, flags); ++ CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint); ++ CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint); ++ CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint); ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); ++ ++#ifdef CONFIG_VE ++ do_update_load_avg_ve(); ++#endif ++} ++ + /* + * calc_load - given tick count, update the avenrun load estimates. + * This is called while holding a write_lock on xtime_lock. 
+@@ -885,6 +911,7 @@ static inline void calc_load(unsigned lo + CALC_LOAD(avenrun[0], EXP_1, active_tasks); + CALC_LOAD(avenrun[1], EXP_5, active_tasks); + CALC_LOAD(avenrun[2], EXP_15, active_tasks); ++ calc_load_ve(); + } + } + +@@ -996,7 +1023,7 @@ asmlinkage unsigned long sys_alarm(unsig + */ + asmlinkage long sys_getpid(void) + { +- return current->tgid; ++ return virt_tgid(current); + } + + /* +@@ -1018,28 +1045,15 @@ asmlinkage long sys_getpid(void) + asmlinkage long sys_getppid(void) + { + int pid; +- struct task_struct *me = current; +- struct task_struct *parent; + +- parent = me->group_leader->real_parent; +- for (;;) { +- pid = parent->tgid; +-#ifdef CONFIG_SMP +-{ +- struct task_struct *old = parent; +- +- /* +- * Make sure we read the pid before re-reading the +- * parent pointer: +- */ +- rmb(); +- parent = me->group_leader->real_parent; +- if (old != parent) +- continue; +-} +-#endif +- break; +- } ++ /* Some smart code used to be here. It was wrong. ++ * ->real_parent could be released before dereference and ++ * we accessed freed kernel memory, which faults with debugging on. ++ * Keep it simple and stupid. ++ */ ++ read_lock(&tasklist_lock); ++ pid = virt_tgid(current->group_leader->real_parent); ++ read_unlock(&tasklist_lock); + return pid; + } + +@@ -1157,7 +1171,7 @@ EXPORT_SYMBOL(schedule_timeout); + /* Thread ID - the internal kernel "pid" */ + asmlinkage long sys_gettid(void) + { +- return current->pid; ++ return virt_pid(current); + } + + static long __sched nanosleep_restart(struct restart_block *restart) +@@ -1227,11 +1241,12 @@ asmlinkage long sys_sysinfo(struct sysin + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; + unsigned long seq; ++ unsigned long *__avenrun; ++ struct timespec tp; + + memset((char *)&val, 0, sizeof(struct sysinfo)); + + do { +- struct timespec tp; + seq = read_seqbegin(&xtime_lock); + + /* +@@ -1249,18 +1264,34 @@ asmlinkage long sys_sysinfo(struct sysin + tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; + tp.tv_sec++; + } +- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); +- +- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); +- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); +- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); ++ } while (read_seqretry(&xtime_lock, seq)); + ++ if (ve_is_super(get_exec_env())) { ++ val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); ++ __avenrun = &avenrun[0]; + val.procs = nr_threads; +- } while (read_seqretry(&xtime_lock, seq)); ++ } ++#ifdef CONFIG_VE ++ else { ++ struct ve_struct *ve; ++ ve = get_exec_env(); ++ __avenrun = &ve->avenrun[0]; ++ val.procs = atomic_read(&ve->pcounter); ++ val.uptime = tp.tv_sec - ve->start_timespec.tv_sec; ++ } ++#endif ++ val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); ++ val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); ++ val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + + si_meminfo(&val); + si_swapinfo(&val); + ++#ifdef CONFIG_USER_RESOURCE ++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_SYSINFO, &val) ++ & NOTIFY_FAIL) ++ return -ENOMSG; ++#endif + /* + * If the sum of all the available memory (i.e. 
ram + swap) + * is less than can be stored in a 32 bit unsigned long then +diff -uprN linux-2.6.8.1.orig/kernel/ub/Kconfig linux-2.6.8.1-ve022stab078/kernel/ub/Kconfig +--- linux-2.6.8.1.orig/kernel/ub/Kconfig 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/Kconfig 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,89 @@ ++# ++# User resources part (UBC) ++# ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# ++# Licensing governed by "linux/COPYING.SWsoft" file. ++ ++menu "User resources" ++ ++config USER_RESOURCE ++ bool "Enable user resource accounting" ++ default y ++ help ++ This patch provides accounting and allows to configure ++ limits for user's consumption of exhaustible system resources. ++ The most important resource controlled by this patch is unswappable ++ memory (either mlock'ed or used by internal kernel structures and ++ buffers). The main goal of this patch is to protect processes ++ from running short of important resources because of an accidental ++ misbehavior of processes or malicious activity aiming to ``kill'' ++ the system. It's worth to mention that resource limits configured ++ by setrlimit(2) do not give an acceptable level of protection ++ because they cover only small fraction of resources and work on a ++ per-process basis. Per-process accounting doesn't prevent malicious ++ users from spawning a lot of resource-consuming processes. ++ ++config USER_RSS_ACCOUNTING ++ bool "Account physical memory usage" ++ default y ++ depends on USER_RESOURCE ++ help ++ This allows to estimate per beancounter physical memory usage. ++ Implemented alghorithm accounts shared pages of memory as well, ++ dividing them by number of beancounter which use the page. ++ ++config USER_SWAP_ACCOUNTING ++ bool "Account swap usage" ++ default y ++ depends on USER_RESOURCE ++ help ++ This allows accounting of swap usage. ++ ++config USER_RESOURCE_PROC ++ bool "Report resource usage in /proc" ++ default y ++ depends on USER_RESOURCE ++ help ++ Allows a system administrator to inspect resource accounts and limits. ++ ++config UBC_DEBUG ++ bool "User resources debug features" ++ default n ++ depends on USER_RESOURCE ++ help ++ Enables to setup debug features for user resource accounting ++ ++config UBC_DEBUG_KMEM ++ bool "Debug kmemsize with cache counters" ++ default n ++ depends on UBC_DEBUG ++ help ++ Adds /proc/user_beancounters_debug entry to get statistics ++ about cache usage of each beancounter ++ ++config UBC_KEEP_UNUSED ++ bool "Keep unused beancounter alive" ++ default y ++ depends on UBC_DEBUG ++ help ++ If on, unused beancounters are kept on the hash and maxheld value ++ can be looked through. ++ ++config UBC_DEBUG_ITEMS ++ bool "Account resources in items rather than in bytes" ++ default y ++ depends on UBC_DEBUG ++ help ++ When true some of the resources (e.g. kmemsize) are accounted ++ in items instead of bytes. ++ ++config UBC_UNLIMITED ++ bool "Use unlimited ubc settings" ++ default y ++ depends on UBC_DEBUG ++ help ++ When ON all limits and barriers are set to max values. ++ ++endmenu +diff -uprN linux-2.6.8.1.orig/kernel/ub/Makefile linux-2.6.8.1-ve022stab078/kernel/ub/Makefile +--- linux-2.6.8.1.orig/kernel/ub/Makefile 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/Makefile 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,20 @@ ++# ++# User resources part (UBC) ++# ++# Copyright (C) 2005 SWsoft ++# All rights reserved. ++# ++# Licensing governed by "linux/COPYING.SWsoft" file. 
++ ++obj-y := ub_sys.o ++obj-$(CONFIG_USER_RESOURCE) += beancounter.o ++obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o ++obj-$(CONFIG_USER_RESOURCE) += ub_mem.o ++obj-$(CONFIG_USER_RESOURCE) += ub_misc.o ++obj-$(CONFIG_USER_RESOURCE) += ub_net.o ++obj-$(CONFIG_USER_RESOURCE) += ub_pages.o ++obj-$(CONFIG_USER_RESOURCE) += ub_stat.o ++obj-$(CONFIG_USER_RESOURCE) += ub_oom.o ++ ++obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o ++obj-$(CONFIG_USER_RESOURCE_PROC) += ub_proc.o +diff -uprN linux-2.6.8.1.orig/kernel/ub/beancounter.c linux-2.6.8.1-ve022stab078/kernel/ub/beancounter.c +--- linux-2.6.8.1.orig/kernel/ub/beancounter.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/beancounter.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,675 @@ ++/* ++ * linux/kernel/ub/beancounter.c ++ * ++ * Copyright (C) 1998 Alan Cox ++ * 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * - more intelligent limit check in mremap(): currently the new size is ++ * charged and _then_ old size is uncharged ++ * (almost done: !move_vma case is completely done, ++ * move_vma in its current implementation requires too many conditions to ++ * do things right, because it may be not only expansion, but shrinking ++ * also, plus do_munmap will require an additional parameter...) ++ * - problem: bad pmd page handling ++ * - consider /proc redesign ++ * - TCP/UDP ports ++ * + consider whether __charge_beancounter_locked should be inline ++ * ++ * Changes: ++ * 1999/08/17 Marcelo Tosatti <marcelo@conectiva.com.br> ++ * - Set "barrier" and "limit" parts of limits atomically. ++ * 1999/10/06 Marcelo Tosatti <marcelo@conectiva.com.br> ++ * - setublimit system call. ++ */ ++ ++#include <linux/slab.h> ++#include <linux/module.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_vmpages.h> ++ ++static kmem_cache_t *ub_cachep; ++static struct user_beancounter default_beancounter; ++struct user_beancounter ub0; ++ ++const char *ub_rnames[] = { ++ "kmemsize", /* 0 */ ++ "lockedpages", ++ "privvmpages", ++ "shmpages", ++ "dummy", ++ "numproc", /* 5 */ ++ "physpages", ++ "vmguarpages", ++ "oomguarpages", ++ "numtcpsock", ++ "numflock", /* 10 */ ++ "numpty", ++ "numsiginfo", ++ "tcpsndbuf", ++ "tcprcvbuf", ++ "othersockbuf", /* 15 */ ++ "dgramrcvbuf", ++ "numothersock", ++ "dcachesize", ++ "numfile", ++ "dummy", /* 20 */ ++ "dummy", ++ "dummy", ++ "numiptent", ++ "unused_privvmpages", /* UB_RESOURCES */ ++ "tmpfs_respages", ++ "swap_pages", ++ "held_pages", ++}; ++ ++static void init_beancounter_struct(struct user_beancounter *ub); ++static void init_beancounter_store(struct user_beancounter *ub); ++static void init_beancounter_nolimits(struct user_beancounter *ub); ++ ++void print_ub_uid(struct user_beancounter *ub, char *buf, int size) ++{ ++ if (ub->parent != NULL) ++ snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid); ++ else ++ snprintf(buf, size, "%u", ub->ub_uid); ++} ++EXPORT_SYMBOL(print_ub_uid); ++ ++#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1)) ++#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17) ++struct ub_hash_slot ub_hash[UB_HASH_SIZE]; ++spinlock_t ub_hash_lock; ++EXPORT_SYMBOL(ub_hash); ++EXPORT_SYMBOL(ub_hash_lock); ++ ++/* ++ * Per user resource beancounting. Resources are tied to their luid. 
++ * The resource structure itself is tagged both to the process and ++ * the charging resources (a socket doesn't want to have to search for ++ * things at irq time for example). Reference counters keep things in ++ * hand. ++ * ++ * The case where a user creates resource, kills all his processes and ++ * then starts new ones is correctly handled this way. The refcounters ++ * will mean the old entry is still around with resource tied to it. ++ */ ++struct user_beancounter *get_beancounter_byuid(uid_t uid, int create) ++{ ++ struct user_beancounter *new_ub, *ub; ++ unsigned long flags; ++ struct ub_hash_slot *slot; ++ ++ slot = &ub_hash[ub_hash_fun(uid)]; ++ new_ub = NULL; ++ ++retry: ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ if (new_ub != NULL) ++ kmem_cache_free(ub_cachep, new_ub); ++ return ub; ++ } ++ ++ if (!create) { ++ /* no ub found */ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return NULL; ++ } ++ ++ if (new_ub != NULL) { ++ /* install new ub */ ++ new_ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = new_ub; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return new_ub; ++ } ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ /* alloc new ub */ ++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, ++ GFP_KERNEL); ++ if (new_ub == NULL) ++ return NULL; ++ ++ ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot); ++ memcpy(new_ub, &default_beancounter, sizeof(*new_ub)); ++ init_beancounter_struct(new_ub); ++ new_ub->ub_uid = uid; ++ goto retry; ++} ++EXPORT_SYMBOL(get_beancounter_byuid); ++ ++struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p, ++ int id, int create) ++{ ++ struct user_beancounter *new_ub, *ub; ++ unsigned long flags; ++ struct ub_hash_slot *slot; ++ ++ slot = &ub_hash[ub_subhash_fun(p, id)]; ++ new_ub = NULL; ++ ++retry: ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ if (new_ub != NULL) { ++ put_beancounter(new_ub->parent); ++ kmem_cache_free(ub_cachep, new_ub); ++ } ++ return ub; ++ } ++ ++ if (!create) { ++ /* no ub found */ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return NULL; ++ } ++ ++ if (new_ub != NULL) { ++ /* install new ub */ ++ get_beancounter(new_ub); ++ new_ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = new_ub; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return new_ub; ++ } ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ /* alloc new ub */ ++ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, ++ GFP_KERNEL); ++ if (new_ub == NULL) ++ return NULL; ++ ++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot); ++ memset(new_ub, 0, sizeof(*new_ub)); ++ init_beancounter_nolimits(new_ub); ++ init_beancounter_store(new_ub); ++ init_beancounter_struct(new_ub); ++ atomic_set(&new_ub->ub_refcount, 0); ++ new_ub->ub_uid = id; ++ new_ub->parent = get_beancounter(p); ++ goto retry; ++} ++EXPORT_SYMBOL(get_subbeancounter_byid); ++ ++struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p, ++ int id) ++{ ++ struct user_beancounter *ub; ++ unsigned long flags; ++ struct ub_hash_slot 
*slot; ++ ++ slot = &ub_hash[ub_subhash_fun(p, id)]; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ub = slot->ubh_beans; ++ while (ub != NULL && (ub->parent != p || ub->ub_uid != id)) ++ ub = ub->ub_next; ++ ++ if (ub != NULL) { ++ /* found */ ++ get_beancounter(ub); ++ goto done; ++ } ++ ++ /* alloc new ub */ ++ /* Can be called from non-atomic contexts. Den */ ++ ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC); ++ if (ub == NULL) ++ goto done; ++ ++ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot); ++ memset(ub, 0, sizeof(*ub)); ++ init_beancounter_nolimits(ub); ++ init_beancounter_store(ub); ++ init_beancounter_struct(ub); ++ atomic_set(&ub->ub_refcount, 0); ++ ub->ub_uid = id; ++ ub->parent = get_beancounter(p); ++ ++ /* install new ub */ ++ get_beancounter(ub); ++ ub->ub_next = slot->ubh_beans; ++ slot->ubh_beans = ub; ++ ++done: ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return ub; ++} ++EXPORT_SYMBOL(subbeancounter_findcreate); ++#ifndef CONFIG_UBC_KEEP_UNUSED ++ ++static int verify_res(struct user_beancounter *ub, int resource, ++ unsigned long held) ++{ ++ char id[64]; ++ ++ if (likely(held == 0)) ++ return 1; ++ ++ print_ub_uid(ub, id, sizeof(id)); ++ printk(KERN_WARNING "Ub %s helds %lu in %s on put\n", ++ id, held, ub_rnames[resource]); ++ return 0; ++} ++ ++static inline void verify_held(struct user_beancounter *ub) ++{ ++ int i, clean; ++ ++ clean = 1; ++ for (i = 0; i < UB_RESOURCES; i++) ++ clean &= verify_res(ub, i, ub->ub_parms[i].held); ++ ++ clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages); ++ clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages); ++ clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages); ++ clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages); ++ ++ ub_debug_trace(!clean, 5, 60*HZ); ++} ++ ++static void __unhash_beancounter(struct user_beancounter *ub) ++{ ++ struct user_beancounter **ubptr; ++ struct ub_hash_slot *slot; ++ ++ if (ub->parent != NULL) ++ slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)]; ++ else ++ slot = &ub_hash[ub_hash_fun(ub->ub_uid)]; ++ ubptr = &slot->ubh_beans; ++ ++ while (*ubptr != NULL) { ++ if (*ubptr == ub) { ++ verify_held(ub); ++ *ubptr = ub->ub_next; ++ return; ++ } ++ ubptr = &((*ubptr)->ub_next); ++ } ++ printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n", ++ ub, ub->ub_uid, slot); ++} ++#endif ++ ++void __put_beancounter(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ struct user_beancounter *parent; ++ ++again: ++ parent = ub->parent; ++ ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d " ++ "cur %08lx cpu %d.\n", ++ ub, atomic_read(&ub->ub_refcount), ++ current->comm, current->pid, ++ (unsigned long)current, smp_processor_id()); ++ ++ /* equevalent to atomic_dec_and_lock_irqsave() */ ++ local_irq_save(flags); ++ if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) { ++ if (unlikely(atomic_read(&ub->ub_refcount) < 0)) ++ printk(KERN_ERR "UB: Bad ub refcount: ub=%p, " ++ "luid=%d, ref=%d\n", ++ ub, ub->ub_uid, ++ atomic_read(&ub->ub_refcount)); ++ local_irq_restore(flags); ++ return; ++ } ++ ++ if (unlikely(ub == get_ub0())) { ++ printk(KERN_ERR "Trying to put ub0\n"); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ return; ++ } ++ ++#ifndef CONFIG_UBC_KEEP_UNUSED ++ __unhash_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ub_free_counters(ub); ++ kmem_cache_free(ub_cachep, ub); ++#else ++ spin_unlock_irqrestore(&ub_hash_lock, flags); 
++#endif ++ ub = parent; ++ if (ub != NULL) ++ goto again; ++} ++EXPORT_SYMBOL(__put_beancounter); ++ ++/* ++ * Generic resource charging stuff ++ */ ++ ++int __charge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict) ++{ ++ ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n", ++ val, resource, ub, ub->ub_parms[resource].held); ++ /* ++ * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition ++ * at the moment is possible so an overflow is impossible. ++ */ ++ ub->ub_parms[resource].held += val; ++ ++ switch (strict) { ++ case UB_HARD: ++ if (ub->ub_parms[resource].held > ++ ub->ub_parms[resource].barrier) ++ break; ++ case UB_SOFT: ++ if (ub->ub_parms[resource].held > ++ ub->ub_parms[resource].limit) ++ break; ++ case UB_FORCE: ++ ub_adjust_maxheld(ub, resource); ++ return 0; ++ default: ++ BUG(); ++ } ++ ++ if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl)) ++ printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n", ++ ub_rnames[resource], ub->ub_uid); ++ ub->ub_parms[resource].failcnt++; ++ ub->ub_parms[resource].held -= val; ++ return -ENOMEM; ++} ++ ++int charge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val, enum severity strict) ++{ ++ int retval; ++ struct user_beancounter *p, *q; ++ unsigned long flags; ++ ++ retval = -EINVAL; ++ if (val > UB_MAXVALUE) ++ goto out; ++ ++ local_irq_save(flags); ++ for (p = ub; p != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ retval = __charge_beancounter_locked(p, resource, val, strict); ++ spin_unlock(&p->ub_lock); ++ if (retval) ++ goto unroll; ++ } ++out_restore: ++ local_irq_restore(flags); ++out: ++ return retval; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) { ++ spin_lock(&q->ub_lock); ++ __uncharge_beancounter_locked(q, resource, val); ++ spin_unlock(&q->ub_lock); ++ } ++ goto out_restore; ++} ++ ++EXPORT_SYMBOL(charge_beancounter); ++ ++void charge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ struct user_beancounter *p; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ for (p = ub; p->parent != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ __charge_beancounter_locked(p, resource, val, UB_FORCE); ++ spin_unlock(&p->ub_lock); ++ } ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(charge_beancounter_notop); ++ ++void uncharge_warn(struct user_beancounter *ub, int resource, ++ unsigned long val, unsigned long held) ++{ ++ char id[64]; ++ ++ print_ub_uid(ub, id, sizeof(id)); ++ printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n", ++ val, held, ub_rnames[resource], id); ++ ub_debug_trace(1, 10, 10*HZ); ++} ++ ++void __uncharge_beancounter_locked(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n", ++ val, resource, ub, ub->ub_parms[resource].held); ++ if (ub->ub_parms[resource].held < val) { ++ uncharge_warn(ub, resource, ++ val, ub->ub_parms[resource].held); ++ val = ub->ub_parms[resource].held; ++ } ++ ub->ub_parms[resource].held -= val; ++} ++ ++void uncharge_beancounter(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ unsigned long flags; ++ struct user_beancounter *p; ++ ++ for (p = ub; p != NULL; p = p->parent) { ++ spin_lock_irqsave(&p->ub_lock, flags); ++ __uncharge_beancounter_locked(p, resource, val); ++ spin_unlock_irqrestore(&p->ub_lock, flags); ++ } ++} ++ ++EXPORT_SYMBOL(uncharge_beancounter); ++ ++void 
uncharge_beancounter_notop(struct user_beancounter *ub, ++ int resource, unsigned long val) ++{ ++ struct user_beancounter *p; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ for (p = ub; p->parent != NULL; p = p->parent) { ++ spin_lock(&p->ub_lock); ++ __uncharge_beancounter_locked(p, resource, val); ++ spin_unlock(&p->ub_lock); ++ } ++ local_irq_restore(flags); ++} ++ ++EXPORT_SYMBOL(uncharge_beancounter_notop); ++ ++ ++/* ++ * Rate limiting stuff. ++ */ ++int ub_ratelimit(struct ub_rate_info *p) ++{ ++ unsigned long cjif, djif; ++ unsigned long flags; ++ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; ++ long new_bucket; ++ ++ spin_lock_irqsave(&ratelimit_lock, flags); ++ cjif = jiffies; ++ djif = cjif - p->last; ++ if (djif < p->interval) { ++ if (p->bucket >= p->burst) { ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 0; ++ } ++ p->bucket++; ++ } else { ++ new_bucket = p->bucket - (djif / (unsigned)p->interval); ++ if (new_bucket < 0) ++ new_bucket = 0; ++ p->bucket = new_bucket + 1; ++ } ++ p->last = cjif; ++ spin_unlock_irqrestore(&ratelimit_lock, flags); ++ return 1; ++} ++EXPORT_SYMBOL(ub_ratelimit); ++ ++ ++/* ++ * Initialization ++ * ++ * struct user_beancounter contains ++ * - limits and other configuration settings, ++ * with a copy stored for accounting purposes, ++ * - structural fields: lists, spinlocks and so on. ++ * ++ * Before these parts are initialized, the structure should be memset ++ * to 0 or copied from a known clean structure. That takes care of a lot ++ * of fields not initialized explicitly. ++ */ ++ ++static void init_beancounter_struct(struct user_beancounter *ub) ++{ ++ ub->ub_magic = UB_MAGIC; ++ atomic_set(&ub->ub_refcount, 1); ++ spin_lock_init(&ub->ub_lock); ++ INIT_LIST_HEAD(&ub->ub_tcp_sk_list); ++ INIT_LIST_HEAD(&ub->ub_other_sk_list); ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ INIT_LIST_HEAD(&ub->ub_cclist); ++#endif ++} ++ ++static void init_beancounter_store(struct user_beancounter *ub) ++{ ++ int k; ++ ++ for (k = 0; k < UB_RESOURCES; k++) { ++ memcpy(&ub->ub_store[k], &ub->ub_parms[k], ++ sizeof(struct ubparm)); ++ } ++} ++ ++static void init_beancounter_nolimits(struct user_beancounter *ub) ++{ ++ int k; ++ ++ for (k = 0; k < UB_RESOURCES; k++) { ++ ub->ub_parms[k].limit = UB_MAXVALUE; ++ /* FIXME: whether this is right for physpages and guarantees? */ ++ ub->ub_parms[k].barrier = UB_MAXVALUE; ++ } ++ ++ /* FIXME: set unlimited rate? */ ++ ub->ub_limit_rl.burst = 4; ++ ub->ub_limit_rl.interval = 300*HZ; ++} ++ ++static void init_beancounter_syslimits(struct user_beancounter *ub, ++ unsigned long mp) ++{ ++ extern int max_threads; ++ int k; ++ ++ ub->ub_parms[UB_KMEMSIZE].limit = ++ mp > (192*1024*1024 >> PAGE_SHIFT) ? 
++ 32*1024*1024 : (mp << PAGE_SHIFT) / 6; ++ ub->ub_parms[UB_LOCKEDPAGES].limit = 8; ++ ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE; ++ ub->ub_parms[UB_SHMPAGES].limit = 64; ++ ub->ub_parms[UB_NUMPROC].limit = max_threads / 2; ++ ub->ub_parms[UB_NUMTCPSOCK].limit = 1024; ++ ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024; /* 4k per socket */ ++ ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024; /* 6k per socket */ ++ ub->ub_parms[UB_NUMOTHERSOCK].limit = 256; ++ ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024; /* 4k per socket */ ++ ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024; /* 8k per socket */ ++ ub->ub_parms[UB_NUMFLOCK].limit = 1024; ++ ub->ub_parms[UB_NUMPTY].limit = 16; ++ ub->ub_parms[UB_NUMSIGINFO].limit = 1024; ++ ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024; ++ ub->ub_parms[UB_NUMFILE].limit = 1024; ++ ++ for (k = 0; k < UB_RESOURCES; k++) ++ ub->ub_parms[k].barrier = ub->ub_parms[k].limit; ++ ++ ub->ub_limit_rl.burst = 4; ++ ub->ub_limit_rl.interval = 300*HZ; ++} ++ ++void __init ub0_init(void) ++{ ++ struct user_beancounter *ub; ++ ++ init_cache_counters(); ++ ub = get_ub0(); ++ memset(ub, 0, sizeof(*ub)); ++ ub->ub_uid = 0; ++ init_beancounter_nolimits(ub); ++ init_beancounter_store(ub); ++ init_beancounter_struct(ub); ++ ++ memset(task_bc(current), 0, sizeof(struct task_beancounter)); ++ (void)set_exec_ub(get_ub0()); ++ task_bc(current)->fork_sub = get_beancounter(get_ub0()); ++ mm_ub(&init_mm) = get_beancounter(ub); ++} ++ ++void __init ub_hash_init(void) ++{ ++ struct ub_hash_slot *slot; ++ ++ spin_lock_init(&ub_hash_lock); ++ /* insert ub0 into the hash */ ++ slot = &ub_hash[ub_hash_fun(get_ub0()->ub_uid)]; ++ slot->ubh_beans = get_ub0(); ++} ++ ++void __init beancounter_init(unsigned long mempages) ++{ ++ extern int skbc_cache_init(void); ++ int res; ++ ++ res = skbc_cache_init(); ++ ub_cachep = kmem_cache_create("user_beancounters", ++ sizeof(struct user_beancounter), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ if (res < 0 || ub_cachep == NULL) ++ panic("Can't create ubc caches\n"); ++ ++ memset(&default_beancounter, 0, sizeof(default_beancounter)); ++#ifdef CONFIG_UBC_UNLIMITED ++ init_beancounter_nolimits(&default_beancounter); ++#else ++ init_beancounter_syslimits(&default_beancounter, mempages); ++#endif ++ init_beancounter_store(&default_beancounter); ++ init_beancounter_struct(&default_beancounter); ++ ++ ub_hash_init(); ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_dcache.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_dcache.c +--- linux-2.6.8.1.orig/kernel/ub/ub_dcache.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_dcache.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,333 @@ ++/* ++ * kernel/ub/ub_dcache.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/dcache.h> ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/err.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++#include <ub/ub_dcache.h> ++ ++/* ++ * Locking ++ * traverse dcache_lock d_lock ++ * ub_dentry_charge + + + ++ * ub_dentry_uncharge + - + ++ * ub_dentry_charge_nofail + + - ++ * ++ * d_inuse is atomic so that we can inc dentry's parent d_inuse in ++ * ub_dentry_charhe with the only dentry's d_lock held. ++ * ++ * Race in uncharge vs charge_nofail is handled with dcache_lock. ++ * Race in charge vs charge_nofail is inessential since they both inc d_inuse. 
++ * Race in uncharge vs charge is handled by altering d_inuse under d_lock. ++ * ++ * Race with d_move is handled this way: ++ * - charge_nofail and uncharge are protected by dcache_lock; ++ * - charge works only with dentry and dentry->d_parent->d_inuse, so ++ * it's enough to lock only the dentry. ++ */ ++ ++/* ++ * Beancounting ++ * UB argument must NOT be NULL ++ */ ++ ++static int do_charge_dcache(struct user_beancounter *ub, unsigned long size, ++ enum severity sv) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size), sv)) ++ goto out_mem; ++ if (__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv)) ++ goto out_dcache; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return 0; ++ ++out_dcache: ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size)); ++out_mem: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return -ENOMEM; ++} ++ ++static void do_uncharge_dcache(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size)); ++ __uncharge_beancounter_locked(ub, UB_DCACHESIZE, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static int charge_dcache(struct user_beancounter *ub, unsigned long size, ++ enum severity sv) ++{ ++ struct user_beancounter *p, *q; ++ ++ for (p = ub; p != NULL; p = p->parent) { ++ if (do_charge_dcache(p, size, sv)) ++ goto unroll; ++ } ++ return 0; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) ++ do_uncharge_dcache(q, size); ++ return -ENOMEM; ++} ++ ++void uncharge_dcache(struct user_beancounter *ub, unsigned long size) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_uncharge_dcache(ub, size); ++} ++ ++static inline void charge_dcache_forced(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ charge_dcache(ub, size, UB_FORCE); ++} ++ ++static inline void d_forced_charge(struct dentry_beancounter *d_bc) ++{ ++ d_bc->d_ub = get_beancounter(get_exec_ub()); ++ if (d_bc->d_ub == NULL) ++ return; ++ ++ charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize); ++} ++ ++static inline void d_uncharge(struct dentry_beancounter *d_bc) ++{ ++ if (d_bc->d_ub == NULL) ++ return; ++ ++ uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize); ++ put_beancounter(d_bc->d_ub); ++ d_bc->d_ub = NULL; ++} ++ ++/* ++ * Alloc / free dentry_beancounter ++ */ ++ ++static inline int d_alloc_beancounter(struct dentry *d) ++{ ++ return 0; ++} ++ ++static inline void d_free_beancounter(struct dentry_beancounter *d_bc) ++{ ++} ++ ++static inline unsigned long d_charge_size(struct dentry *dentry) ++{ ++ /* dentry's d_name is already set to appropriate value (see d_alloc) */ ++ return inode_memusage() + dentry_memusage() + ++ (dname_external(dentry) ? ++ kmem_obj_memusage((void *)dentry->d_name.name) : 0); ++} ++ ++/* ++ * dentry mark in use operation ++ * d_lock is held ++ */ ++ ++static int d_inc_inuse(struct dentry *dentry) ++{ ++ struct user_beancounter *ub; ++ struct dentry_beancounter *d_bc; ++ ++ if (dentry != dentry->d_parent) { ++ struct dentry *parent; ++ ++ /* ++ * Increment d_inuse of parent. ++ * It can't change since dentry->d_lock is held. 
++ */ ++ parent = dentry->d_parent; ++ if (atomic_inc_and_test(&dentry_bc(parent)->d_inuse)) ++ BUG(); ++ } ++ ++ d_bc = dentry_bc(dentry); ++ ub = get_beancounter(get_exec_ub()); ++ ++ if (ub != NULL && charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) ++ goto out_err; ++ ++ d_bc->d_ub = ub; ++ return 0; ++ ++out_err: ++ put_beancounter(ub); ++ d_bc->d_ub = NULL; ++ return -ENOMEM; ++} ++ ++/* ++ * no locks ++ */ ++int ub_dentry_alloc(struct dentry *dentry) ++{ ++ int err; ++ struct dentry_beancounter *d_bc; ++ ++ err = d_alloc_beancounter(dentry); ++ if (err < 0) ++ return err; ++ ++ d_bc = dentry_bc(dentry); ++ d_bc->d_ub = get_beancounter(get_exec_ub()); ++ atomic_set(&d_bc->d_inuse, 0); /* see comment in ub_dcache.h */ ++ d_bc->d_ubsize = d_charge_size(dentry); ++ ++ err = 0; ++ if (d_bc->d_ub != NULL && ++ charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) { ++ put_beancounter(d_bc->d_ub); ++ d_free_beancounter(d_bc); ++ err = -ENOMEM; ++ } ++ ++ return err; ++} ++ ++void ub_dentry_free(struct dentry *dentry) ++{ ++} ++ ++/* ++ * Charge / uncharge functions. ++ * ++ * We take d_lock to protect dentry_bc from concurrent acces ++ * when simultaneous __d_lookup and d_put happens on one dentry. ++ */ ++ ++/* ++ * no dcache_lock, d_lock and rcu_read_lock are held ++ * drops d_lock, rcu_read_lock and returns error if any ++ */ ++int ub_dentry_charge(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (atomic_inc_and_test(&dentry_bc(dentry)->d_inuse)) ++ err = d_inc_inuse(dentry); ++ ++ /* ++ * d_lock and rcu_read_lock are dropped here ++ * (see also __d_lookup) ++ */ ++ spin_unlock(&dentry->d_lock); ++ rcu_read_unlock(); ++ ++ if (!err) ++ return 0; ++ ++ /* ++ * d_invlaidate is required for real_lookup ++ * since it tries to create new dentry on ++ * d_lookup failure. ++ */ ++ if (!d_invalidate(dentry)) ++ return err; ++ ++ /* didn't succeeded, force dentry to be charged */ ++ d_forced_charge(dentry_bc(dentry)); ++ return 0; ++} ++ ++/* ++ * dcache_lock is held ++ * no d_locks, sequentaly takes and drops from dentry upward ++ */ ++void ub_dentry_uncharge(struct dentry *dentry) ++{ ++ struct dentry_beancounter *d_bc; ++ struct dentry *parent; ++ ++ /* go up until status is changed and root is not reached */ ++ while (1) { ++ d_bc = dentry_bc(dentry); ++ ++ /* ++ * We need d_lock here to handle ++ * the race with ub_dentry_charge ++ */ ++ spin_lock(&dentry->d_lock); ++ if (!atomic_add_negative(-1, &d_bc->d_inuse)) { ++ spin_unlock(&dentry->d_lock); ++ break; ++ } ++ ++ /* state transition 0 => -1 */ ++ d_uncharge(d_bc); ++ parent = dentry->d_parent; ++ spin_unlock(&dentry->d_lock); ++ ++ /* ++ * dcache_lock is held (see comment in __dget_locked) ++ * so we can safely move upwards. ++ */ ++ if (dentry == parent) ++ break; ++ dentry = parent; ++ } ++} ++ ++/* ++ * forced version. for dget in clean cache, when error is not an option ++ * ++ * dcache_lock is held ++ * no d_locks ++ */ ++void ub_dentry_charge_nofail(struct dentry *dentry) ++{ ++ struct dentry_beancounter *d_bc; ++ struct dentry *parent; ++ ++ /* go up until status is changed and root is not reached */ ++ while (1) { ++ d_bc = dentry_bc(dentry); ++ if (!atomic_inc_and_test(&d_bc->d_inuse)) ++ break; ++ ++ /* ++ * state transition -1 => 0 ++ * ++ * No need to lock dentry before atomic_inc ++ * like we do in ub_dentry_uncharge. ++ * We can't race with ub_dentry_uncharge due ++ * to dcache_lock. The only possible race with ++ * ub_dentry_charge is OK since they both ++ * do atomic_inc. 
++ */ ++ d_forced_charge(d_bc); ++ /* ++ * dcache_lock is held (see comment in __dget_locked) ++ * so we can safely move upwards. ++ */ ++ parent = dentry->d_parent; ++ ++ if (dentry == parent) ++ break; ++ dentry = parent; ++ } ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_mem.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_mem.c +--- linux-2.6.8.1.orig/kernel/ub/ub_mem.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_mem.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,377 @@ ++/* ++ * kernel/ub/ub_mem.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/kmem_slab.h> ++#include <linux/highmem.h> ++#include <linux/vmalloc.h> ++#include <linux/mm.h> ++#include <linux/gfp.h> ++#include <linux/swap.h> ++#include <linux/spinlock.h> ++#include <linux/sched.h> ++#include <linux/module.h> ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++#include <ub/ub_hash.h> ++ ++/* ++ * Initialization ++ */ ++ ++extern void __init page_beancounters_init(void); ++ ++void __init page_ubc_init(void) ++{ ++#ifdef CONFIG_USER_RSS_ACCOUNTING ++ page_beancounters_init(); ++#endif ++} ++ ++/* ++ * Slab accounting ++ */ ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ ++#define CC_HASH_SIZE 1024 ++static struct ub_cache_counter *cc_hash[CC_HASH_SIZE]; ++spinlock_t cc_lock; ++ ++static void __free_cache_counters(struct user_beancounter *ub, ++ kmem_cache_t *cachep) ++{ ++ struct ub_cache_counter *cc, **pprev, *del; ++ int i; ++ unsigned long flags; ++ ++ del = NULL; ++ spin_lock_irqsave(&cc_lock, flags); ++ for (i = 0; i < CC_HASH_SIZE; i++) { ++ pprev = &cc_hash[i]; ++ cc = cc_hash[i]; ++ while (cc != NULL) { ++ if (cc->ub != ub && cc->cachep != cachep) { ++ pprev = &cc->next; ++ cc = cc->next; ++ continue; ++ } ++ ++ list_del(&cc->ulist); ++ *pprev = cc->next; ++ cc->next = del; ++ del = cc; ++ cc = *pprev; ++ } ++ } ++ spin_unlock_irqrestore(&cc_lock, flags); ++ ++ while (del != NULL) { ++ cc = del->next; ++ kfree(del); ++ del = cc; ++ } ++} ++ ++void ub_free_counters(struct user_beancounter *ub) ++{ ++ __free_cache_counters(ub, NULL); ++} ++ ++void ub_kmemcache_free(kmem_cache_t *cachep) ++{ ++ __free_cache_counters(NULL, cachep); ++} ++ ++void __init init_cache_counters(void) ++{ ++ memset(cc_hash, 0, CC_HASH_SIZE * sizeof(cc_hash[0])); ++ spin_lock_init(&cc_lock); ++} ++ ++#define cc_hash_fun(ub, cachep) ( \ ++ (((unsigned long)(ub) >> L1_CACHE_SHIFT) ^ \ ++ ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^ \ ++ ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^ \ ++ ((unsigned long)(cachep) >> (BITS_PER_LONG / 2)) \ ++ ) & (CC_HASH_SIZE - 1)) ++ ++static int change_slab_charged(struct user_beancounter *ub, void *objp, ++ unsigned long val, int mask) ++{ ++ struct ub_cache_counter *cc, *new_cnt, **pprev; ++ kmem_cache_t *cachep; ++ unsigned long flags; ++ ++ cachep = GET_PAGE_CACHE(virt_to_page(objp)); ++ new_cnt = NULL; ++ ++again: ++ spin_lock_irqsave(&cc_lock, flags); ++ cc = cc_hash[cc_hash_fun(ub, cachep)]; ++ while (cc) { ++ if (cc->ub == ub && cc->cachep == cachep) ++ goto found; ++ cc = cc->next; ++ } ++ ++ if (new_cnt != NULL) ++ goto insert; ++ ++ spin_unlock_irqrestore(&cc_lock, flags); ++ ++ new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC); ++ if (new_cnt == NULL) ++ return -ENOMEM; ++ ++ new_cnt->counter = 0; ++ new_cnt->ub = ub; ++ new_cnt->cachep = cachep; ++ goto again; ++ ++insert: ++ pprev = 
&cc_hash[cc_hash_fun(ub, cachep)]; ++ new_cnt->next = *pprev; ++ *pprev = new_cnt; ++ list_add(&new_cnt->ulist, &ub->ub_cclist); ++ cc = new_cnt; ++ new_cnt = NULL; ++ ++found: ++ cc->counter += val; ++ spin_unlock_irqrestore(&cc_lock, flags); ++ if (new_cnt) ++ kfree(new_cnt); ++ return 0; ++} ++ ++static inline int inc_slab_charged(struct user_beancounter *ub, ++ void *objp, int mask) ++{ ++ return change_slab_charged(ub, objp, 1, mask); ++} ++ ++static inline void dec_slab_charged(struct user_beancounter *ub, void *objp) ++{ ++ if (change_slab_charged(ub, objp, -1, 0) < 0) ++ BUG(); ++} ++ ++#include <linux/vmalloc.h> ++ ++static inline int inc_pages_charged(struct user_beancounter *ub, ++ struct page *pg, int order) ++{ ++ int cpu; ++ ++ cpu = get_cpu(); ++ ub->ub_pages_charged[cpu]++; ++ put_cpu(); ++ return 0; ++} ++ ++static inline void dec_pages_charged(struct user_beancounter *ub, ++ struct page *pg, int order) ++{ ++ int cpu; ++ ++ cpu = get_cpu(); ++ ub->ub_pages_charged[cpu]--; ++ put_cpu(); ++} ++ ++void inc_vmalloc_charged(struct vm_struct *vm, int flags) ++{ ++ int cpu; ++ struct user_beancounter *ub; ++ ++ if (!(flags & __GFP_UBC)) ++ return; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return; ++ ++ cpu = get_cpu(); ++ ub->ub_vmalloc_charged[cpu] += vm->nr_pages; ++ put_cpu(); ++} ++ ++void dec_vmalloc_charged(struct vm_struct *vm) ++{ ++ int cpu; ++ struct user_beancounter *ub; ++ ++ ub = page_ub(vm->pages[0]); ++ if (ub == NULL) ++ return; ++ ++ cpu = get_cpu(); ++ ub->ub_vmalloc_charged[cpu] -= vm->nr_pages; ++ put_cpu(); ++} ++ ++#else ++#define inc_slab_charged(ub, o, m) (0) ++#define dec_slab_charged(ub, o) do { } while (0) ++#define inc_pages_charged(ub, pg, o) (0) ++#define dec_pages_charged(ub, pg, o) do { } while (0) ++#endif ++ ++static inline struct user_beancounter **slab_ub_ref(void *objp) ++{ ++ struct page *pg; ++ kmem_cache_t *cachep; ++ struct slab *slabp; ++ int objnr; ++ ++ pg = virt_to_page(objp); ++ cachep = GET_PAGE_CACHE(pg); ++ BUG_ON(!(cachep->flags & SLAB_UBC)); ++ slabp = GET_PAGE_SLAB(pg); ++ objnr = (objp - slabp->s_mem) / cachep->objsize; ++ return slab_ubcs(cachep, slabp) + objnr; ++} ++ ++struct user_beancounter *slab_ub(void *objp) ++{ ++ struct user_beancounter **ub_ref; ++ ++ ub_ref = slab_ub_ref(objp); ++ return *ub_ref; ++} ++ ++EXPORT_SYMBOL(slab_ub); ++ ++int ub_slab_charge(void *objp, int flags) ++{ ++ unsigned int size; ++ struct user_beancounter *ub; ++ ++ ub = get_beancounter(get_exec_ub()); ++ if (ub == NULL) ++ return 0; ++ ++ size = CHARGE_SIZE(kmem_obj_memusage(objp)); ++ if (charge_beancounter(ub, UB_KMEMSIZE, size, ++ (flags & __GFP_SOFT_UBC ? 
UB_SOFT : UB_HARD))) ++ goto out_err; ++ ++ if (inc_slab_charged(ub, objp, flags) < 0) { ++ uncharge_beancounter(ub, UB_KMEMSIZE, size); ++ goto out_err; ++ } ++ *slab_ub_ref(objp) = ub; ++ return 0; ++ ++out_err: ++ put_beancounter(ub); ++ return -ENOMEM; ++} ++ ++void ub_slab_uncharge(void *objp) ++{ ++ unsigned int size; ++ struct user_beancounter **ub_ref; ++ ++ ub_ref = slab_ub_ref(objp); ++ if (*ub_ref == NULL) ++ return; ++ ++ dec_slab_charged(*ub_ref, objp); ++ size = CHARGE_SIZE(kmem_obj_memusage(objp)); ++ uncharge_beancounter(*ub_ref, UB_KMEMSIZE, size); ++ put_beancounter(*ub_ref); ++ *ub_ref = NULL; ++} ++ ++/* ++ * Pages accounting ++ */ ++ ++inline int ub_page_charge(struct page *page, int order, int mask) ++{ ++ struct user_beancounter *ub; ++ ++ ub = NULL; ++ if (!(mask & __GFP_UBC)) ++ goto out; ++ ++ ub = get_beancounter(get_exec_ub()); ++ if (ub == NULL) ++ goto out; ++ ++ if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order), ++ (mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD))) ++ goto err; ++ if (inc_pages_charged(ub, page, order) < 0) { ++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order)); ++ goto err; ++ } ++out: ++ BUG_ON(page_ub(page) != NULL); ++ page_ub(page) = ub; ++ return 0; ++ ++err: ++ BUG_ON(page_ub(page) != NULL); ++ put_beancounter(ub); ++ return -ENOMEM; ++} ++ ++inline void ub_page_uncharge(struct page *page, int order) ++{ ++ struct user_beancounter *ub; ++ ++ ub = page_ub(page); ++ if (ub == NULL) ++ return; ++ ++ dec_pages_charged(ub, page, order); ++ BUG_ON(ub->ub_magic != UB_MAGIC); ++ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order)); ++ put_beancounter(ub); ++ page_ub(page) = NULL; ++} ++ ++/* ++ * takes init_mm.page_table_lock ++ * some outer lock to protect pages from vmalloced area must be held ++ */ ++struct user_beancounter *vmalloc_ub(void *obj) ++{ ++ struct page *pg; ++ ++ spin_lock(&init_mm.page_table_lock); ++ pg = follow_page_k((unsigned long)obj, 0); ++ spin_unlock(&init_mm.page_table_lock); ++ if (pg == NULL) ++ return NULL; ++ ++ return page_ub(pg); ++} ++ ++EXPORT_SYMBOL(vmalloc_ub); ++ ++struct user_beancounter *mem_ub(void *obj) ++{ ++ struct user_beancounter *ub; ++ ++ if ((unsigned long)obj >= VMALLOC_START && ++ (unsigned long)obj < VMALLOC_END) ++ ub = vmalloc_ub(obj); ++ else ++ ub = slab_ub(obj); ++ ++ return ub; ++} ++ ++EXPORT_SYMBOL(mem_ub); +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_misc.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_misc.c +--- linux-2.6.8.1.orig/kernel/ub/ub_misc.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_misc.c 2006-05-11 13:05:49.000000000 +0400 +@@ -0,0 +1,227 @@ ++/* ++ * kernel/ub/ub_misc.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/tty.h> ++#include <linux/tty_driver.h> ++#include <linux/signal.h> ++#include <linux/slab.h> ++#include <linux/fs.h> ++#include <linux/sched.h> ++#include <linux/module.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++ ++/* ++ * Task staff ++ */ ++ ++static void init_task_sub(struct task_struct *parent, ++ struct task_struct *tsk, ++ struct task_beancounter *old_bc) ++{ ++ struct task_beancounter *new_bc; ++ struct user_beancounter *sub; ++ ++ new_bc = task_bc(tsk); ++ sub = old_bc->fork_sub; ++ new_bc->fork_sub = get_beancounter(sub); ++ new_bc->task_fnode = NULL; ++ new_bc->task_freserv = old_bc->task_freserv; ++ old_bc->task_freserv = NULL; ++ memset(&new_bc->task_data, 0, sizeof(new_bc->task_data)); ++} ++ ++int ub_task_charge(struct task_struct *parent, struct task_struct *task) ++{ ++ struct task_beancounter *old_bc; ++ struct task_beancounter *new_bc; ++ struct user_beancounter *ub; ++ ++ old_bc = task_bc(parent); ++ ub = old_bc->fork_sub; ++ ++ if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0) ++ return -ENOMEM; ++ ++ new_bc = task_bc(task); ++ new_bc->task_ub = get_beancounter(ub); ++ new_bc->exec_ub = get_beancounter(ub); ++ init_task_sub(parent, task, old_bc); ++ return 0; ++} ++ ++void ub_task_uncharge(struct task_struct *task) ++{ ++ struct task_beancounter *task_bc; ++ ++ task_bc = task_bc(task); ++ if (task_bc->task_ub != NULL) ++ uncharge_beancounter(task_bc->task_ub, UB_NUMPROC, 1); ++ ++ put_beancounter(task_bc->exec_ub); ++ put_beancounter(task_bc->task_ub); ++ put_beancounter(task_bc->fork_sub); ++ /* can't be freed elsewhere, failures possible in the middle of fork */ ++ if (task_bc->task_freserv != NULL) ++ kfree(task_bc->task_freserv); ++ ++ task_bc->exec_ub = (struct user_beancounter *)0xdeadbcbc; ++} ++ ++/* ++ * Files and file locks. ++ */ ++ ++int ub_file_charge(struct file *f) ++{ ++ struct user_beancounter *ub; ++ ++ /* No need to get_beancounter here since it's already got in slab */ ++ ub = slab_ub(f); ++ if (ub == NULL) ++ return 0; ++ ++ return charge_beancounter(ub, UB_NUMFILE, 1, UB_HARD); ++} ++ ++void ub_file_uncharge(struct file *f) ++{ ++ struct user_beancounter *ub; ++ ++ /* Ub will be put in slab */ ++ ub = slab_ub(f); ++ if (ub == NULL) ++ return; ++ ++ uncharge_beancounter(ub, UB_NUMFILE, 1); ++} ++ ++int ub_flock_charge(struct file_lock *fl, int hard) ++{ ++ struct user_beancounter *ub; ++ int err; ++ ++ /* No need to get_beancounter here since it's already got in slab */ ++ ub = slab_ub(fl); ++ if (ub == NULL) ++ return 0; ++ ++ err = charge_beancounter(ub, UB_NUMFLOCK, 1, hard ? 
UB_HARD : UB_SOFT); ++ if (!err) ++ fl->fl_charged = 1; ++ return err; ++} ++ ++void ub_flock_uncharge(struct file_lock *fl) ++{ ++ struct user_beancounter *ub; ++ ++ /* Ub will be put in slab */ ++ ub = slab_ub(fl); ++ if (ub == NULL || !fl->fl_charged) ++ return; ++ ++ uncharge_beancounter(ub, UB_NUMFLOCK, 1); ++ fl->fl_charged = 0; ++} ++ ++/* ++ * Signal handling ++ */ ++ ++static int do_ub_siginfo_charge(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD)) ++ goto out_kmem; ++ ++ if (__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD)) ++ goto out_num; ++ ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return 0; ++ ++out_num: ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size); ++out_kmem: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return -ENOMEM; ++} ++ ++static void do_ub_siginfo_uncharge(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_KMEMSIZE, size); ++ __uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++int ub_siginfo_charge(struct user_beancounter *ub, unsigned long size) ++{ ++ struct user_beancounter *p, *q; ++ ++ size = CHARGE_SIZE(size); ++ for (p = ub; p != NULL; p = p->parent) { ++ if (do_ub_siginfo_charge(p, size)) ++ goto unroll; ++ } ++ return 0; ++ ++unroll: ++ for (q = ub; q != p; q = q->parent) ++ do_ub_siginfo_uncharge(q, size); ++ return -ENOMEM; ++} ++ ++void ub_siginfo_uncharge(struct user_beancounter *ub, unsigned long size) ++{ ++ size = CHARGE_SIZE(size); ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_siginfo_uncharge(ub, size); ++} ++ ++/* ++ * PTYs ++ */ ++ ++int ub_pty_charge(struct tty_struct *tty) ++{ ++ struct user_beancounter *ub; ++ int retval; ++ ++ ub = tty_ub(tty); ++ retval = 0; ++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER && ++ !test_bit(TTY_CHARGED, &tty->flags)) { ++ retval = charge_beancounter(ub, UB_NUMPTY, 1, UB_HARD); ++ if (!retval) ++ set_bit(TTY_CHARGED, &tty->flags); ++ } ++ return retval; ++} ++ ++void ub_pty_uncharge(struct tty_struct *tty) ++{ ++ struct user_beancounter *ub; ++ ++ ub = tty_ub(tty); ++ if (ub && tty->driver->subtype == PTY_TYPE_MASTER && ++ test_bit(TTY_CHARGED, &tty->flags)) { ++ uncharge_beancounter(ub, UB_NUMPTY, 1); ++ clear_bit(TTY_CHARGED, &tty->flags); ++ } ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_net.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_net.c +--- linux-2.6.8.1.orig/kernel/ub/ub_net.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_net.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,1041 @@ ++/* ++ * linux/kernel/ub/ub_net.c ++ * ++ * Copyright (C) 1998-2004 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * - sizeof(struct inode) charge ++ * = tcp_mem_schedule() feedback based on ub limits ++ * + measures so that one socket won't exhaust all send buffers, ++ * see bug in bugzilla ++ * = sk->socket check for NULL in snd_wakeups ++ * (tcp_write_space checks for NULL itself) ++ * + in tcp_close(), orphaned socket abortion should be based on ubc ++ * resources (same in tcp_out_of_resources) ++ * Beancounter should also have separate orphaned socket counter... 
++ * + for rcv, in-order segment should be accepted ++ * if only barrier is exceeded ++ * = tcp_rmem_schedule() feedback based on ub limits ++ * - repair forward_alloc mechanism for receive buffers ++ * It's idea is that some buffer space is pre-charged so that receive fast ++ * path doesn't need to take spinlocks and do other heavy stuff ++ * + tcp_prune_queue actions based on ub limits ++ * + window adjustments depending on available buffers for receive ++ * - window adjustments depending on available buffers for send ++ * + race around usewreserv ++ * + avoid allocating new page for each tiny-gram, see letter from ANK ++ * + rename ub_sock_lock ++ * + sk->sleep wait queue probably can be used for all wakeups, and ++ * sk->ub_wait is unnecessary ++ * + for UNIX sockets, the current algorithm will lead to ++ * UB_UNIX_MINBUF-sized messages only for non-blocking case ++ * - charge for af_packet sockets ++ * + all datagram sockets should be charged to NUMUNIXSOCK ++ * - we do not charge for skb copies and clones staying in device queues ++ * + live-lock if number of sockets is big and buffer limits are small ++ * [diff-ubc-dbllim3] ++ * - check that multiple readers/writers on the same socket won't cause fatal ++ * consequences ++ * - check allocation/charge orders ++ * + There is potential problem with callback_lock. In *snd_wakeup we take ++ * beancounter first, in sock_def_error_report - callback_lock first. ++ * then beancounter. This is not a problem if callback_lock taken ++ * readonly, but anyway... ++ * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator ++ * General kernel problems: ++ * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC ++ * notification won't get signals ++ * - datagram_poll looks racy ++ * ++ */ ++ ++#include <linux/net.h> ++#include <linux/slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/gfp.h> ++#include <linux/err.h> ++#include <linux/socket.h> ++#include <linux/module.h> ++#include <linux/sched.h> ++ ++#include <net/sock.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_net.h> ++#include <ub/ub_debug.h> ++ ++ ++/* Skb truesize definition. Bad place. 
Den */ ++ ++static inline int skb_chargesize_head(struct sk_buff *skb) ++{ ++ return skb_charge_size(skb->end - skb->head + ++ sizeof(struct skb_shared_info)); ++} ++ ++int skb_charge_fullsize(struct sk_buff *skb) ++{ ++ int chargesize; ++ struct sk_buff *skbfrag; ++ ++ chargesize = skb_chargesize_head(skb) + ++ PAGE_SIZE * skb_shinfo(skb)->nr_frags; ++ if (likely(skb_shinfo(skb)->frag_list == NULL)) ++ return chargesize; ++ for (skbfrag = skb_shinfo(skb)->frag_list; ++ skbfrag != NULL; ++ skbfrag = skbfrag->next) { ++ chargesize += skb_charge_fullsize(skbfrag); ++ } ++ return chargesize; ++} ++EXPORT_SYMBOL(skb_charge_fullsize); ++ ++static int ub_sock_makewreserv_locked(struct sock *sk, ++ int bufid, int sockid, unsigned long size); ++ ++int ub_too_many_orphans(struct sock *sk, int count) ++{ ++ struct user_beancounter *ub; ++ ++ if (sock_has_ubc(sk)) { ++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent); ++ if (count >= ub->ub_parms[UB_NUMTCPSOCK].barrier >> 2) ++ return 1; ++ } ++ return 0; ++} ++ ++/* ++ * Queueing ++ */ ++ ++static void ub_sock_snd_wakeup(struct user_beancounter *ub) ++{ ++ struct list_head *p; ++ struct sock_beancounter *skbc; ++ struct sock *sk; ++ struct user_beancounter *cub; ++ unsigned long added; ++ ++ while (!list_empty(&ub->ub_other_sk_list)) { ++ p = ub->ub_other_sk_list.next; ++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list); ++ sk = skbc_sock(skbc); ++ ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n"); ++ added = -skbc->poll_reserv; ++ if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF, ++ UB_NUMOTHERSOCK, skbc->ub_waitspc)) ++ break; ++ added += skbc->poll_reserv; ++ ++ /* ++ * See comments in ub_tcp_snd_wakeup. ++ * Locking note: both unix_write_space and ++ * sock_def_write_space take callback_lock themselves. ++ * We take it here just to be on the safe side and to ++ * act the same way as ub_tcp_snd_wakeup does. ++ */ ++ sk->sk_write_space(sk); ++ ++ list_del_init(&skbc->ub_sock_list); ++ ++ if (skbc->ub != ub && added) { ++ cub = get_beancounter(skbc->ub); ++ spin_unlock(&ub->ub_lock); ++ charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added); ++ put_beancounter(cub); ++ spin_lock(&ub->ub_lock); ++ } ++ } ++} ++ ++static void ub_tcp_snd_wakeup(struct user_beancounter *ub) ++{ ++ struct list_head *p; ++ struct sock *sk; ++ struct sock_beancounter *skbc; ++ struct socket *sock; ++ struct user_beancounter *cub; ++ unsigned long added; ++ ++ while (!list_empty(&ub->ub_tcp_sk_list)) { ++ p = ub->ub_tcp_sk_list.next; ++ skbc = list_entry(p, struct sock_beancounter, ub_sock_list); ++ sk = skbc_sock(skbc); ++ ++ added = 0; ++ sock = sk->sk_socket; ++ if (sock == NULL) ++ /* sk being destroyed */ ++ goto cont; ++ ++ ub_debug(UBD_NET_SLEEP, ++ "Checking queue, waiting %lu, reserv %lu\n", ++ skbc->ub_waitspc, skbc->poll_reserv); ++ added = -skbc->poll_reserv; ++ if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, ++ UB_NUMTCPSOCK, skbc->ub_waitspc)) ++ break; ++ added += skbc->poll_reserv; ++ ++ /* ++ * Send async notifications and wake up. ++ * Locking note: we get callback_lock here because ++ * tcp_write_space is over-optimistic about calling context ++ * (socket lock is presumed). So we get the lock here although ++ * it belongs to the callback. 
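++		 *
++		 * Bigger picture, as implemented in this file: a socket that
++		 * cannot reserve send space parks itself on ub_tcp_sk_list
++		 * via ub_sock_snd_queue_add(); when space is returned (an skb
++		 * uncharge or ub_sock_ret_wreserv) this loop retries the
++		 * reservation for each parked socket and, on success, wakes
++		 * it through sk_write_space.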
++ */ ++ sk->sk_write_space(sk); ++ ++cont: ++ list_del_init(&skbc->ub_sock_list); ++ ++ if (skbc->ub != ub && added) { ++ cub = get_beancounter(skbc->ub); ++ spin_unlock(&ub->ub_lock); ++ charge_beancounter_notop(cub, UB_TCPSNDBUF, added); ++ put_beancounter(cub); ++ spin_lock(&ub->ub_lock); ++ } ++ } ++} ++ ++void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size) ++{ ++ unsigned long flags; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long added_reserv; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size); ++ added_reserv = -skbc->poll_reserv; ++ if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) { ++ /* ++ * It looks a bit hackish, but it is compatible with both ++ * wait_for_xx_ubspace and poll. ++ * This __set_current_state is equivalent to a wakeup event ++ * right after spin_unlock_irqrestore. ++ */ ++ __set_current_state(TASK_RUNNING); ++ added_reserv += skbc->poll_reserv; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, res, added_reserv); ++ return; ++ } ++ ++ ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n"); ++ skbc->ub_waitspc = size; ++ if (!list_empty(&skbc->ub_sock_list)) { ++ ub_debug(UBD_NET_SOCKET, ++ "re-adding socket to beancounter %p.\n", ub); ++ goto out; ++ } ++ ++ switch (res) { ++ case UB_TCPSNDBUF: ++ list_add_tail(&skbc->ub_sock_list, ++ &ub->ub_tcp_sk_list); ++ break; ++ case UB_OTHERSOCKBUF: ++ list_add_tail(&skbc->ub_sock_list, ++ &ub->ub_other_sk_list); ++ break; ++ default: ++ BUG(); ++ } ++out: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++ ++/* ++ * Helpers ++ */ ++ ++void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk, ++ unsigned long size, int resource) ++{ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ if (sock_bc(sk)->ub == NULL) ++ BUG(); ++ skb_bc(skb)->ub = sock_bc(sk)->ub; ++ skb_bc(skb)->charged = size; ++ skb_bc(skb)->resource = resource; ++ ++ /* Ugly. Ugly. 
Skb in sk writequeue can live without ref to sk */ ++ if (skb->sk == NULL) ++ skb->sk = sk; ++} ++ ++static inline void ub_skb_set_uncharge(struct sk_buff *skb) ++{ ++ skb_bc(skb)->ub = NULL; ++ skb_bc(skb)->charged = 0; ++ skb_bc(skb)->resource = 0; ++} ++ ++static inline void __uncharge_sockbuf(struct sock_beancounter *skbc, ++ struct user_beancounter *ub, int resource, unsigned long size) ++{ ++ if (ub != NULL) ++ __uncharge_beancounter_locked(ub, resource, size); ++ ++ if (skbc != NULL) { ++ if (skbc->ub_wcharged > size) ++ skbc->ub_wcharged -= size; ++ else ++ skbc->ub_wcharged = 0; ++ } ++} ++ ++static void ub_update_rmem_thres(struct sock_beancounter *skub) ++{ ++ struct user_beancounter *ub; ++ ++ if (skub && skub->ub) { ++ for (ub = skub->ub; ub->parent != NULL; ub = ub->parent); ++ ub->ub_rmem_thres = ub->ub_parms[UB_TCPRCVBUF].barrier / ++ (ub->ub_parms[UB_NUMTCPSOCK].held + 1); ++ } ++} ++inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask) ++{ ++ memset(skb_bc(skb), 0, sizeof(struct skb_beancounter)); ++ return 0; ++} ++ ++inline void ub_skb_free_bc(struct sk_buff *skb) ++{ ++} ++ ++ ++/* ++ * Charge socket number ++ */ ++ ++static inline int sk_alloc_beancounter(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ ++ skbc = sock_bc(sk); ++ memset(skbc, 0, sizeof(struct sock_beancounter)); ++ return 0; ++} ++ ++static inline void sk_free_beancounter(struct sock *sk) ++{ ++} ++ ++static int __sock_charge(struct sock *sk, int res) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return 0; ++ if (sk_alloc_beancounter(sk) < 0) ++ return -ENOMEM; ++ ++ skbc = sock_bc(sk); ++ INIT_LIST_HEAD(&skbc->ub_sock_list); ++ ++ if (charge_beancounter(ub, res, 1, UB_HARD) < 0) ++ goto out_limit; ++ ++ /* TCP listen sock or process keeps referrence to UB */ ++ skbc->ub = get_beancounter(ub); ++ return 0; ++ ++out_limit: ++ sk_free_beancounter(sk); ++ return -ENOMEM; ++} ++ ++int ub_tcp_sock_charge(struct sock *sk) ++{ ++ int ret; ++ ++ ret = __sock_charge(sk, UB_NUMTCPSOCK); ++ ub_update_rmem_thres(sock_bc(sk)); ++ ++ return ret; ++} ++ ++int ub_other_sock_charge(struct sock *sk) ++{ ++ return __sock_charge(sk, UB_NUMOTHERSOCK); ++} ++ ++EXPORT_SYMBOL(ub_other_sock_charge); ++ ++int ub_sock_charge(struct sock *sk, int family, int type) ++{ ++ return (IS_TCP_SOCK(family, type) ? ++ ub_tcp_sock_charge(sk) : ub_other_sock_charge(sk)); ++} ++ ++/* ++ * Uncharge socket number ++ */ ++ ++void ub_sock_uncharge(struct sock *sk) ++{ ++ int is_tcp_sock; ++ unsigned long flags; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long reserv; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type); ++ skbc = sock_bc(sk); ++ ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk); ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (!list_empty(&skbc->ub_sock_list)) { ++ ub_debug(UBD_NET_SOCKET, ++ "ub_sock_uncharge: removing from ub(%p) queue.\n", ++ skbc); ++ list_del_init(&skbc->ub_sock_list); ++ } ++ ++ reserv = skbc->poll_reserv; ++ __uncharge_beancounter_locked(ub, ++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF), ++ reserv); ++ __uncharge_beancounter_locked(ub, ++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1); ++ ++ /* The check sk->sk_family != PF_NETLINK is made as the skb is ++ * queued to the kernel end of socket while changed to the user one. 
++ * Den */ ++ if (skbc->ub_wcharged > reserv && ++ sk->sk_family != PF_NETLINK) { ++ skbc->ub_wcharged -= reserv; ++ printk(KERN_WARNING ++ "ub_sock_uncharge: wch=%lu for ub %p (%d).\n", ++ skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid); ++ } else ++ skbc->ub_wcharged = 0; ++ skbc->poll_reserv = 0; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(skbc->ub, ++ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF), ++ reserv); ++ uncharge_beancounter_notop(skbc->ub, ++ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1); ++ ++ put_beancounter(skbc->ub); ++ sk_free_beancounter(sk); ++} ++ ++/* ++ * Send - receive buffers ++ */ ++ ++/* Special case for netlink_dump - (un)charges precalculated size */ ++int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk) ++{ ++ int ret; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ ret = charge_beancounter(sock_bc(sk)->ub, ++ UB_DGRAMRCVBUF, chargesize, UB_HARD); ++ if (ret < 0) ++ return ret; ++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF); ++ return ret; ++} ++ ++/* ++ * Poll reserv accounting ++ */ ++static int ub_sock_makewreserv_locked(struct sock *sk, ++ int bufid, int sockid, unsigned long size) ++{ ++ unsigned long wcharge_added; ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ ++ if (!sock_has_ubc(sk)) ++ goto out; ++ ++ skbc = sock_bc(sk); ++ if (skbc->poll_reserv >= size) /* no work to be done */ ++ goto out; ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ ub->ub_parms[bufid].held += size - skbc->poll_reserv; ++ ++ wcharge_added = 0; ++ /* ++ * Logic: ++ * 1) when used memory hits barrier, we set wmem_pressure; ++ * wmem_pressure is reset under barrier/2; ++ * between barrier/2 and barrier we limit per-socket buffer growth; ++ * 2) each socket is guaranteed to get (limit-barrier)/maxsockets ++ * calculated on the base of memory eaten after the barrier is hit ++ */ ++ skbc = sock_bc(sk); ++ if (!ub_hfbarrier_hit(ub, bufid)) { ++ if (ub->ub_wmem_pressure) ++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ++ ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ ub->ub_wmem_pressure = 0; ++ } ++ if (ub_barrier_hit(ub, bufid)) { ++ if (!ub->ub_wmem_pressure) ++ ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ++ ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ ub->ub_wmem_pressure = 1; ++ wcharge_added = size - skbc->poll_reserv; ++ skbc->ub_wcharged += wcharge_added; ++ if (skbc->ub_wcharged * ub->ub_parms[sockid].limit + ++ ub->ub_parms[bufid].barrier > ++ ub->ub_parms[bufid].limit) ++ goto unroll; ++ } ++ if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit) ++ goto unroll; ++ ++ ub_adjust_maxheld(ub, bufid); ++ skbc->poll_reserv = size; ++out: ++ return 0; ++ ++unroll: ++ ub_debug(UBD_NET_SEND, ++ "makewres: deny " ++ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n", ++ sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held, ++ skbc->ub_wcharged, sk->sk_sndbuf); ++ skbc->ub_wcharged -= wcharge_added; ++ ub->ub_parms[bufid].failcnt++; ++ ub->ub_parms[bufid].held -= size - skbc->poll_reserv; ++ return -ENOMEM; ++} ++ ++int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long flags; ++ unsigned long 
added_reserv; ++ int err; ++ ++ skbc = sock_bc(sk); ++ ++ /* ++ * This function provides that there is sufficient reserve upon return ++ * only if sk has only one user. We can check poll_reserv without ++ * serialization and avoid locking if the reserve already exists. ++ */ ++ if (!sock_has_ubc(sk) || skbc->poll_reserv >= size) ++ return 0; ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ added_reserv = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size); ++ added_reserv += skbc->poll_reserv; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, bufid, added_reserv); ++ ++ return err; ++} ++ ++int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long flags; ++ unsigned long added_reserv; ++ int err; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ added_reserv = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size); ++ added_reserv += skbc->poll_reserv; ++ if (!err) ++ skbc->poll_reserv -= size; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (added_reserv) ++ charge_beancounter_notop(skbc->ub, bufid, added_reserv); ++ ++ return err; ++} ++ ++void ub_sock_ret_wreserv(struct sock *sk, int bufid, ++ unsigned long size, unsigned long ressize) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long extra; ++ unsigned long flags; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ extra = 0; ++ skbc = sock_bc(sk); ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ skbc->poll_reserv += size; ++ if (skbc->poll_reserv > ressize) { ++ extra = skbc->poll_reserv - ressize; ++ __uncharge_beancounter_locked(ub, bufid, extra); ++ ++ if (skbc->ub_wcharged > skbc->poll_reserv - ressize) ++ skbc->ub_wcharged -= skbc->poll_reserv - ressize; ++ else ++ skbc->ub_wcharged = 0; ++ skbc->poll_reserv = ressize; ++ } ++ ++ ub_tcp_snd_wakeup(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ if (extra) ++ uncharge_beancounter_notop(skbc->ub, bufid, extra); ++} ++ ++long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ++ add_wait_queue(sk->sk_sleep, &wait); ++ for (;;) { ++ if (signal_pending(current)) ++ break; ++ set_current_state(TASK_INTERRUPTIBLE); ++ if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size)) ++ break; ++ ++ if (sk->sk_shutdown & SEND_SHUTDOWN) ++ break; ++ if (sk->sk_err) ++ break; ++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size); ++ timeo = schedule_timeout(timeo); ++ } ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(sk->sk_sleep, &wait); ++ return timeo; ++} ++ ++int ub_sock_makewres_other(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size); ++} ++ ++int ub_sock_makewres_tcp(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size); ++} ++ ++int ub_sock_getwres_other(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size); ++} ++ ++int ub_sock_getwres_tcp(struct sock *sk, unsigned long size) ++{ ++ return ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size); ++} ++ ++void 
ub_sock_retwres_other(struct sock *sk, unsigned long size, ++ unsigned long ressize) ++{ ++ ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize); ++} ++ ++void ub_sock_retwres_tcp(struct sock *sk, unsigned long size, ++ unsigned long ressize) ++{ ++ ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize); ++} ++ ++void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz) ++{ ++ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz); ++} ++ ++void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz) ++{ ++ ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz); ++} ++ ++void ub_sock_sndqueuedel(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ unsigned long flags; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ skbc = sock_bc(sk); ++ ++ /* race with write_space callback of other socket */ ++ spin_lock_irqsave(&skbc->ub->ub_lock, flags); ++ list_del_init(&skbc->ub_sock_list); ++ spin_unlock_irqrestore(&skbc->ub->ub_lock, flags); ++} ++ ++/* ++ * UB_DGRAMRCVBUF ++ */ ++ ++int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF, ++ chargesize, UB_HARD)) ++ return -ENOMEM; ++ ++ ub_skb_set_charge(skb, sk, chargesize, UB_DGRAMRCVBUF); ++ return 0; ++} ++ ++EXPORT_SYMBOL(ub_sockrcvbuf_charge); ++ ++static void ub_sockrcvbuf_uncharge(struct sk_buff *skb) ++{ ++ uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF, ++ skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++/* ++ * UB_TCPRCVBUF ++ */ ++static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb, ++ enum severity strict) ++{ ++ int retval; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ /* ++ * Memory pressure reactions: ++ * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND) ++ * 2) set UB_RMEM_SHRINK and tcp_clamp_window() ++ * tcp_collapse_queues() if rmem_alloc > rcvbuf ++ * 3) drop OFO, tcp_purge_ofo() ++ * 4) drop all. ++ * Currently, we do #2 and #3 at once (which means that current ++ * collapsing of OFO queue in tcp_collapse_queues() is a waste of time, ++ * for example...) ++ * On memory pressure we jump from #0 to #3, and when the pressure ++ * subsides, to #1. ++ */ ++ retval = 0; ++ chargesize = skb_charge_fullsize(skb); ++ ++ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_parms[UB_TCPRCVBUF].held += chargesize; ++ if (ub->ub_parms[UB_TCPRCVBUF].held > ++ ub->ub_parms[UB_TCPRCVBUF].barrier && ++ strict != UB_FORCE) ++ goto excess; ++ ub_adjust_maxheld(ub, UB_TCPRCVBUF); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++out: ++ if (retval == 0) { ++ charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF, ++ chargesize); ++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF); ++ } ++ return retval; ++ ++excess: ++ ub->ub_rmem_pressure = UB_RMEM_SHRINK; ++ if (strict == UB_HARD) ++ retval = -ENOMEM; ++ if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit) ++ retval = -ENOMEM; ++ /* ++ * We try to leave numsock*maxadvmss as a reserve for sockets not ++ * queueing any data yet (if the difference between the barrier and the ++ * limit is enough for this reserve). 
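++	 *
++	 * A worked example with made-up numbers: with a TCPRCVBUF limit of
++	 * 10 MB, a NUMTCPSOCK limit of 500 and a maxadvmss of 1460 bytes,
++	 * the reserve is 500 * 1460 = 730000 bytes (about 0.7 MB), so a
++	 * socket that already has data queued is refused as soon as the
++	 * held amount plus that reserve would exceed the 10 MB limit.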
++ */ ++ if (ub->ub_parms[UB_TCPRCVBUF].held + ++ ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss ++ > ub->ub_parms[UB_TCPRCVBUF].limit && ++ atomic_read(&sk->sk_rmem_alloc)) ++ retval = -ENOMEM; ++ if (retval) { ++ ub->ub_parms[UB_TCPRCVBUF].held -= chargesize; ++ ub->ub_parms[UB_TCPRCVBUF].failcnt++; ++ } ++ ub_adjust_maxheld(ub, UB_TCPRCVBUF); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ goto out; ++} ++ ++int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcprcvbuf(sk, skb, UB_HARD); ++} ++ ++int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcprcvbuf(sk, skb, UB_FORCE); ++} ++ ++static void ub_tcprcvbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ unsigned long held, bar; ++ int prev_pres; ++ struct user_beancounter *ub; ++ ++ for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) { ++ printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n", ++ skb_bc(skb)->charged, ++ ub, ub->ub_parms[UB_TCPRCVBUF].held); ++ /* ass-saving bung */ ++ skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held; ++ } ++ ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged; ++ held = ub->ub_parms[UB_TCPRCVBUF].held; ++ bar = ub->ub_parms[UB_TCPRCVBUF].barrier; ++ prev_pres = ub->ub_rmem_pressure; ++ if (held <= bar - (bar >> 2)) ++ ub->ub_rmem_pressure = UB_RMEM_EXPAND; ++ else if (held <= bar) ++ ub->ub_rmem_pressure = UB_RMEM_KEEP; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF, ++ skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++ ++/* ++ * UB_OTHERSOCKBUF ++ */ ++ ++static void ub_socksndbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub, *cub; ++ struct sock_beancounter *sk_bc; ++ ++ /* resource was set. 
no check for ub required */ ++ cub = skb_bc(skb)->ub; ++ for (ub = cub; ub->parent != NULL; ub = ub->parent); ++ skb_bc(skb)->ub = NULL; ++ if (skb->sk != NULL) ++ sk_bc = sock_bc(skb->sk); ++ else ++ sk_bc = NULL; ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_sockbuf(sk_bc, ub, UB_OTHERSOCKBUF, ++ skb_bc(skb)->charged); ++ ub_sock_snd_wakeup(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++static void ub_tcpsndbuf_uncharge(struct sk_buff *skb) ++{ ++ unsigned long flags; ++ struct user_beancounter *ub, *cub; ++ ++ /* resource can be not set, called manually */ ++ cub = skb_bc(skb)->ub; ++ if (cub == NULL) ++ return; ++ for (ub = cub; ub->parent != NULL; ub = ub->parent); ++ skb_bc(skb)->ub = NULL; ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_sockbuf(sock_bc(skb->sk), ub, UB_TCPSNDBUF, ++ skb_bc(skb)->charged); ++ ub_tcp_snd_wakeup(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ uncharge_beancounter_notop(cub, UB_TCPSNDBUF, skb_bc(skb)->charged); ++ ub_skb_set_uncharge(skb); ++} ++ ++void ub_skb_uncharge(struct sk_buff *skb) ++{ ++ switch (skb_bc(skb)->resource) { ++ case UB_TCPSNDBUF: ++ ub_tcpsndbuf_uncharge(skb); ++ break; ++ case UB_TCPRCVBUF: ++ ub_tcprcvbuf_uncharge(skb); ++ break; ++ case UB_DGRAMRCVBUF: ++ ub_sockrcvbuf_uncharge(skb); ++ break; ++ case UB_OTHERSOCKBUF: ++ ub_socksndbuf_uncharge(skb); ++ break; ++ } ++} ++ ++EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */ ++ ++/* ++ * TCP send buffers accouting. Paged part ++ */ ++int ub_sock_tcp_chargepage(struct sock *sk) ++{ ++ struct sock_beancounter *skbc; ++ struct user_beancounter *ub; ++ unsigned long added; ++ unsigned long flags; ++ int err; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ skbc = sock_bc(sk); ++ ++ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ /* Try to charge full page */ ++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK, ++ PAGE_SIZE); ++ if (err == 0) { ++ skbc->poll_reserv -= PAGE_SIZE; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, PAGE_SIZE); ++ return 0; ++ } ++ ++ /* Try to charge page enough to satisfy sys_select. The possible ++ overdraft for the rest of the page is generally better then ++ requesting full page in tcp_poll. This should not happen ++ frequently. Den */ ++ added = -skbc->poll_reserv; ++ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK, ++ SOCK_MIN_UBCSPACE); ++ if (err < 0) { ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return err; ++ } ++ __charge_beancounter_locked(ub, UB_TCPSNDBUF, ++ PAGE_SIZE - skbc->poll_reserv, ++ UB_FORCE); ++ added += PAGE_SIZE; ++ skbc->poll_reserv = 0; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added); ++ ++ return 0; ++ ++} ++ ++void ub_sock_tcp_detachpage(struct sock *sk) ++{ ++ struct sk_buff *skb; ++ ++ if (!sock_has_ubc(sk)) ++ return; ++ ++ /* The page is just detached from socket. 
The last skb in queue ++ with paged part holds referrence to it */ ++ skb = skb_peek_tail(&sk->sk_write_queue); ++ if (skb == NULL) { ++ /* If the queue is empty - all data is sent and page is about ++ to be freed */ ++ uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE); ++ return; ++ } ++ /* Last skb is a good aproximation for a last skb with paged part */ ++ skb_bc(skb)->charged += PAGE_SIZE; ++} ++ ++static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb, ++ enum severity strict) ++{ ++ int ret; ++ unsigned long chargesize; ++ ++ if (!sock_has_ubc(sk)) ++ return 0; ++ ++ chargesize = skb_charge_fullsize(skb); ++ ret = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, chargesize, ++ strict); ++ if (ret < 0) ++ return ret; ++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF); ++ sock_bc(sk)->ub_wcharged += chargesize; ++ return ret; ++} ++ ++int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcpsndbuf(sk, skb, UB_HARD); ++} ++ ++int ub_tcpsndbuf_charge_forced(struct sock *sk, struct sk_buff *skb) ++{ ++ return charge_tcpsndbuf(sk, skb, UB_FORCE); ++} ++ ++/* ++ * Initialization staff ++ */ ++int __init skbc_cache_init(void) ++{ ++ return 0; ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_oom.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_oom.c +--- linux-2.6.8.1.orig/kernel/ub/ub_oom.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_oom.c 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,93 @@ ++/* ++ * kernel/ub/ub_oom.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/sched.h> ++#include <linux/spinlock.h> ++#include <linux/mm.h> ++#include <linux/swap.h> ++ ++#include <asm/page.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_misc.h> ++#include <ub/ub_hash.h> ++ ++static inline long ub_current_overdraft(struct user_beancounter *ub) ++{ ++ return ub->ub_parms[UB_OOMGUARPAGES].held + ++ ((ub->ub_parms[UB_KMEMSIZE].held ++ + ub->ub_parms[UB_TCPSNDBUF].held ++ + ub->ub_parms[UB_TCPRCVBUF].held ++ + ub->ub_parms[UB_OTHERSOCKBUF].held ++ + ub->ub_parms[UB_DGRAMRCVBUF].held) ++ >> PAGE_SHIFT) - ub->ub_parms[UB_OOMGUARPAGES].barrier; ++} ++ ++/* ++ * Select an user_beancounter to find task inside it to be killed. ++ * Select the beancounter with the biggest excess of resource usage ++ * to kill a process belonging to that beancounter later, or returns ++ * NULL if there are no beancounters with such excess. 
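++ *
++ * The excess is ub_current_overdraft() above: pages held against
++ * UB_OOMGUARPAGES plus the page equivalent of held kernel memory and
++ * socket buffers, minus the UB_OOMGUARPAGES barrier (the guarantee).
++ * As an illustration with made-up numbers, a beancounter holding 60000
++ * pages against a 50000-page guarantee has an overdraft of 10000 pages
++ * and is preferred over any beancounter still within its guarantee.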
++ */ ++ ++struct user_beancounter *ub_select_worst(long *ub_maxover) ++{ ++ struct user_beancounter *ub, *walkp; ++ unsigned long flags; ++ int i; ++ ++ *ub_maxover = 0; ++ ub = NULL; ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ ++ for_each_beancounter(i, walkp) { ++ long ub_overdraft; ++ ++ if (walkp->parent != NULL) ++ continue; ++ if (walkp->ub_oom_noproc) ++ continue; ++ ub_overdraft = ub_current_overdraft(walkp); ++ if (ub_overdraft > *ub_maxover) { ++ ub = walkp; ++ *ub_maxover = ub_overdraft; ++ } ++ } ++ get_beancounter(ub); ++ if(ub) ++ ub->ub_oom_noproc = 1; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ return ub; ++} ++ ++void ub_oomkill_task(struct mm_struct * mm, struct user_beancounter *ub, ++ long maxover) ++{ ++ static struct ub_rate_info ri = { 5, 60*HZ }; ++ ++ /* increment is serialized with oom_generation_lock */ ++ mm_ub(mm)->ub_parms[UB_OOMGUARPAGES].failcnt++; ++ ++ if (ub_ratelimit(&ri)) ++ show_mem(); ++} ++ ++void ub_clear_oom(void) ++{ ++ unsigned long flags; ++ int i; ++ struct user_beancounter *walkp; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ for_each_beancounter(i, walkp) ++ walkp->ub_oom_noproc = 0; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_page_bc.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_page_bc.c +--- linux-2.6.8.1.orig/kernel/ub/ub_page_bc.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_page_bc.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,403 @@ ++/* ++ * kernel/ub/ub_page_bc.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/spinlock.h> ++#include <linux/slab.h> ++#include <linux/mm.h> ++#include <linux/gfp.h> ++#include <linux/vmalloc.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_vmpages.h> ++#include <ub/ub_page.h> ++ ++static kmem_cache_t *pb_cachep; ++static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED; ++static struct page_beancounter **pb_hash_table; ++static unsigned int pb_hash_mask; ++ ++/* ++ * Auxiliary staff ++ */ ++ ++static inline struct page_beancounter *next_page_pb(struct page_beancounter *p) ++{ ++ return list_entry(p->page_list.next, struct page_beancounter, ++ page_list); ++} ++ ++static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p) ++{ ++ return list_entry(p->page_list.prev, struct page_beancounter, ++ page_list); ++} ++ ++/* ++ * Held pages manipulation ++ */ ++static inline void set_held_pages(struct user_beancounter *bc) ++{ ++ /* all three depend on ub_held_pages */ ++ __ub_update_physpages(bc); ++ __ub_update_oomguarpages(bc); ++ __ub_update_privvm(bc); ++} ++ ++static inline void do_dec_held_pages(struct user_beancounter *ub, int value) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_held_pages -= value; ++ set_held_pages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static void dec_held_pages(struct user_beancounter *ub, int value) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_dec_held_pages(ub, value); ++} ++ ++static inline void do_inc_held_pages(struct user_beancounter *ub, int value) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_held_pages += value; ++ set_held_pages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++static void inc_held_pages(struct user_beancounter *ub, int value) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_inc_held_pages(ub, 
value); ++} ++ ++/* ++ * Alloc - free ++ */ ++ ++inline int pb_alloc(struct page_beancounter **pbc) ++{ ++ *pbc = kmem_cache_alloc(pb_cachep, GFP_KERNEL); ++ if (*pbc != NULL) ++ (*pbc)->pb_magic = PB_MAGIC; ++ return (*pbc == NULL); ++} ++ ++inline void pb_free(struct page_beancounter **pb) ++{ ++ if (*pb != NULL) { ++ kmem_cache_free(pb_cachep, *pb); ++ *pb = NULL; ++ } ++} ++ ++void pb_free_list(struct page_beancounter **p_pb) ++{ ++ struct page_beancounter *list = *p_pb, *pb; ++ while (list) { ++ pb = list; ++ list = list->next_hash; ++ pb_free(&pb); ++ } ++ *p_pb = NULL; ++} ++ ++/* ++ * head -> <new objs> -> <old objs> -> ... ++ */ ++static int __alloc_list(struct page_beancounter **head, int num) ++{ ++ struct page_beancounter *pb; ++ ++ while (num > 0) { ++ if (pb_alloc(&pb)) ++ return -1; ++ pb->next_hash = *head; ++ *head = pb; ++ num--; ++ } ++ ++ return num; ++} ++ ++/* ++ * Ensure that the list contains at least num elements. ++ * p_pb points to an initialized list, may be of the zero length. ++ * ++ * mm->page_table_lock should be held ++ */ ++int pb_alloc_list(struct page_beancounter **p_pb, int num, ++ struct mm_struct *mm) ++{ ++ struct page_beancounter *list; ++ ++ for (list = *p_pb; list != NULL && num; list = list->next_hash, num--); ++ if (!num) ++ return 0; ++ ++ spin_unlock(&mm->page_table_lock); ++ /* ++ * *p_pb(after) *p_pb (before) ++ * \ \ ++ * <new objs> -...-> <old objs> -> ... ++ */ ++ if (__alloc_list(p_pb, num) < 0) ++ goto nomem; ++ spin_lock(&mm->page_table_lock); ++ return 0; ++ ++nomem: ++ spin_lock(&mm->page_table_lock); ++ pb_free_list(p_pb); ++ return -ENOMEM; ++} ++ ++/* ++ * Hash routines ++ */ ++ ++static inline int pb_hash(struct user_beancounter *ub, struct page *page) ++{ ++ return (((unsigned long)ub << 16) + ((unsigned long)ub >> 16) + ++ (page_to_pfn(page) >> 7)) & pb_hash_mask; ++} ++ ++/* pb_lock should be held */ ++static inline void insert_pb(struct page_beancounter *p, struct page *page, ++ struct user_beancounter *ub, int hash) ++{ ++ p->page = page; ++ p->ub = get_beancounter(ub); ++ p->next_hash = pb_hash_table[hash]; ++ pb_hash_table[hash] = p; ++} ++ ++/* ++ * Heart ++ */ ++ ++int pb_reserve_all(struct page_beancounter **pbs) ++{ ++ int i, need_alloc; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ need_alloc = 0; ++ for_each_beancounter(i, ub) ++ need_alloc++; ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ if (!__alloc_list(pbs, need_alloc)) ++ return 0; ++ ++ pb_free_list(pbs); ++ return -ENOMEM; ++} ++ ++int pb_add_ref(struct page *page, struct user_beancounter *bc, ++ struct page_beancounter **p_pb) ++{ ++ int hash; ++ struct page_beancounter *p; ++ int shift; ++ struct page_beancounter *head; ++ ++ if (bc == NULL || is_shmem_mapping(page->mapping)) ++ return 0; ++ ++ hash = pb_hash(bc, page); ++ ++ spin_lock(&pb_lock); ++ for (p = pb_hash_table[hash]; ++ p != NULL && (p->page != page || p->ub != bc); ++ p = p->next_hash); ++ if (p != NULL) { ++ /* ++ * This page is already associated with this beancounter, ++ * increment the usage counter. ++ */ ++ PB_COUNT_INC(p->refcount); ++ spin_unlock(&pb_lock); ++ return 0; ++ } ++ ++ p = *p_pb; ++ if (p == NULL) { ++ spin_unlock(&pb_lock); ++ return -1; ++ } ++ ++ *p_pb = NULL; ++ insert_pb(p, page, bc, hash); ++ head = page_pbc(page); ++ ++ if (head != NULL) { ++ /* ++ * Move the first element to the end of the list. ++ * List head (pb_head) is set to the next entry. 
++ * Note that this code works even if head is the only element ++ * on the list (because it's cyclic). ++ */ ++ BUG_ON(head->pb_magic != PB_MAGIC); ++ page_pbc(page) = next_page_pb(head); ++ PB_SHIFT_INC(head->refcount); ++ shift = PB_SHIFT_GET(head->refcount); ++ /* ++ * Update user beancounter, the share of head has been changed. ++ * Note that the shift counter is taken after increment. ++ */ ++ dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift); ++ /* add the new page beancounter to the end of the list */ ++ list_add_tail(&p->page_list, &page_pbc(page)->page_list); ++ } else { ++ page_pbc(page) = p; ++ shift = 0; ++ INIT_LIST_HEAD(&p->page_list); ++ } ++ ++ p->refcount = PB_REFCOUNT_MAKE(shift, 1); ++ spin_unlock(&pb_lock); ++ ++ /* update user beancounter for the new page beancounter */ ++ inc_held_pages(bc, UB_PAGE_WEIGHT >> shift); ++ return 0; ++} ++ ++void pb_remove_ref(struct page *page, struct user_beancounter *bc) ++{ ++ int hash; ++ struct page_beancounter *p, **q; ++ int shift, shiftt; ++ ++ if (bc == NULL || is_shmem_mapping(page->mapping)) ++ return; ++ ++ hash = pb_hash(bc, page); ++ ++ spin_lock(&pb_lock); ++ BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC); ++ for (q = pb_hash_table + hash, p = *q; ++ p != NULL && (p->page != page || p->ub != bc); ++ q = &p->next_hash, p = *q); ++ if (p == NULL) ++ goto out_unlock; ++ ++ PB_COUNT_DEC(p->refcount); ++ if (PB_COUNT_GET(p->refcount)) ++ /* ++ * More references from the same user beancounter exist. ++ * Nothing needs to be done. ++ */ ++ goto out_unlock; ++ ++ /* remove from the hash list */ ++ *q = p->next_hash; ++ ++ shift = PB_SHIFT_GET(p->refcount); ++ ++ dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift); ++ ++ if (page_pbc(page) == p) { ++ if (list_empty(&p->page_list)) ++ goto out_free; ++ page_pbc(page) = next_page_pb(p); ++ } ++ list_del(&p->page_list); ++ put_beancounter(p->ub); ++ pb_free(&p); ++ ++ /* Now balance the list. Move the tail and adjust its shift counter. */ ++ p = prev_page_pb(page_pbc(page)); ++ shiftt = PB_SHIFT_GET(p->refcount); ++ page_pbc(page) = p; ++ PB_SHIFT_DEC(p->refcount); ++ ++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt); ++ ++ /* ++ * If the shift counter of the moved beancounter is different from the ++ * removed one's, repeat the procedure for one more tail beancounter ++ */ ++ if (shiftt > shift) { ++ p = prev_page_pb(page_pbc(page)); ++ page_pbc(page) = p; ++ PB_SHIFT_DEC(p->refcount); ++ inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt); ++ } ++ spin_unlock(&pb_lock); ++ return; ++ ++out_free: ++ page_pbc(page) = NULL; ++ put_beancounter(p->ub); ++ pb_free(&p); ++out_unlock: ++ spin_unlock(&pb_lock); ++ return; ++} ++ ++void pb_add_list_ref(struct page *page, struct user_beancounter *bc, ++ struct page_beancounter **p_pb) ++{ ++ struct page_beancounter *list, *pb; ++ ++ pb = *p_pb; ++ if (pb == NULL) { ++ /* Typical case due to caller constraints */ ++ if (pb_add_ref(page, bc, &pb)) ++ BUG(); ++ return; ++ } ++ ++ list = pb->next_hash; ++ if (pb_add_ref(page, bc, &pb)) ++ BUG(); ++ if (pb != NULL) { ++ pb->next_hash = list; ++ list = pb; ++ } ++ *p_pb = list; ++} ++ ++struct user_beancounter *pb_grab_page_ub(struct page *page) ++{ ++ struct page_beancounter *pb; ++ struct user_beancounter *ub; ++ ++ spin_lock(&pb_lock); ++ pb = page_pbc(page); ++ ub = (pb == NULL ? 
ERR_PTR(-EINVAL) : ++ get_beancounter(pb->ub)); ++ spin_unlock(&pb_lock); ++ return ub; ++} ++ ++void __init page_beancounters_init(void) ++{ ++ unsigned long hash_size; ++ ++ pb_cachep = kmem_cache_create("page_beancounter", ++ sizeof(struct page_beancounter), 0, ++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL); ++ hash_size = num_physpages >> 2; ++ for (pb_hash_mask = 1; ++ (hash_size & pb_hash_mask) != hash_size; ++ pb_hash_mask = (pb_hash_mask << 1) + 1); ++ hash_size = pb_hash_mask + 1; ++ printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size); ++ pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *)); ++ memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *)); ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_pages.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_pages.c +--- linux-2.6.8.1.orig/kernel/ub/ub_pages.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_pages.c 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,483 @@ ++/* ++ * kernel/ub/ub_pages.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/mm.h> ++#include <linux/highmem.h> ++#include <linux/virtinfo.h> ++#include <linux/module.h> ++ ++#include <asm/page.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ ++void fastcall __ub_update_physpages(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages ++ + (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT); ++ ub_adjust_maxheld(ub, UB_PHYSPAGES); ++} ++ ++void fastcall __ub_update_oomguarpages(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_OOMGUARPAGES].held = ++ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages; ++ ub_adjust_maxheld(ub, UB_OOMGUARPAGES); ++} ++ ++void fastcall __ub_update_privvm(struct user_beancounter *ub) ++{ ++ ub->ub_parms[UB_PRIVVMPAGES].held = ++ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT) ++ + ub->ub_unused_privvmpages ++ + ub->ub_parms[UB_SHMPAGES].held; ++ ub_adjust_maxheld(ub, UB_PRIVVMPAGES); ++} ++ ++static inline unsigned long pages_in_pte(pte_t *pte) ++{ ++ struct page *pg; ++ ++ if (!pte_present(*pte)) ++ return 0; ++ ++ pg = pte_page(*pte); ++ if (!pfn_valid(page_to_pfn(pg))) ++ return 0; ++ if (PageReserved(pg)) ++ return 0; ++ return 1; ++} ++ ++static inline unsigned long pages_in_pmd(pmd_t *pmd, ++ unsigned long start, unsigned long end) ++{ ++ unsigned long pages, pmd_end, address; ++ pte_t *pte; ++ ++ pages = 0; ++ if (pmd_none(*pmd)) ++ goto out; ++ if (pmd_bad(*pmd)) { ++ pmd_ERROR(*pmd); ++ pmd_clear(pmd); ++ goto out; ++ } ++ ++ pte = pte_offset_map(pmd, start); ++ pmd_end = (start + PMD_SIZE) & PMD_MASK; ++ if (pmd_end && (end > pmd_end)) ++ end = pmd_end; ++ ++ address = start; ++ do { ++ pages += pages_in_pte(pte); ++ address += PAGE_SIZE; ++ pte++; ++ } while (address && (address < end)); ++ pte_unmap(pte-1); ++out: ++ return pages; ++} ++ ++static inline unsigned long pages_in_pgd(pgd_t *pgd, ++ unsigned long start, unsigned long end) ++{ ++ unsigned long pages, pgd_end, address; ++ pmd_t *pmd; ++ ++ pages = 0; ++ if (pgd_none(*pgd)) ++ goto out; ++ if (pgd_bad(*pgd)) { ++ pgd_ERROR(*pgd); ++ pgd_clear(pgd); ++ goto out; ++ } ++ ++ pmd = pmd_offset(pgd, start); ++ pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK; ++ if (pgd_end && (end > pgd_end)) ++ end = pgd_end; ++ ++ address = start; ++ do { ++ pages += pages_in_pmd(pmd, address, end); ++ address = (address + PMD_SIZE) & PMD_MASK; ++ pmd++; ++ } while 
(address && (address < end)); ++out: ++ return pages; ++} ++ ++/* ++ * Calculate number of pages presenting in the address space within single ++ * vm_area. mm->page_table_lock must be already held. ++ */ ++unsigned long pages_in_vma_range(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) ++{ ++ unsigned long address, pages; ++ pgd_t *pgd; ++ ++ pages = 0; ++ address = start; ++ pgd = pgd_offset(vma->vm_mm, start); ++ do { ++ pages += pages_in_pgd(pgd, address, end); ++ address = (address + PGDIR_SIZE) & PGDIR_MASK; ++ pgd++; ++ } while (address && (address < end)); ++ ++ return pages; ++} ++ ++int ub_unused_privvm_inc(struct user_beancounter *ub, long size, ++ struct vm_area_struct *vma) ++{ ++ unsigned long flags; ++ ++ if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file)) ++ return 0; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_unused_privvmpages += size; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ return 0; ++} ++ ++static void __unused_privvm_dec_locked(struct user_beancounter *ub, ++ long size) ++{ ++ /* catch possible overflow */ ++ if (ub->ub_unused_privvmpages < size) { ++ uncharge_warn(ub, UB_UNUSEDPRIVVM, ++ size, ub->ub_unused_privvmpages); ++ size = ub->ub_unused_privvmpages; ++ } ++ ub->ub_unused_privvmpages -= size; ++ __ub_update_privvm(ub); ++} ++ ++void __ub_unused_privvm_dec(struct user_beancounter *ub, long size) ++{ ++ unsigned long flags; ++ ++ if (ub == NULL) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __unused_privvm_dec_locked(ub, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_unused_privvm_dec(struct user_beancounter *ub, long size, ++ struct vm_area_struct *vma) ++{ ++ if (VM_UB_PRIVATE(vma->vm_flags, vma->vm_file)) ++ __ub_unused_privvm_dec(ub, size); ++} ++ ++static inline int __charge_privvm_locked(struct user_beancounter *ub, ++ unsigned long s, enum severity strict) ++{ ++ if (__charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict) < 0) ++ return -ENOMEM; ++ ++ ub->ub_unused_privvmpages += s; ++ return 0; ++} ++ ++int ub_privvm_charge(struct user_beancounter *ub, unsigned long vm_flags, ++ struct file *vm_file, unsigned long size) ++{ ++ int retval; ++ unsigned long flags; ++ ++ if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file)) ++ return 0; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ retval = __charge_privvm_locked(ub, size >> PAGE_SHIFT, UB_SOFT); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return retval; ++} ++ ++void ub_privvm_uncharge(struct user_beancounter *ub, unsigned long vm_flags, ++ struct file *vm_file, unsigned long size) ++{ ++ unsigned long flags; ++ ++ if (ub == NULL || !VM_UB_PRIVATE(vm_flags, vm_file)) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __unused_privvm_dec_locked(ub, size >> PAGE_SHIFT); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++int ub_protected_charge(struct user_beancounter *ub, unsigned long size, ++ unsigned long newflags, struct vm_area_struct *vma) ++{ ++ unsigned long flags; ++ struct file *file; ++ ++ if (ub == NULL) ++ return PRIVVM_NO_CHARGE; ++ ++ flags = vma->vm_flags; ++ if (!((newflags ^ flags) & VM_WRITE)) ++ return PRIVVM_NO_CHARGE; ++ ++ file = vma->vm_file; ++ if (!VM_UB_PRIVATE(newflags | VM_WRITE, file)) ++ return PRIVVM_NO_CHARGE; ++ ++ if (flags & VM_WRITE) ++ return 
PRIVVM_TO_SHARED; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (__charge_privvm_locked(ub, size, UB_SOFT) < 0) ++ goto err; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return PRIVVM_TO_PRIVATE; ++ ++err: ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return PRIVVM_ERROR; ++} ++ ++int ub_locked_mem_charge(struct user_beancounter *ub, long size) ++{ ++ if (ub == NULL) ++ return 0; ++ ++ return charge_beancounter(ub, UB_LOCKEDPAGES, ++ size >> PAGE_SHIFT, UB_HARD); ++} ++ ++void ub_locked_mem_uncharge(struct user_beancounter *ub, long size) ++{ ++ if (ub == NULL) ++ return; ++ ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT); ++} ++ ++int ub_shmpages_charge(struct user_beancounter *ub, unsigned long size) ++{ ++ int ret; ++ unsigned long flags; ++ ++ ret = 0; ++ if (ub == NULL) ++ return 0; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ret = __charge_beancounter_locked(ub, UB_SHMPAGES, size, UB_HARD); ++ if (ret == 0) ++ __ub_update_privvm(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ return ret; ++} ++ ++void ub_shmpages_uncharge(struct user_beancounter *ub, unsigned long size) ++{ ++ unsigned long flags; ++ ++ if (ub == NULL) ++ return; ++ ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __uncharge_beancounter_locked(ub, UB_SHMPAGES, size); ++ __ub_update_privvm(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++int ub_memory_charge(struct user_beancounter *ub, unsigned long size, ++ unsigned vm_flags, struct file *vm_file, int sv) ++{ ++ struct user_beancounter *ubl; ++ unsigned long flags; ++ ++ if (ub == NULL) ++ return 0; ++ ++ size >>= PAGE_SHIFT; ++ ++ if (size > UB_MAXVALUE) ++ return -EINVAL; ++ ++ BUG_ON(sv != UB_SOFT && sv != UB_HARD); ++ ++ if ((vm_flags & VM_LOCKED) && ++ charge_beancounter(ub, UB_LOCKEDPAGES, size, sv)) ++ goto out_err; ++ if (VM_UB_PRIVATE(vm_flags, vm_file)) { ++ for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent); ++ spin_lock_irqsave(&ubl->ub_lock, flags); ++ if (__charge_privvm_locked(ubl, size, sv)) ++ goto out_private; ++ spin_unlock_irqrestore(&ubl->ub_lock, flags); ++ } ++ return 0; ++ ++out_private: ++ spin_unlock_irqrestore(&ubl->ub_lock, flags); ++ if (vm_flags & VM_LOCKED) ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size); ++out_err: ++ return -ENOMEM; ++} ++ ++void ub_memory_uncharge(struct user_beancounter *ub, unsigned long size, ++ unsigned vm_flags, struct file *vm_file) ++{ ++ unsigned long flags; ++ ++ if (ub == NULL) ++ return; ++ ++ size >>= PAGE_SHIFT; ++ ++ if (vm_flags & VM_LOCKED) ++ uncharge_beancounter(ub, UB_LOCKEDPAGES, size); ++ if (VM_UB_PRIVATE(vm_flags, vm_file)) { ++ for (; ub->parent != NULL; ub = ub->parent); ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ __unused_privvm_dec_locked(ub, size); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ } ++} ++ ++static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_tmpfs_respages += size; ++ __ub_update_physpages(ub); ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_tmpfs_respages_inc(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_tmpfs_respages_inc(ub, size); ++} ++ ++static inline void do_ub_tmpfs_respages_dec(struct user_beancounter *ub, ++ unsigned long 
size) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ /* catch possible overflow */ ++ if (ub->ub_tmpfs_respages < size) { ++ uncharge_warn(ub, UB_TMPFSPAGES, ++ size, ub->ub_tmpfs_respages); ++ size = ub->ub_tmpfs_respages; ++ } ++ ub->ub_tmpfs_respages -= size; ++ /* update values what is the most interesting */ ++ __ub_update_physpages(ub); ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_tmpfs_respages_dec(struct user_beancounter *ub, ++ unsigned long size) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_tmpfs_respages_dec(ub, size); ++} ++ ++#ifdef CONFIG_USER_SWAP_ACCOUNTING ++static inline void do_ub_swapentry_inc(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_swap_pages++; ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_swapentry_inc(struct user_beancounter *ub) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_swapentry_inc(ub); ++} ++EXPORT_SYMBOL(ub_swapentry_inc); ++ ++static inline void do_ub_swapentry_dec(struct user_beancounter *ub) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ if (ub->ub_swap_pages < 1) ++ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages); ++ else ++ ub->ub_swap_pages -= 1; ++ __ub_update_oomguarpages(ub); ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++} ++ ++void ub_swapentry_dec(struct user_beancounter *ub) ++{ ++ for (; ub != NULL; ub = ub->parent) ++ do_ub_swapentry_dec(ub); ++} ++#endif ++ ++static int vmguar_enough_memory(struct vnotifier_block *self, ++ unsigned long event, void *arg, int old_ret) ++{ ++ struct user_beancounter *ub; ++ ++ if (event != VIRTINFO_ENOUGHMEM) ++ return old_ret; ++ ++ for (ub = mm_ub(current->mm); ub->parent != NULL; ub = ub->parent); ++ if (ub->ub_parms[UB_PRIVVMPAGES].held > ++ ub->ub_parms[UB_VMGUARPAGES].barrier) ++ return old_ret; ++ ++ return NOTIFY_OK; ++} ++ ++static struct vnotifier_block vmguar_notifier_block = { ++ .notifier_call = vmguar_enough_memory ++}; ++ ++static int __init init_vmguar_notifier(void) ++{ ++ virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block); ++ return 0; ++} ++ ++static void __exit fini_vmguar_notifier(void) ++{ ++ virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block); ++} ++ ++module_init(init_vmguar_notifier); ++module_exit(fini_vmguar_notifier); +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_proc.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_proc.c +--- linux-2.6.8.1.orig/kernel/ub/ub_proc.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_proc.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,380 @@ ++/* ++ * linux/fs/proc/proc_ub.c ++ * ++ * Copyright (C) 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg> ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ * TODO: ++ * ++ * Changes: ++ */ ++ ++#include <linux/errno.h> ++#include <linux/sched.h> ++#include <linux/kernel.h> ++#include <linux/mm.h> ++#include <linux/proc_fs.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_debug.h> ++ ++#include <asm/page.h> ++#include <asm/uaccess.h> ++ ++/* ++ * we have 8 format strings depending on: ++ * 1. BITS_PER_LONG ++ * 2. CONFIG_UBC_KEEP_UNUSED ++ * 3. 
resource number (see out_proc_beancounter) ++ */ ++ ++#ifdef CONFIG_UBC_KEEP_UNUSED ++#define REF_FORMAT "%5.5s %4i: %-12s " ++#define UID_HEAD_STR "uid ref" ++#else ++#define REF_FORMAT "%10.10s: %-12s " ++#define UID_HEAD_STR "uid" ++#endif ++#define REF2_FORMAT "%10s %-12s " ++ ++#if BITS_PER_LONG == 32 ++#define RES_FORMAT "%10lu %10lu %10lu %10lu %10lu" ++#define HEAD_FORMAT "%10s %10s %10s %10s %10s" ++#define UB_PROC_LINE_TEXT (10+2+12+1+10+1+10+1+10+1+10+1+10) ++#else ++#define RES_FORMAT "%20lu %20lu %20lu %20lu %20lu" ++#define HEAD_FORMAT "%20s %20s %20s %20s %20s" ++#define UB_PROC_LINE_TEXT (10+2+12+1+20+1+20+1+20+1+20+1+20) ++#endif ++ ++#define UB_PROC_LINE_LEN (UB_PROC_LINE_TEXT + 1) ++ ++static void out_proc_version(char *buf) ++{ ++ int len; ++ ++ len = sprintf(buf, "Version: 2.5"); ++ memset(buf + len, ' ', UB_PROC_LINE_TEXT - len); ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static void out_proc_head(char *buf) ++{ ++ sprintf(buf, REF2_FORMAT HEAD_FORMAT, ++ UID_HEAD_STR, "resource", "held", "maxheld", ++ "barrier", "limit", "failcnt"); ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r) ++{ ++ if (r == 0) { ++ char tmpbuf[64]; ++ print_ub_uid(ub, tmpbuf, sizeof(tmpbuf)); ++ sprintf(buf, REF_FORMAT RES_FORMAT, ++ tmpbuf, ++#ifdef CONFIG_UBC_KEEP_UNUSED ++ atomic_read(&ub->ub_refcount), ++#endif ++ ub_rnames[r], ub->ub_parms[r].held, ++ ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier, ++ ub->ub_parms[r].limit, ub->ub_parms[r].failcnt); ++ } else ++ sprintf(buf, REF2_FORMAT RES_FORMAT, ++ "", ub_rnames[r], ++ ub->ub_parms[r].held, ub->ub_parms[r].maxheld, ++ ub->ub_parms[r].barrier, ub->ub_parms[r].limit, ++ ub->ub_parms[r].failcnt); ++ ++ buf[UB_PROC_LINE_TEXT] = '\n'; ++} ++ ++static int ub_accessible(struct user_beancounter *ub, ++ struct user_beancounter *exec_ub, ++ struct file *file) ++{ ++ struct user_beancounter *p, *q; ++ ++ for (p = exec_ub; p->parent != NULL; p = p->parent); ++ for (q = ub; q->parent != NULL; q = q->parent); ++ if (p != get_ub0() && q != p) ++ return 0; ++ if (ub->parent == NULL) ++ return 1; ++ return file->private_data == NULL ? 
0 : 1; ++} ++ ++static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len, ++ loff_t *poff) ++{ ++ ssize_t retval; ++ char *buf; ++ unsigned long flags; ++ int i, resource; ++ struct ub_hash_slot *slot; ++ struct user_beancounter *ub; ++ struct user_beancounter *exec_ub = get_exec_ub(); ++ loff_t n, off; ++ int rem, produced, job, tocopy; ++ const int is_capable = ++ (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)); ++ ++ retval = -ENOBUFS; ++ buf = (char *)__get_free_page(GFP_KERNEL); ++ if (buf == NULL) ++ goto out; ++ ++ retval = 0; ++ if (!is_capable) ++ goto out_free; ++ ++ off = *poff; ++ if (off < 0) /* can't happen, just in case */ ++ goto inval; ++ ++again: ++ i = 0; ++ slot = ub_hash; ++ n = off; /* The amount of data to skip */ ++ produced = 0; ++ if (n < (UB_PROC_LINE_LEN * 2)) { ++ if (n < UB_PROC_LINE_LEN) { ++ out_proc_version(buf); ++ produced += UB_PROC_LINE_LEN; ++ n += UB_PROC_LINE_LEN; ++ } ++ out_proc_head(buf + produced); ++ produced += UB_PROC_LINE_LEN; ++ n += UB_PROC_LINE_LEN; ++ } ++ n -= (2 * UB_PROC_LINE_LEN); ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ while (1) { ++ for (ub = slot->ubh_beans; ++ ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN); ++ ub = ub->ub_next) ++ if (is_capable && ub_accessible(ub, exec_ub, file)) ++ n -= (UB_RESOURCES * UB_PROC_LINE_LEN); ++ if (ub != NULL || ++i >= UB_HASH_SIZE) ++ break; ++ ++slot; ++ } ++ rem = n; /* the amount of the data in the buffer to skip */ ++ job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */ ++ if (len < job - rem) ++ job = rem + len; ++ while (ub != NULL && produced < job) { ++ if (is_capable && ub_accessible(ub, exec_ub, file)) ++ for (resource = 0; ++ produced < job && resource < UB_RESOURCES; ++ resource++, produced += UB_PROC_LINE_LEN) ++ { ++ out_proc_beancounter(buf + produced, ++ ub, resource); ++ } ++ if (produced >= job) ++ break; ++ /* Find the next beancounter to produce more data. */ ++ ub = ub->ub_next; ++ while (ub == NULL && ++i < UB_HASH_SIZE) { ++ ++slot; ++ ub = slot->ubh_beans; ++ } ++ } ++ ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n", ++ produced, job, rem); ++ ++ /* ++ * Temporary buffer `buf' contains `produced' bytes. ++ * Extract no more than `len' bytes at offset `rem'. ++ */ ++ if (produced <= rem) ++ goto out_free; ++ tocopy = produced - rem; ++ if (len < tocopy) ++ tocopy = len; ++ if (!tocopy) ++ goto out_free; ++ if (copy_to_user(usrbuf, buf + rem, tocopy)) ++ goto fault; ++ off += tocopy; /* can't overflow */ ++ *poff = off; ++ len -= tocopy; ++ retval += tocopy; ++ if (!len) ++ goto out_free; ++ usrbuf += tocopy; ++ goto again; ++ ++fault: ++ retval = -EFAULT; ++out_free: ++ free_page((unsigned long)buf); ++out: ++ return retval; ++ ++inval: ++ retval = -EINVAL; ++ goto out_free; ++} ++ ++static int ub_proc_open(struct inode *inode, struct file *file) ++{ ++ file->private_data = strcmp(file->f_dentry->d_name.name, ++ "user_beancounters") ? 
++ (void *)-1 : NULL; ++ return 0; ++} ++ ++static struct file_operations ub_file_operations = { ++ .read = &ub_proc_read, ++ .open = &ub_proc_open ++}; ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++#include <linux/seq_file.h> ++#include <linux/kmem_cache.h> ++ ++static void *ubd_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ struct user_beancounter *ub; ++ long slot; ++ ++ spin_lock_irq(&ub_hash_lock); ++ for (slot = 0; slot < UB_HASH_SIZE; slot++) ++ for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) { ++ if (n == 0) { ++ m->private = (void *)slot; ++ return (void *)ub; ++ } ++ n--; ++ } ++ return NULL; ++} ++ ++static void *ubd_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ struct user_beancounter *ub; ++ long slot; ++ ++ ub = (struct user_beancounter *)p; ++ slot = (long)m->private; ++ ++ ++*pos; ++ ub = ub->ub_next; ++ while (1) { ++ for (; ub; ub = ub->ub_next) { ++ m->private = (void *)slot; ++ return (void *)ub; ++ } ++ slot++; ++ if (slot == UB_HASH_SIZE) ++ break; ++ ub = ub_hash[slot].ubh_beans; ++ } ++ return NULL; ++} ++ ++static void ubd_stop(struct seq_file *m, void *p) ++{ ++ spin_unlock_irq(&ub_hash_lock); ++} ++ ++#define PROC_LINE_FMT "\t%-17s\t%5lu\t%5lu\n" ++ ++static int ubd_show(struct seq_file *m, void *p) ++{ ++ struct user_beancounter *ub; ++ struct ub_cache_counter *cc; ++ long pages, vmpages; ++ int i; ++ char id[64]; ++ ++ ub = (struct user_beancounter *)p; ++ print_ub_uid(ub, id, sizeof(id)); ++ seq_printf(m, "%s:\n", id); ++ ++ pages = vmpages = 0; ++ for (i = 0; i < NR_CPUS; i++) { ++ pages += ub->ub_pages_charged[i]; ++ vmpages += ub->ub_vmalloc_charged[i]; ++ } ++ if (pages < 0) ++ pages = 0; ++ if (vmpages < 0) ++ vmpages = 0; ++ seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE); ++ ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM], ++ ub->ub_unused_privvmpages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES], ++ ub->ub_tmpfs_respages, PAGE_SIZE); ++ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES], ++ ub->ub_swap_pages, PAGE_SIZE); ++ /* interrupts are disabled by locking ub_hash_lock */ ++ spin_lock(&cc_lock); ++ list_for_each_entry (cc, &ub->ub_cclist, ulist) { ++ kmem_cache_t *cachep; ++ ++ cachep = cc->cachep; ++ seq_printf(m, PROC_LINE_FMT, ++ cachep->name, ++ cc->counter, ++ (unsigned long)cachep->objuse); ++ } ++ spin_unlock(&cc_lock); ++ return 0; ++} ++ ++static struct seq_operations kmemdebug_op = { ++ .start = ubd_start, ++ .next = ubd_next, ++ .stop = ubd_stop, ++ .show = ubd_show, ++}; ++ ++static int kmem_debug_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &kmemdebug_op); ++} ++ ++static struct file_operations kmem_debug_ops = { ++ .open = kmem_debug_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++#endif ++ ++void __init beancounter_proc_init(void) ++{ ++ struct proc_dir_entry *entry; ++ ++ entry = create_proc_entry("user_beancounters", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &ub_file_operations; ++ else ++ panic("Can't create /proc/user_beancounters entry!\n"); ++ ++ entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &ub_file_operations; ++ else ++ panic("Can't create /proc/user_beancounters2 entry!\n"); ++ ++#ifdef CONFIG_UBC_DEBUG_KMEM ++ entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL); ++ if (entry) ++ entry->proc_fops = &kmem_debug_ops; ++ else ++ panic("Can't 
create /proc/user_beancounters_debug entry!\n"); ++#endif ++} +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_stat.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_stat.c +--- linux-2.6.8.1.orig/kernel/ub/ub_stat.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_stat.c 2006-05-11 13:05:39.000000000 +0400 +@@ -0,0 +1,465 @@ ++/* ++ * kernel/ub/ub_stat.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/timer.h> ++#include <linux/sched.h> ++#include <linux/init.h> ++#include <linux/jiffies.h> ++#include <linux/list.h> ++#include <linux/errno.h> ++#include <linux/suspend.h> ++ ++#include <asm/uaccess.h> ++#include <asm/param.h> ++ ++#include <ub/beancounter.h> ++#include <ub/ub_hash.h> ++#include <ub/ub_stat.h> ++ ++static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED; ++static LIST_HEAD(ubs_notify_list); ++static long ubs_min_interval; ++static ubstattime_t ubs_start_time, ubs_end_time; ++static struct timer_list ubs_timer; ++ ++static int ubstat_get_list(void *buf, long size) ++{ ++ int retval; ++ unsigned long flags; ++ int slotnr; ++ struct ub_hash_slot *slot; ++ struct user_beancounter *ub, *last_ub; ++ long *page, *ptr, *end; ++ int len; ++ ++ page = (long *)__get_free_page(GFP_KERNEL); ++ if (page == NULL) ++ return -ENOMEM; ++ ++ retval = 0; ++ slotnr = 0; ++ slot = ub_hash; ++ last_ub = NULL; ++ while (1) { ++ ptr = page; ++ end = page + PAGE_SIZE / sizeof(*ptr); ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ if (last_ub == NULL) ++ ub = slot->ubh_beans; ++ else ++ ub = last_ub->ub_next; ++ while (1) { ++ for (; ub != NULL; ub = ub->ub_next) { ++ if (ub->parent != NULL) ++ continue; ++ *ptr++ = ub->ub_uid; ++ if (ptr == end) ++ break; ++ } ++ if (ptr == end) ++ break; ++ ++slot; ++ if (++slotnr >= UB_HASH_SIZE) ++ break; ++ ub = slot->ubh_beans; ++ } ++ if (ptr == page) ++ goto out_unlock; ++ if (ub != NULL) ++ get_beancounter(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ ++ if (last_ub != NULL) ++ put_beancounter(last_ub); ++ last_ub = ub; /* last visited beancounter in the slot */ ++ ++ len = min_t(long, (ptr - page) * sizeof(*ptr), size); ++ if (copy_to_user(buf, page, len)) { ++ retval = -EFAULT; ++ break; ++ } ++ retval += len; ++ if (len < PAGE_SIZE) ++ break; ++ buf += len; ++ size -= len; ++ } ++out: ++ if (last_ub != NULL) ++ put_beancounter(last_ub); ++ free_page((unsigned long)page); ++ return retval; ++ ++out_unlock: ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++ goto out; ++} ++ ++static int ubstat_gettime(void *buf, long size) ++{ ++ ubgettime_t data; ++ int retval; ++ ++ spin_lock(&ubs_notify_lock); ++ data.start_time = ubs_start_time; ++ data.end_time = ubs_end_time; ++ data.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ; ++ spin_unlock(&ubs_notify_lock); ++ ++ retval = min_t(long, sizeof(data), size); ++ if (copy_to_user(buf, &data, retval)) ++ retval = -EFAULT; ++ return retval; ++} ++ ++static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf) ++{ ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparm_t param[1]; ++ } *data; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ ++ data->param[0].maxheld = ub->ub_store[res].maxheld; ++ data->param[0].failcnt = ub->ub_store[res].failcnt; ++ ++ return sizeof(*data); ++} ++ ++static int ubstat_do_read_all(struct user_beancounter *ub, void 
*kbuf, int size) ++{ ++ int wrote; ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparm_t param[UB_RESOURCES]; ++ } *data; ++ int resource; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ wrote = sizeof(data->start_time) + sizeof(data->end_time); ++ ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ if (size < wrote + sizeof(data->param[resource])) ++ break; ++ data->param[resource].maxheld = ub->ub_store[resource].maxheld; ++ data->param[resource].failcnt = ub->ub_store[resource].failcnt; ++ wrote += sizeof(data->param[resource]); ++ } ++ ++ return wrote; ++} ++ ++static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf, ++ int size) ++{ ++ int wrote; ++ struct { ++ ubstattime_t start_time; ++ ubstattime_t end_time; ++ ubstatparmf_t param[UB_RESOURCES]; ++ } *data; ++ int resource; ++ ++ data = kbuf; ++ data->start_time = ubs_start_time; ++ data->end_time = ubs_end_time; ++ wrote = sizeof(data->start_time) + sizeof(data->end_time); ++ ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ if (size < wrote + sizeof(data->param[resource])) ++ break; ++ /* The beginning of ubstatparmf_t matches struct ubparm. */ ++ memcpy(&data->param[resource], &ub->ub_store[resource], ++ sizeof(ub->ub_store[resource])); ++ data->param[resource].__unused1 = 0; ++ data->param[resource].__unused2 = 0; ++ wrote += sizeof(data->param[resource]); ++ } ++ return wrote; ++} ++ ++static int ubstat_get_stat(struct user_beancounter *ub, long cmd, ++ void *buf, long size) ++{ ++ void *kbuf; ++ int retval; ++ ++ kbuf = (void *)__get_free_page(GFP_KERNEL); ++ if (kbuf == NULL) ++ return -ENOMEM; ++ ++ spin_lock(&ubs_notify_lock); ++ switch (UBSTAT_CMD(cmd)) { ++ case UBSTAT_READ_ONE: ++ retval = -EINVAL; ++ if (UBSTAT_PARMID(cmd) >= UB_RESOURCES) ++ break; ++ retval = ubstat_do_read_one(ub, ++ UBSTAT_PARMID(cmd), kbuf); ++ break; ++ case UBSTAT_READ_ALL: ++ retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE); ++ break; ++ case UBSTAT_READ_FULL: ++ retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE); ++ break; ++ default: ++ retval = -EINVAL; ++ } ++ spin_unlock(&ubs_notify_lock); ++ ++ if (retval > 0) { ++ retval = min_t(long, retval, size); ++ if (copy_to_user(buf, kbuf, retval)) ++ retval = -EFAULT; ++ } ++ ++ free_page((unsigned long)kbuf); ++ return retval; ++} ++ ++static int ubstat_handle_notifrq(ubnotifrq_t *req) ++{ ++ int retval; ++ struct ub_stat_notify *new_notify; ++ struct list_head *entry; ++ struct task_struct *tsk_to_free; ++ ++ new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL); ++ if (new_notify == NULL) ++ return -ENOMEM; ++ ++ tsk_to_free = NULL; ++ INIT_LIST_HEAD(&new_notify->list); ++ ++ spin_lock(&ubs_notify_lock); ++ list_for_each(entry, &ubs_notify_list) { ++ struct ub_stat_notify *notify; ++ ++ notify = list_entry(entry, struct ub_stat_notify, list); ++ if (notify->task == current) { ++ kfree(new_notify); ++ new_notify = notify; ++ break; ++ } ++ } ++ ++ retval = -EINVAL; ++ if (req->maxinterval < 1) ++ goto out_unlock; ++ if (req->maxinterval > TIME_MAX_SEC) ++ req->maxinterval = TIME_MAX_SEC; ++ if (req->maxinterval < ubs_min_interval) { ++ unsigned long dif; ++ ++ ubs_min_interval = req->maxinterval; ++ dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ; ++ if (dif > req->maxinterval) ++ mod_timer(&ubs_timer, ++ ubs_timer.expires - ++ (dif - req->maxinterval) * HZ); ++ } ++ ++ if (entry != &ubs_notify_list) { ++ list_del(&new_notify->list); ++ tsk_to_free = new_notify->task; ++ } ++ if 
(req->signum) { ++ new_notify->task = current; ++ get_task_struct(new_notify->task); ++ new_notify->signum = req->signum; ++ list_add(&new_notify->list, &ubs_notify_list); ++ } else ++ kfree(new_notify); ++ retval = 0; ++out_unlock: ++ spin_unlock(&ubs_notify_lock); ++ if (tsk_to_free != NULL) ++ put_task_struct(tsk_to_free); ++ return retval; ++} ++ ++/* ++ * former sys_ubstat ++ */ ++long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf, ++ long size) ++{ ++ int retval; ++ struct user_beancounter *ub; ++ ++ if (func == UBSTAT_UBPARMNUM) ++ return UB_RESOURCES; ++ if (func == UBSTAT_UBLIST) ++ return ubstat_get_list(buf, size); ++ if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))) ++ return -EPERM; ++ ++ if (func == UBSTAT_GETTIME) { ++ retval = ubstat_gettime(buf, size); ++ goto notify; ++ } ++ ++ ub = get_exec_ub(); ++ if (ub != NULL && ub->ub_uid == arg1) ++ get_beancounter(ub); ++ else /* FIXME must be if (ve_is_super) */ ++ ub = get_beancounter_byuid(arg1, 0); ++ ++ if (ub == NULL) ++ return -ESRCH; ++ ++ retval = ubstat_get_stat(ub, func, buf, size); ++ put_beancounter(ub); ++notify: ++ /* Handle request for notification */ ++ if (retval >= 0) { ++ ubnotifrq_t notifrq; ++ int err; ++ ++ err = -EFAULT; ++ if (!copy_from_user(¬ifrq, (void *)arg2, sizeof(notifrq))) ++ err = ubstat_handle_notifrq(¬ifrq); ++ if (err) ++ retval = err; ++ } ++ ++ return retval; ++} ++ ++static void ubstat_save_onestat(struct user_beancounter *ub) ++{ ++ int resource; ++ ++ /* called with local irq disabled */ ++ spin_lock(&ub->ub_lock); ++ for (resource = 0; resource < UB_RESOURCES; resource++) { ++ memcpy(&ub->ub_store[resource], &ub->ub_parms[resource], ++ sizeof(struct ubparm)); ++ ub->ub_parms[resource].minheld = ++ ub->ub_parms[resource].maxheld = ++ ub->ub_parms[resource].held; ++ } ++ spin_unlock(&ub->ub_lock); ++} ++ ++static void ubstat_save_statistics(void) ++{ ++ unsigned long flags; ++ int i; ++ struct user_beancounter *ub; ++ ++ spin_lock_irqsave(&ub_hash_lock, flags); ++ for_each_beancounter(i, ub) ++ ubstat_save_onestat(ub); ++ spin_unlock_irqrestore(&ub_hash_lock, flags); ++} ++ ++static void ubstatd_timeout(unsigned long __data) ++{ ++ struct task_struct *p; ++ ++ p = (struct task_struct *) __data; ++ wake_up_process(p); ++} ++ ++/* ++ * Safe wrapper for send_sig. It prevents a race with release_task ++ * for sighand. ++ * Should be called under tasklist_lock. 
++ */ ++static void task_send_sig(struct ub_stat_notify *notify) ++{ ++ if (likely(notify->task->sighand != NULL)) ++ send_sig(notify->signum, notify->task, 1); ++} ++ ++static inline void do_notifies(void) ++{ ++ LIST_HEAD(notif_free_list); ++ struct ub_stat_notify *notify; ++ struct ub_stat_notify *tmp; ++ ++ spin_lock(&ubs_notify_lock); ++ ubs_start_time = ubs_end_time; ++ /* ++ * the expression below relies on time being unsigned long and ++ * arithmetic promotion rules ++ */ ++ ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ; ++ mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ); ++ ubs_min_interval = TIME_MAX_SEC; ++ /* save statistics accumulated for the interval */ ++ ubstat_save_statistics(); ++ /* send signals */ ++ read_lock(&tasklist_lock); ++ while (!list_empty(&ubs_notify_list)) { ++ notify = list_entry(ubs_notify_list.next, ++ struct ub_stat_notify, list); ++ task_send_sig(notify); ++ list_del(&notify->list); ++ list_add(&notify->list, &notif_free_list); ++ } ++ read_unlock(&tasklist_lock); ++ spin_unlock(&ubs_notify_lock); ++ ++ list_for_each_entry_safe(notify, tmp, &notif_free_list, list) { ++ put_task_struct(notify->task); ++ kfree(notify); ++ } ++} ++ ++/* ++ * Kernel thread ++ */ ++static int ubstatd(void *unused) ++{ ++ /* daemonize call will take care of signals */ ++ daemonize("ubstatd"); ++ ++ ubs_timer.data = (unsigned long)current; ++ ubs_timer.function = ubstatd_timeout; ++ add_timer(&ubs_timer); ++ ++ while (1) { ++ set_task_state(current, TASK_INTERRUPTIBLE); ++ if (time_after(ubs_timer.expires, jiffies)) { ++ schedule(); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); ++ continue; ++ } ++ ++ __set_task_state(current, TASK_RUNNING); ++ do_notifies(); ++ } ++} ++ ++static int __init ubstatd_init(void) ++{ ++ init_timer(&ubs_timer); ++ ubs_timer.expires = TIME_MAX_JIF; ++ ubs_min_interval = TIME_MAX_SEC; ++ ubs_start_time = ubs_end_time = 0; ++ ++ kernel_thread(ubstatd, NULL, 0); ++ return 0; ++} ++ ++module_init(ubstatd_init); +diff -uprN linux-2.6.8.1.orig/kernel/ub/ub_sys.c linux-2.6.8.1-ve022stab078/kernel/ub/ub_sys.c +--- linux-2.6.8.1.orig/kernel/ub/ub_sys.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ub/ub_sys.c 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,168 @@ ++/* ++ * kernel/ub/ub_sys.c ++ * ++ * Copyright (C) 2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++#include <linux/config.h> ++#include <linux/virtinfo.h> ++#include <asm/uaccess.h> ++ ++#include <ub/beancounter.h> ++ ++#ifndef CONFIG_USER_RESOURCE ++asmlinkage long sys_getluid(void) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_setluid(uid_t uid) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, ++ unsigned long *limits) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size) ++{ ++ return -ENOSYS; ++} ++#else /* CONFIG_USER_RESOURCE */ ++ ++/* ++ * The (rather boring) getluid syscall ++ */ ++asmlinkage long sys_getluid(void) ++{ ++ struct user_beancounter *ub; ++ ++ ub = get_exec_ub(); ++ if (ub == NULL) ++ return -EINVAL; ++ ++ return ub->ub_uid; ++} ++ ++/* ++ * The setluid syscall ++ */ ++asmlinkage long sys_setluid(uid_t uid) ++{ ++ struct user_beancounter *ub; ++ struct task_beancounter *task_bc; ++ int error; ++ ++ task_bc = task_bc(current); ++ ++ /* You may not disown a setluid */ ++ error = -EINVAL; ++ if (uid == (uid_t)-1) ++ goto out; ++ ++ /* You may only set an ub as root */ ++ error = -EPERM; ++ if (!capable(CAP_SETUID)) ++ goto out; ++ ++ /* ++ * The ub once set is irrevocable to all ++ * unless it's set from ve0. ++ */ ++ if (!ve_is_super(get_exec_env())) ++ goto out; ++ ++ /* Ok - set up a beancounter entry for this user */ ++ error = -ENOBUFS; ++ ub = get_beancounter_byuid(uid, 1); ++ if (ub == NULL) ++ goto out; ++ ++ ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) " ++ "for %.20s pid %d\n", ++ ub, atomic_read(&ub->ub_refcount), ++ current->comm, current->pid); ++ /* install bc */ ++ error = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_NEWUBC, ub); ++ if (!(error & NOTIFY_FAIL)) { ++ put_beancounter(task_bc->exec_ub); ++ task_bc->exec_ub = ub; ++ if (!(error & NOTIFY_OK)) { ++ put_beancounter(task_bc->fork_sub); ++ task_bc->fork_sub = get_beancounter(ub); ++ } ++ error = 0; ++ } else ++ error = -ENOBUFS; ++out: ++ return error; ++} ++ ++/* ++ * The setbeanlimit syscall ++ */ ++asmlinkage long sys_setublimit(uid_t uid, unsigned long resource, ++ unsigned long *limits) ++{ ++ int error; ++ unsigned long flags; ++ struct user_beancounter *ub; ++ unsigned long new_limits[2]; ++ ++ error = -EPERM; ++ if(!capable(CAP_SYS_RESOURCE)) ++ goto out; ++ ++ if (!ve_is_super(get_exec_env())) ++ goto out; ++ ++ error = -EINVAL; ++ if (resource >= UB_RESOURCES) ++ goto out; ++ ++ error = -EFAULT; ++ if (copy_from_user(&new_limits, limits, sizeof(new_limits))) ++ goto out; ++ ++ error = -EINVAL; ++ if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE) ++ goto out; ++ ++ error = -ENOENT; ++ ub = get_beancounter_byuid(uid, 0); ++ if (ub == NULL) { ++ ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid); ++ goto out; ++ } ++ ++ spin_lock_irqsave(&ub->ub_lock, flags); ++ ub->ub_parms[resource].barrier = new_limits[0]; ++ ub->ub_parms[resource].limit = new_limits[1]; ++ spin_unlock_irqrestore(&ub->ub_lock, flags); ++ ++ put_beancounter(ub); ++ ++ error = 0; ++out: ++ return error; ++} ++ ++extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size); ++asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2, ++ void *buf, long size) ++{ ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ ++ return do_ubstat(func, arg1, arg2, buf, size); ++} ++#endif +diff -uprN linux-2.6.8.1.orig/kernel/user.c linux-2.6.8.1-ve022stab078/kernel/user.c +--- 
linux-2.6.8.1.orig/kernel/user.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/kernel/user.c 2006-05-11 13:05:40.000000000 +0400 +@@ -21,7 +21,20 @@ + #define UIDHASH_SZ (1 << UIDHASH_BITS) + #define UIDHASH_MASK (UIDHASH_SZ - 1) + #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) +-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) ++#define __uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) ++ ++#ifdef CONFIG_VE ++#define UIDHASH_MASK_VE (UIDHASH_SZ_VE - 1) ++#define __uidhashfn_ve(uid) (((uid >> UIDHASH_BITS_VE) ^ uid) & \ ++ UIDHASH_MASK_VE) ++#define __uidhashentry_ve(uid, envid) ((envid)->uidhash_table + \ ++ __uidhashfn_ve(uid)) ++#define uidhashentry_ve(uid) (ve_is_super(get_exec_env()) ? \ ++ __uidhashentry(uid) : \ ++ __uidhashentry_ve(uid, get_exec_env())) ++#else ++#define uidhashentry_ve(uid) __uidhashentry(uid) ++#endif + + static kmem_cache_t *uid_cachep; + static struct list_head uidhash_table[UIDHASH_SZ]; +@@ -77,7 +90,7 @@ struct user_struct *find_user(uid_t uid) + struct user_struct *ret; + + spin_lock(&uidhash_lock); +- ret = uid_hash_find(uid, uidhashentry(uid)); ++ ret = uid_hash_find(uid, uidhashentry_ve(uid)); + spin_unlock(&uidhash_lock); + return ret; + } +@@ -93,7 +106,7 @@ void free_uid(struct user_struct *up) + + struct user_struct * alloc_uid(uid_t uid) + { +- struct list_head *hashent = uidhashentry(uid); ++ struct list_head *hashent = uidhashentry_ve(uid); + struct user_struct *up; + + spin_lock(&uidhash_lock); +@@ -154,14 +167,14 @@ static int __init uid_cache_init(void) + int n; + + uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL); + + for(n = 0; n < UIDHASH_SZ; ++n) + INIT_LIST_HEAD(uidhash_table + n); + + /* Insert the root user immediately (init already runs as root) */ + spin_lock(&uidhash_lock); +- uid_hash_insert(&root_user, uidhashentry(0)); ++ uid_hash_insert(&root_user, __uidhashentry(0)); + spin_unlock(&uidhash_lock); + + return 0; +diff -uprN linux-2.6.8.1.orig/kernel/ve.c linux-2.6.8.1-ve022stab078/kernel/ve.c +--- linux-2.6.8.1.orig/kernel/ve.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/ve.c 2006-05-11 13:05:42.000000000 +0400 +@@ -0,0 +1,178 @@ ++/* ++ * linux/kernel/ve.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. 
++ * ++ */ ++ ++/* ++ * 've.c' helper file performing VE sub-system initialization ++ */ ++ ++#include <linux/sched.h> ++#include <linux/delay.h> ++#include <linux/capability.h> ++#include <linux/ve.h> ++#include <linux/smp_lock.h> ++#include <linux/init.h> ++ ++#include <linux/errno.h> ++#include <linux/unistd.h> ++#include <linux/slab.h> ++#include <linux/sys.h> ++#include <linux/kdev_t.h> ++#include <linux/termios.h> ++#include <linux/tty_driver.h> ++#include <linux/netdevice.h> ++#include <linux/utsname.h> ++#include <linux/proc_fs.h> ++#include <linux/kernel_stat.h> ++#include <linux/module.h> ++#include <linux/rcupdate.h> ++#include <linux/ve_proto.h> ++#include <linux/ve_owner.h> ++ ++#include <linux/nfcalls.h> ++ ++unsigned long vz_rstamp = 0x37e0f59d; ++ ++#ifdef CONFIG_MODULES ++struct module no_module = { .state = MODULE_STATE_GOING }; ++EXPORT_SYMBOL(no_module); ++#endif ++ ++#ifdef CONFIG_VE ++ ++DCL_VE_OWNER(SKB, SLAB, struct sk_buff, owner_env, , (noinline, regparm(1))) ++DCL_VE_OWNER(SK, SLAB, struct sock, sk_owner_env, , (noinline, regparm(1))) ++DCL_VE_OWNER(TW, SLAB, struct tcp_tw_bucket, tw_owner_env, , (noinline, regparm(1))) ++DCL_VE_OWNER(FILP, GENERIC, struct file, owner_env, inline, (always_inline)) ++DCL_VE_OWNER(FSTYPE, MODULE, struct file_system_type, owner_env, , ()) ++ ++#if defined(CONFIG_VE_IPTABLES) ++INIT_KSYM_MODULE(ip_tables); ++INIT_KSYM_MODULE(iptable_filter); ++INIT_KSYM_MODULE(iptable_mangle); ++INIT_KSYM_MODULE(ipt_limit); ++INIT_KSYM_MODULE(ipt_multiport); ++INIT_KSYM_MODULE(ipt_tos); ++INIT_KSYM_MODULE(ipt_TOS); ++INIT_KSYM_MODULE(ipt_REJECT); ++INIT_KSYM_MODULE(ipt_TCPMSS); ++INIT_KSYM_MODULE(ipt_tcpmss); ++INIT_KSYM_MODULE(ipt_ttl); ++INIT_KSYM_MODULE(ipt_LOG); ++INIT_KSYM_MODULE(ipt_length); ++INIT_KSYM_MODULE(ip_conntrack); ++INIT_KSYM_MODULE(ip_conntrack_ftp); ++INIT_KSYM_MODULE(ip_conntrack_irc); ++INIT_KSYM_MODULE(ipt_conntrack); ++INIT_KSYM_MODULE(ipt_state); ++INIT_KSYM_MODULE(ipt_helper); ++INIT_KSYM_MODULE(iptable_nat); ++INIT_KSYM_MODULE(ip_nat_ftp); ++INIT_KSYM_MODULE(ip_nat_irc); ++INIT_KSYM_MODULE(ipt_REDIRECT); ++ ++INIT_KSYM_CALL(int, init_netfilter, (void)); ++INIT_KSYM_CALL(int, init_iptables, (void)); ++INIT_KSYM_CALL(int, init_iptable_filter, (void)); ++INIT_KSYM_CALL(int, init_iptable_mangle, (void)); ++INIT_KSYM_CALL(int, init_iptable_limit, (void)); ++INIT_KSYM_CALL(int, init_iptable_multiport, (void)); ++INIT_KSYM_CALL(int, init_iptable_tos, (void)); ++INIT_KSYM_CALL(int, init_iptable_TOS, (void)); ++INIT_KSYM_CALL(int, init_iptable_REJECT, (void)); ++INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void)); ++INIT_KSYM_CALL(int, init_iptable_tcpmss, (void)); ++INIT_KSYM_CALL(int, init_iptable_ttl, (void)); ++INIT_KSYM_CALL(int, init_iptable_LOG, (void)); ++INIT_KSYM_CALL(int, init_iptable_length, (void)); ++INIT_KSYM_CALL(int, init_iptable_conntrack, (void)); ++INIT_KSYM_CALL(int, init_iptable_ftp, (void)); ++INIT_KSYM_CALL(int, init_iptable_irc, (void)); ++INIT_KSYM_CALL(int, init_iptable_conntrack_match, (void)); ++INIT_KSYM_CALL(int, init_iptable_state, (void)); ++INIT_KSYM_CALL(int, init_iptable_helper, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void)); ++INIT_KSYM_CALL(int, init_iptable_nat_irc, (void)); ++INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void)); ++INIT_KSYM_CALL(void, fini_iptable_nat, (void)); ++INIT_KSYM_CALL(void, fini_iptable_helper, (void)); 
++INIT_KSYM_CALL(void, fini_iptable_state, (void)); ++INIT_KSYM_CALL(void, fini_iptable_conntrack_match, (void)); ++INIT_KSYM_CALL(void, fini_iptable_irc, (void)); ++INIT_KSYM_CALL(void, fini_iptable_ftp, (void)); ++INIT_KSYM_CALL(void, fini_iptable_conntrack, (void)); ++INIT_KSYM_CALL(void, fini_iptable_length, (void)); ++INIT_KSYM_CALL(void, fini_iptable_LOG, (void)); ++INIT_KSYM_CALL(void, fini_iptable_ttl, (void)); ++INIT_KSYM_CALL(void, fini_iptable_tcpmss, (void)); ++INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void)); ++INIT_KSYM_CALL(void, fini_iptable_REJECT, (void)); ++INIT_KSYM_CALL(void, fini_iptable_TOS, (void)); ++INIT_KSYM_CALL(void, fini_iptable_tos, (void)); ++INIT_KSYM_CALL(void, fini_iptable_multiport, (void)); ++INIT_KSYM_CALL(void, fini_iptable_limit, (void)); ++INIT_KSYM_CALL(void, fini_iptable_filter, (void)); ++INIT_KSYM_CALL(void, fini_iptable_mangle, (void)); ++INIT_KSYM_CALL(void, fini_iptables, (void)); ++INIT_KSYM_CALL(void, fini_netfilter, (void)); ++INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void)); ++ ++INIT_KSYM_CALL(void, ipt_flush_table, (struct ipt_table *table)); ++#endif ++ ++#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS) ++INIT_KSYM_MODULE(vzmon); ++INIT_KSYM_CALL(int, real_get_device_perms_ve, ++ (int dev_type, dev_t dev, int access_mode)); ++INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env)); ++INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env)); ++INIT_KSYM_CALL(void, real_update_load_avg_ve, (void)); ++ ++int get_device_perms_ve(int dev_type, dev_t dev, int access_mode) ++{ ++ return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve, ++ (dev_type, dev, access_mode)); ++} ++EXPORT_SYMBOL(get_device_perms_ve); ++ ++void do_env_cleanup(struct ve_struct *env) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env)); ++} ++ ++void do_env_free(struct ve_struct *env) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env)); ++} ++EXPORT_SYMBOL(do_env_free); ++ ++void do_update_load_avg_ve(void) ++{ ++ KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ()); ++} ++#endif ++ ++extern struct ipv4_devconf ipv4_devconf; ++extern struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void); ++ ++struct ve_struct ve0 = { ++ .utsname = &system_utsname, ++ .vetask_lh = LIST_HEAD_INIT(ve0.vetask_lh), ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ._net_dev_tail = &ve0._net_dev_base, ++ .ifindex = -1, ++#endif ++}; ++ ++EXPORT_SYMBOL(ve0); ++ ++#endif /* CONFIG_VE */ +diff -uprN linux-2.6.8.1.orig/kernel/vecalls.c linux-2.6.8.1-ve022stab078/kernel/vecalls.c +--- linux-2.6.8.1.orig/kernel/vecalls.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/vecalls.c 2006-05-11 13:05:48.000000000 +0400 +@@ -0,0 +1,3202 @@ ++/* ++ * linux/kernel/vecalls.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ */ ++ ++/* ++ * 'vecalls.c' is file with basic VE support. 
It provides basic primitives ++ * along with an initialization script ++ */ ++ ++#include <linux/sched.h> ++#include <linux/delay.h> ++#include <linux/capability.h> ++#include <linux/ve.h> ++#include <linux/smp_lock.h> ++#include <linux/init.h> ++#include <linux/list.h> ++#include <linux/ve_owner.h> ++#include <linux/errno.h> ++#include <linux/unistd.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/sys.h> ++#include <linux/fs.h> ++#include <linux/namespace.h> ++#include <linux/termios.h> ++#include <linux/tty_driver.h> ++#include <linux/netdevice.h> ++#include <linux/wait.h> ++#include <linux/inetdevice.h> ++#include <linux/utsname.h> ++#include <linux/sysctl.h> ++#include <linux/proc_fs.h> ++#include <linux/seq_file.h> ++#include <linux/kernel_stat.h> ++#include <linux/module.h> ++#include <linux/suspend.h> ++#include <linux/rcupdate.h> ++#include <linux/in.h> ++#include <linux/major.h> ++#include <linux/kdev_t.h> ++#include <linux/idr.h> ++#include <linux/inetdevice.h> ++#include <net/pkt_sched.h> ++#include <linux/divert.h> ++#include <ub/beancounter.h> ++ ++#include <net/route.h> ++#include <net/ip_fib.h> ++ ++#include <linux/ve_proto.h> ++#include <linux/venet.h> ++#include <linux/vzctl.h> ++#include <linux/vzcalluser.h> ++#include <linux/fairsched.h> ++ ++#include <linux/nfcalls.h> ++ ++struct ve_struct *ve_list_head = NULL; ++int nr_ve = 1; /* One VE always exists. Compatibility with vestat */ ++rwlock_t ve_list_guard = RW_LOCK_UNLOCKED; ++static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED; ++ ++extern int glob_virt_pids; ++ ++static int do_env_enter(struct ve_struct *ve, unsigned int flags); ++int real_env_create(envid_t veid, unsigned flags, u32 class_id, ++ env_create_param_t *data, int datalen); ++static void do_clean_devperms(envid_t veid); ++static int alloc_ve_tty_drivers(struct ve_struct* ve); ++static void free_ve_tty_drivers(struct ve_struct* ve); ++static int register_ve_tty_drivers(struct ve_struct* ve); ++static void unregister_ve_tty_drivers(struct ve_struct* ve); ++static int init_ve_tty_drivers(struct ve_struct *); ++static void fini_ve_tty_drivers(struct ve_struct *); ++static void clear_termios(struct tty_driver* driver ); ++static void ve_mapped_devs_cleanup(struct ve_struct *ve); ++ ++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf); ++ ++static void vecalls_exit(void); ++ ++struct ve_struct *__find_ve_by_id(envid_t veid) ++{ ++ struct ve_struct *ve; ++ for (ve = ve_list_head; ++ ve != NULL && ve->veid != veid; ++ ve = ve->next); ++ return ve; ++} ++ ++struct ve_struct *get_ve_by_id(envid_t veid) ++{ ++ struct ve_struct *ve; ++ read_lock(&ve_list_guard); ++ ve = __find_ve_by_id(veid); ++ get_ve(ve); ++ read_unlock(&ve_list_guard); ++ return ve; ++} ++ ++/* ++ * real_put_ve() MUST be used instead of put_ve() inside vecalls. 
++ */ ++void real_do_env_free(struct ve_struct *ve); ++static inline void real_put_ve(struct ve_struct *ve) ++{ ++ if (ve && atomic_dec_and_test(&ve->counter)) { ++ if (atomic_read(&ve->pcounter) > 0) ++ BUG(); ++ if (ve->is_running) ++ BUG(); ++ real_do_env_free(ve); ++ } ++} ++ ++extern struct file_system_type devpts_fs_type; ++extern struct file_system_type sysfs_fs_type; ++extern struct file_system_type tmpfs_fs_type; ++extern struct file_system_type proc_fs_type; ++ ++extern spinlock_t task_capability_lock; ++extern void ve_ipc_free(struct ve_struct * ve); ++extern void ip_fragment_cleanup(struct ve_struct *ve); ++ ++static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf) ++{ ++ struct ve_struct *ve; ++ struct vz_cpu_stat *vstat; ++ int retval; ++ int i, cpu; ++ unsigned long tmp; ++ ++ if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid)) ++ return -EPERM; ++ if (veid == 0) ++ return -ESRCH; ++ ++ vstat = kmalloc(sizeof(*vstat), GFP_KERNEL); ++ if (!vstat) ++ return -ENOMEM; ++ memset(vstat, 0, sizeof(*vstat)); ++ ++ retval = -ESRCH; ++ read_lock(&ve_list_guard); ++ ve = __find_ve_by_id(veid); ++ if (ve == NULL) ++ goto out_unlock; ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ vstat->user_jif += VE_CPU_STATS(ve, cpu)->user; ++ vstat->nice_jif += VE_CPU_STATS(ve, cpu)->nice; ++ vstat->system_jif += VE_CPU_STATS(ve, cpu)->system; ++ vstat->idle_clk += ve_sched_get_idle_time(ve, cpu); ++ } ++ vstat->uptime_clk = get_cycles() - ve->start_cycles; ++ vstat->uptime_jif = jiffies - ve->start_jiffies; ++ for (i = 0; i < 3; i++) { ++ tmp = ve->avenrun[i] + (FIXED_1/200); ++ vstat->avenrun[i].val_int = LOAD_INT(tmp); ++ vstat->avenrun[i].val_frac = LOAD_FRAC(tmp); ++ } ++ read_unlock(&ve_list_guard); ++ ++ retval = 0; ++ if (copy_to_user(buf, vstat, sizeof(*vstat))) ++ retval = -EFAULT; ++out_free: ++ kfree(vstat); ++ return retval; ++ ++out_unlock: ++ read_unlock(&ve_list_guard); ++ goto out_free; ++} ++ ++/********************************************************************** ++ * Devices permissions routines, ++ * character and block devices separately ++ **********************************************************************/ ++ ++/* Rules applied in the following order: ++ MAJOR!=0, MINOR!=0 ++ MAJOR!=0, MINOR==0 ++ MAJOR==0, MINOR==0 ++*/ ++struct devperms_struct ++{ ++ dev_t dev; /* device id */ ++ unsigned char mask; ++ unsigned type; ++ envid_t veid; ++ ++ struct devperms_struct *devhash_next; ++ struct devperms_struct **devhash_pprev; ++}; ++ ++static struct devperms_struct original_perms[] = ++{{ ++ MKDEV(0,0), /*device*/ ++ S_IROTH | S_IWOTH, ++ S_IFCHR, /*type*/ ++ 0, /*veid*/ ++ NULL, NULL ++}, ++{ ++ MKDEV(0,0), /*device*/ ++ S_IXGRP | S_IROTH | S_IWOTH, ++ S_IFBLK, /*type*/ ++ 0, /*veid*/ ++ NULL, NULL ++}}; ++ ++static struct devperms_struct default_major_perms[] = { ++ {MKDEV(UNIX98_PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(PTY_MASTER_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++ {MKDEV(PTY_SLAVE_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR}, ++}; ++static struct devperms_struct default_minor_perms[] = { ++ {MKDEV(MEM_MAJOR, 3), S_IROTH | S_IWOTH, S_IFCHR}, /* null */ ++ {MKDEV(MEM_MAJOR, 5), S_IROTH | S_IWOTH, S_IFCHR}, /* zero */ ++ {MKDEV(MEM_MAJOR, 7), S_IROTH | S_IWOTH, S_IFCHR}, /* full */ ++ {MKDEV(TTYAUX_MAJOR, 0), S_IROTH | S_IWOTH, S_IFCHR},/* tty */ ++ {MKDEV(TTYAUX_MAJOR, 2), S_IROTH | S_IWOTH, S_IFCHR},/* ptmx */ ++ {MKDEV(MEM_MAJOR, 8), S_IROTH, S_IFCHR}, /* random */ 
++ {MKDEV(MEM_MAJOR, 9), S_IROTH, S_IFCHR}, /* urandom */ ++}; ++ ++static struct devperms_struct default_deny_perms = { ++ MKDEV(0, 0), 0, S_IFCHR ++}; ++ ++static inline struct devperms_struct *find_default_devperms(int type, ++ dev_t dev) ++{ ++ int i; ++ ++ /* XXX all defaults perms are S_IFCHR */ ++ if (type != S_IFCHR) ++ return &default_deny_perms; ++ ++ for (i = 0; ++ i < sizeof(default_minor_perms)/sizeof(struct devperms_struct); ++ i++) ++ if (MAJOR(dev) == MAJOR(default_minor_perms[i].dev) && ++ MINOR(dev) == MINOR(default_minor_perms[i].dev)) ++ return &default_minor_perms[i]; ++ for (i = 0; ++ i < sizeof(default_major_perms)/sizeof(struct devperms_struct); ++ i++) ++ if (MAJOR(dev) == MAJOR(default_major_perms[i].dev)) ++ return &default_major_perms[i]; ++ ++ return &default_deny_perms; ++} ++ ++#define DEVPERMS_HASH_SZ 512 ++struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ]; ++ ++#define devperms_hashfn(id,dev) \ ++ ( (id << 5) ^ (id >> 5) ^ (MAJOR(dev)) ^ MINOR(dev) ) & \ ++ (DEVPERMS_HASH_SZ - 1) ++ ++static inline void hash_devperms(struct devperms_struct *p) ++{ ++ struct devperms_struct **htable = ++ &devperms_hash[devperms_hashfn(p->veid,p->dev)]; ++ ++ if ((p->devhash_next = *htable) != NULL) ++ (*htable)->devhash_pprev = &p->devhash_next; ++ *htable = p; ++ p->devhash_pprev = htable; ++} ++ ++static inline void unhash_devperms(struct devperms_struct *p) ++{ ++ if (p->devhash_next) ++ p->devhash_next->devhash_pprev = p->devhash_pprev; ++ *p->devhash_pprev = p->devhash_next; ++} ++ ++static int __init init_devperms_hash(void) ++{ ++ write_lock_irq(&devperms_hash_guard); ++ memset(devperms_hash, 0, sizeof(devperms_hash)); ++ hash_devperms(original_perms); ++ hash_devperms(original_perms+1); ++ write_unlock_irq(&devperms_hash_guard); ++ return 0; ++} ++ ++static inline void fini_devperms_hash(void) ++{ ++} ++ ++static inline struct devperms_struct *find_devperms(envid_t veid, ++ int type, ++ dev_t dev) ++{ ++ struct devperms_struct *p, **htable = ++ &devperms_hash[devperms_hashfn(veid,dev)]; ++ ++ for (p = *htable; p && !(p->type==type && ++ MAJOR(dev)==MAJOR(p->dev) && ++ MINOR(dev)==MINOR(p->dev) && ++ p->veid==veid); ++ p = p->devhash_next) ++ ; ++ return p; ++} ++ ++ ++static void do_clean_devperms(envid_t veid) ++{ ++ int i; ++ struct devperms_struct* ve; ++ ++ write_lock_irq(&devperms_hash_guard); ++ for (i = 0; i < DEVPERMS_HASH_SZ; i++) ++ for (ve = devperms_hash[i]; ve;) { ++ struct devperms_struct *next = ve->devhash_next; ++ if (ve->veid == veid) { ++ unhash_devperms(ve); ++ kfree(ve); ++ } ++ ++ ve = next; ++ } ++ write_unlock_irq(&devperms_hash_guard); ++} ++ ++/* ++ * Mode is a mask of ++ * FMODE_READ for read access (configurable by S_IROTH) ++ * FMODE_WRITE for write access (configurable by S_IWOTH) ++ * FMODE_QUOTACTL for quotactl access (configurable by S_IXGRP) ++ */ ++int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode) ++{ ++ struct devperms_struct *perms; ++ struct ve_struct *ve; ++ envid_t veid; ++ ++ perms = NULL; ++ ve = get_exec_env(); ++ veid = ve->veid; ++ ++ read_lock(&devperms_hash_guard); ++ ++ perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev); ++ if (perms) ++ goto end; ++ ++ perms = find_devperms(veid, dev_type|VE_USE_MAJOR, MKDEV(MAJOR(dev),0)); ++ if (perms) ++ goto end; ++ ++ perms = find_devperms(veid, dev_type, MKDEV(0,0)); ++ if (perms) ++ goto end; ++ ++ perms = find_default_devperms(dev_type, dev); ++ ++end: ++ read_unlock(&devperms_hash_guard); ++ ++ access_mode = 
"\000\004\002\006\010\014\012\016"[access_mode]; ++ return perms ? ++ (((perms->mask & access_mode) == access_mode) ? 0 : -EACCES) : ++ -ENODEV; ++} ++ ++int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask) ++{ ++ struct devperms_struct *perms; ++ ++ write_lock_irq(&devperms_hash_guard); ++ perms = find_devperms(veid, type, dev); ++ if (!perms) { ++ struct devperms_struct *perms_new; ++ write_unlock_irq(&devperms_hash_guard); ++ ++ perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL); ++ if (!perms_new) ++ return -ENOMEM; ++ ++ write_lock_irq(&devperms_hash_guard); ++ perms = find_devperms(veid, type, dev); ++ if (perms) { ++ kfree(perms_new); ++ perms_new = perms; ++ } ++ ++ switch (type & VE_USE_MASK) { ++ case 0: ++ dev = 0; ++ break; ++ case VE_USE_MAJOR: ++ dev = MKDEV(MAJOR(dev),0); ++ break; ++ } ++ ++ perms_new->veid = veid; ++ perms_new->dev = dev; ++ perms_new->type = type; ++ perms_new->mask = mask & S_IALLUGO; ++ hash_devperms(perms_new); ++ } else ++ perms->mask = mask & S_IALLUGO; ++ write_unlock_irq(&devperms_hash_guard); ++ return 0; ++} ++EXPORT_SYMBOL(do_setdevperms); ++ ++int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask) ++{ ++ struct ve_struct *ve; ++ int err; ++ ++ if (!capable(CAP_SETVEID) || veid == 0) ++ return -EPERM; ++ ++ if ((ve = get_ve_by_id(veid)) == NULL) ++ return -ESRCH; ++ ++ down_read(&ve->op_sem); ++ err = -ESRCH; ++ if (ve->is_running) ++ err = do_setdevperms(veid, type, dev, mask); ++ up_read(&ve->op_sem); ++ real_put_ve(ve); ++ return err; ++} ++ ++void real_update_load_avg_ve(void) ++{ ++ struct ve_struct *ve; ++ unsigned long nr_active; ++ ++ read_lock(&ve_list_guard); ++ for (ve = ve_list_head; ve != NULL; ve = ve->next) { ++ nr_active = nr_running_ve(ve) + nr_uninterruptible_ve(ve); ++ nr_active *= FIXED_1; ++ CALC_LOAD(ve->avenrun[0], EXP_1, nr_active); ++ CALC_LOAD(ve->avenrun[1], EXP_5, nr_active); ++ CALC_LOAD(ve->avenrun[2], EXP_15, nr_active); ++ } ++ read_unlock(&ve_list_guard); ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * FS-related helpers to VE start/stop ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++/* ++ * DEVPTS needs a virtualization: each environment should see each own list of ++ * pseudo-terminals. ++ * To implement it we need to have separate devpts superblocks for each ++ * VE, and each VE should mount its own one. ++ * Thus, separate vfsmount structures are required. ++ * To minimize intrusion into vfsmount lookup code, separate file_system_type ++ * structures are created. ++ * ++ * In addition to this, patch fo character device itself is required, as file ++ * system itself is used only for MINOR/MAJOR lookup. 
++static int register_ve_fs_type(struct ve_struct *ve, ++ struct file_system_type *template, ++ struct file_system_type **p_fs_type, struct vfsmount **p_mnt) ++{ ++ struct vfsmount *mnt; ++ struct file_system_type *local_fs_type; ++ int ret; ++ ++ VZTRACE("register_ve_fs_type(\"%s\")\n", template->name); ++ ++ local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *), ++ GFP_KERNEL); ++ if (local_fs_type == NULL) ++ return -ENOMEM; ++ ++ memset(local_fs_type, 0, sizeof(*local_fs_type)); ++ local_fs_type->name = template->name; ++ local_fs_type->fs_flags = template->fs_flags; ++ local_fs_type->get_sb = template->get_sb; ++ local_fs_type->kill_sb = template->kill_sb; ++ local_fs_type->owner = template->owner; ++ /* ++ * 1. we do not have refcounter on fstype ++ * 2. fstype holds reference to ve using get_ve()/put_ve(). ++ * so we free fstype when freeing ve and we are sure it's ok to free it ++ */ ++ SET_VE_OWNER_FSTYPE(local_fs_type, ve); ++ get_filesystem(local_fs_type); /* get_ve() inside */ ++ ++ ret = register_filesystem(local_fs_type); /* does not get */ ++ if (ret) ++ goto reg_err; ++ ++ mnt = kern_mount(local_fs_type); ++ if (IS_ERR(mnt)) ++ goto mnt_err; ++ ++ /* Usage counters after successful execution of kern_mount: ++ * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem) ++ * mnt - +1 == 1 (alloc_vfsmnt) ++ */ ++ ++ *p_fs_type = local_fs_type; ++ *p_mnt = mnt; ++ return 0; ++ ++mnt_err: ++ ret = PTR_ERR(mnt); ++ unregister_filesystem(local_fs_type); /* does not put */ ++ ++reg_err: ++ put_filesystem(local_fs_type); ++ kfree(local_fs_type); ++ printk(KERN_DEBUG ++ "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret); ++ return ret; ++} ++ ++static void umount_ve_fs_type(struct file_system_type *local_fs_type) ++{ ++ struct vfsmount *mnt; ++ struct list_head *p, *q; ++ LIST_HEAD(kill); ++ ++ down_write(&current->namespace->sem); ++ spin_lock(&vfsmount_lock); ++ list_for_each_safe(p, q, &current->namespace->list) { ++ mnt = list_entry(p, struct vfsmount, mnt_list); ++ if (mnt->mnt_sb->s_type != local_fs_type) ++ continue; ++ list_del(p); ++ list_add(p, &kill); ++ } ++ ++ while (!list_empty(&kill)) { ++ mnt = list_entry(kill.next, struct vfsmount, mnt_list); ++ umount_tree(mnt); ++ } ++ spin_unlock(&vfsmount_lock); ++ up_write(&current->namespace->sem); ++} ++ ++static void unregister_ve_fs_type(struct file_system_type *local_fs_type, ++ struct vfsmount *local_fs_mount) ++{ ++ if (local_fs_mount == NULL || ++ local_fs_type == NULL) { ++ if (local_fs_mount != NULL || ++ local_fs_type != NULL) ++ BUG(); ++ return; ++ } ++ ++ VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name); ++ ++ unregister_filesystem(local_fs_type); ++ umount_ve_fs_type(local_fs_type); ++ kern_umount(local_fs_mount); /* alias to mntput, drop our ref */ ++ put_filesystem(local_fs_type); ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * FS-related helpers to VE start/stop ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#ifdef CONFIG_SYSCTL ++static ctl_table ve_sysctl_tables[] = { ++ /* kernel */ ++ { ++ .ctl_name = CTL_KERN, ++ .procname = "kernel", ++ .mode = 0555, ++ .child = &ve_sysctl_tables[2], ++ }, ++ { .ctl_name = 0 }, ++ /* kernel/[vars] */ ++ { ++ .ctl_name = KERN_NODENAME, ++ .procname = "hostname", ++ .maxlen = 64, ++ .mode = 0644, ++ .proc_handler = 
&proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { ++ .ctl_name = KERN_DOMAINNAME, ++ .procname = "domainname", ++ .maxlen = 64, ++ .mode = 0644, ++ .proc_handler = &proc_doutsstring, ++ .strategy = &sysctl_string, ++ }, ++ { ++ .ctl_name = KERN_SHMMAX, ++ .procname = "shmmax", ++ .maxlen = sizeof(size_t), ++ .mode = 0644, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ .ctl_name = KERN_SHMALL, ++ .procname = "shmall", ++ .maxlen = sizeof(size_t), ++ .mode = 0644, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++ { ++ .ctl_name = KERN_SHMMNI, ++ .procname = "shmmni", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMAX, ++ .procname = "msgmax", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMNI, ++ .procname = "msgmni", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_MSGMNB, ++ .procname = "msgmnb", ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, ++ { ++ .ctl_name = KERN_SEM, ++ .procname = "sem", ++ .maxlen = 4 * sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { .ctl_name = 0, } ++}; ++ ++static int register_ve_sysctltables(struct ve_struct *ve) ++{ ++ struct ctl_table_header *header; ++ ctl_table *root, *table; ++ ++ VZTRACE("register_ve_sysctltables\n"); ++ ++ root = clone_sysctl_template(ve_sysctl_tables, ++ sizeof(ve_sysctl_tables) / sizeof(ctl_table)); ++ if (root == NULL) ++ goto out; ++ ++ table = root->child; ++ table[0].data = &ve->utsname->nodename; ++ table[1].data = &ve->utsname->domainname; ++ table[2].data = &ve->_shm_ctlmax; ++ table[3].data = &ve->_shm_ctlall; ++ table[4].data = &ve->_shm_ctlmni; ++ table[5].data = &ve->_msg_ctlmax; ++ table[6].data = &ve->_msg_ctlmni; ++ table[7].data = &ve->_msg_ctlmnb; ++ table[8].data = &ve->_sem_ctls[0]; ++ ++ /* insert at head to override kern entries */ ++ header = register_sysctl_table(root, 1); ++ if (header == NULL) ++ goto out_free; ++ ++ ve->kern_header = header; ++ ve->kern_table = root; ++ return 0; ++ ++out_free: ++ free_sysctl_clone(root); ++out: ++ return -ENOMEM; ++} ++ ++static inline void unregister_ve_sysctltables(struct ve_struct *ve) ++{ ++ unregister_sysctl_table(ve->kern_header); ++} ++ ++static inline void free_ve_sysctltables(struct ve_struct *ve) ++{ ++ free_sysctl_clone(ve->kern_table); ++} ++#endif ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE start: subsystems ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#include <net/ip.h> ++#include <net/tcp.h> ++#include <net/udp.h> ++#include <net/icmp.h> ++ ++extern struct new_utsname virt_utsname; ++ ++static int init_ve_utsname(struct ve_struct *ve) ++{ ++ ve->utsname = kmalloc(sizeof(*ve->utsname), GFP_KERNEL); ++ if (ve->utsname == NULL) ++ return -ENOMEM; ++ ++ down_read(&uts_sem); /* protect the source */ ++ memcpy(ve->utsname, &system_utsname, sizeof(*ve->utsname)); ++ memcpy(ve->utsname->release, virt_utsname.release, ++ sizeof(virt_utsname.release)); ++ up_read(&uts_sem); ++ ++ return 0; ++} ++ ++static void free_ve_utsname(struct ve_struct *ve) ++{ ++ kfree(ve->utsname); ++ ve->utsname = NULL; 
++} ++ ++static int init_fini_ve_mibs(struct ve_struct *ve, int fini) ++{ ++ if (fini) ++ goto fini; ++ if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib))) ++ goto out1; ++ if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib))) ++ goto out2; ++ if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib))) ++ goto out3; ++ if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib))) ++ goto out4; ++ if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib))) ++ goto out5; ++ if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib))) ++ goto out6; ++ if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib))) ++ goto out7; ++ if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib))) ++ goto out8; ++ if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib))) ++ goto out9; ++ if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib))) ++ goto out10; ++ return 0; ++fini: ++ free_percpu(ve->_udp_statistics[1]); ++out10: ++ free_percpu(ve->_udp_statistics[0]); ++out9: ++ free_percpu(ve->_tcp_statistics[1]); ++out8: ++ free_percpu(ve->_tcp_statistics[0]); ++out7: ++ free_percpu(ve->_icmp_statistics[1]); ++out6: ++ free_percpu(ve->_icmp_statistics[0]); ++out5: ++ free_percpu(ve->_ip_statistics[1]); ++out4: ++ free_percpu(ve->_ip_statistics[0]); ++out3: ++ free_percpu(ve->_net_statistics[1]); ++out2: ++ free_percpu(ve->_net_statistics[0]); ++out1: ++ return -ENOMEM; ++} ++ ++static inline int init_ve_mibs(struct ve_struct *ve) ++{ ++ return init_fini_ve_mibs(ve, 0); ++} ++ ++static inline void fini_ve_mibs(struct ve_struct *ve) ++{ ++ (void)init_fini_ve_mibs(ve, 1); ++} ++ ++extern struct net_device templ_loopback_dev; ++static void veloop_setup(struct net_device *dev) ++{ ++ int padded; ++ padded = dev->padded; ++ memcpy(dev, &templ_loopback_dev, sizeof(struct net_device)); ++ dev->padded = padded; ++} ++ ++static int init_ve_netdev(void) ++{ ++ struct ve_struct *ve; ++ struct net_device_stats *stats; ++ int err; ++ ++ ve = get_exec_env(); ++ INIT_HLIST_HEAD(&ve->_net_dev_head); ++ ve->_net_dev_base = NULL; ++ ve->_net_dev_tail = &ve->_net_dev_base; ++ ++ ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name, ++ veloop_setup); ++ if (ve->_loopback_dev == NULL) ++ return -ENOMEM; ++ if (loopback_dev.get_stats != NULL) { ++ stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); ++ if (stats != NULL) { ++ memset(stats, 0, sizeof(struct net_device_stats)); ++ ve->_loopback_dev->priv = stats; ++ ve->_loopback_dev->get_stats = loopback_dev.get_stats; ++ ve->_loopback_dev->destructor = loopback_dev.destructor; ++ } ++ } ++ err = register_netdev(ve->_loopback_dev); ++ if (err) { ++ if (ve->_loopback_dev->priv != NULL) ++ kfree(ve->_loopback_dev->priv); ++ free_netdev(ve->_loopback_dev); ++ } ++ return err; ++} ++ ++static void fini_ve_netdev(void) ++{ ++ struct ve_struct *ve; ++ struct net_device *dev; ++ ++ ve = get_exec_env(); ++ while (1) { ++ rtnl_lock(); ++ /* ++ * loopback is special, it can be referenced in fib's, ++ * so it must be freed the last. Doing so is ++ * sufficient to guarantee absence of such references. 
++ */ ++ if (dev_base == ve->_loopback_dev) ++ dev = dev_base->next; ++ else ++ dev = dev_base; ++ if (dev == NULL) ++ break; ++ unregister_netdevice(dev); ++ rtnl_unlock(); ++ free_netdev(dev); ++ } ++ unregister_netdevice(ve->_loopback_dev); ++ rtnl_unlock(); ++ free_netdev(ve->_loopback_dev); ++ ve->_loopback_dev = NULL; ++} ++#else ++#define init_ve_mibs(ve) (0) ++#define fini_ve_mibs(ve) do { } while (0) ++#define init_ve_netdev() (0) ++#define fini_ve_netdev() do { } while (0) ++#endif ++ ++static int prepare_proc_root(struct ve_struct *ve) ++{ ++ struct proc_dir_entry *de; ++ ++ de = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL); ++ if (de == NULL) ++ return -ENOMEM; ++ memset(de, 0, sizeof(struct proc_dir_entry)); ++ memcpy(de + 1, "/proc", 6); ++ de->name = (char *)(de + 1); ++ de->namelen = 5; ++ de->mode = S_IFDIR | S_IRUGO | S_IXUGO; ++ de->nlink = 2; ++ atomic_set(&de->count, 1); ++ ++ ve->proc_root = de; ++ return 0; ++} ++ ++#ifdef CONFIG_PROC_FS ++static int init_ve_proc(struct ve_struct *ve) ++{ ++ int err; ++ struct proc_dir_entry *de; ++ ++ err = prepare_proc_root(ve); ++ if (err) ++ goto out_root; ++ ++ err = register_ve_fs_type(ve, &proc_fs_type, ++ &ve->proc_fstype, &ve->proc_mnt); ++ if (err) ++ goto out_reg; ++ ++ /* create /proc/vz in VE local proc tree */ ++ err = -ENOMEM; ++ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ if (!de) ++ goto out_vz; ++ ++ return 0; ++ ++out_vz: ++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt); ++ ve->proc_mnt = NULL; ++out_reg: ++ /* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */ ++ ; ++out_root: ++ return err; ++} ++ ++static void fini_ve_proc(struct ve_struct *ve) ++{ ++ remove_proc_entry("vz", NULL); ++ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt); ++ ve->proc_mnt = NULL; ++} ++ ++static void free_ve_proc(struct ve_struct *ve) ++{ ++ /* proc filesystem frees proc_dir_entries on remove_proc_entry() only, ++ so we check that everything was removed and not lost */ ++ if (ve->proc_root && ve->proc_root->subdir) { ++ struct proc_dir_entry *p = ve->proc_root; ++ printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid); ++ while ((p = p->subdir) != NULL) ++ printk("/%s", p->name); ++ printk(" is not removed!\n"); ++ } ++ ++ kfree(ve->proc_root); ++ kfree(ve->proc_fstype); ++ ++ ve->proc_fstype = NULL; ++ ve->proc_root = NULL; ++} ++#else ++#define init_ve_proc(ve) (0) ++#define fini_ve_proc(ve) do { } while (0) ++#define free_ve_proc(ve) do { } while (0) ++#endif ++ ++#ifdef CONFIG_SYSCTL ++static int init_ve_sysctl(struct ve_struct *ve) ++{ ++ int err; ++ ++#ifdef CONFIG_PROC_FS ++ err = -ENOMEM; ++ ve->proc_sys_root = proc_mkdir("sys", 0); ++ if (ve->proc_sys_root == NULL) ++ goto out_proc; ++#endif ++ INIT_LIST_HEAD(&ve->sysctl_lh); ++ err = register_ve_sysctltables(ve); ++ if (err) ++ goto out_reg; ++ ++ err = devinet_sysctl_init(ve); ++ if (err) ++ goto out_dev; ++ ++ return 0; ++ ++out_dev: ++ unregister_ve_sysctltables(ve); ++ free_ve_sysctltables(ve); ++out_reg: ++#ifdef CONFIG_PROC_FS ++ remove_proc_entry("sys", NULL); ++out_proc: ++#endif ++ return err; ++} ++ ++static void fini_ve_sysctl(struct ve_struct *ve) ++{ ++ devinet_sysctl_fini(ve); ++ unregister_ve_sysctltables(ve); ++ remove_proc_entry("sys", NULL); ++} ++ ++static void free_ve_sysctl(struct ve_struct *ve) ++{ ++ devinet_sysctl_free(ve); ++ free_ve_sysctltables(ve); ++} ++#else ++#define init_ve_sysctl(ve) (0) ++#define fini_ve_sysctl(ve) do { } while (0) ++#define free_ve_sysctl(ve) do { } while (0) 
++#endif ++ ++#ifdef CONFIG_UNIX98_PTYS ++#include <linux/devpts_fs.h> ++ ++static int init_ve_devpts(struct ve_struct *ve) ++{ ++ int err; ++ ++ err = -ENOMEM; ++ ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL); ++ if (ve->devpts_config == NULL) ++ goto out; ++ memset(ve->devpts_config, 0, sizeof(struct devpts_config)); ++ ve->devpts_config->mode = 0600; ++ err = register_ve_fs_type(ve, &devpts_fs_type, ++ &ve->devpts_fstype, &ve->devpts_mnt); ++ if (err) { ++ kfree(ve->devpts_config); ++ ve->devpts_config = NULL; ++ } ++out: ++ return err; ++} ++ ++static void fini_ve_devpts(struct ve_struct *ve) ++{ ++ unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt); ++ /* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */ ++ ve->devpts_mnt = NULL; ++ kfree(ve->devpts_config); ++ ve->devpts_config = NULL; ++} ++#else ++#define init_ve_devpts(ve) (0) ++#define fini_ve_devpts(ve) do { } while (0) ++#endif ++ ++static int init_ve_shmem(struct ve_struct *ve) ++{ ++ return register_ve_fs_type(ve, ++ &tmpfs_fs_type, ++ &ve->shmem_fstype, ++ &ve->shmem_mnt); ++} ++ ++static void fini_ve_shmem(struct ve_struct *ve) ++{ ++ unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt); ++ /* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */ ++ ve->shmem_mnt = NULL; ++} ++ ++static int init_ve_sysfs(struct ve_struct *ve) ++{ ++ struct subsystem *subsys; ++ struct class *nc; ++ int err; ++ extern struct subsystem class_obj_subsys; ++ extern struct subsystem class_subsys; ++ extern struct class net_class; ++ ++#ifdef CONFIG_SYSFS ++ err = 0; ++ if (ve->features & VE_FEATURE_SYSFS) ++ err = register_ve_fs_type(ve, ++ &sysfs_fs_type, ++ &ve->sysfs_fstype, ++ &ve->sysfs_mnt); ++ if (err != 0) ++ goto out_fs_type; ++#endif ++ err = -ENOMEM; ++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL); ++ if (subsys == NULL) ++ goto out_class_obj; ++ /* ick, this is ugly, the things we go through to keep from showing up ++ * in sysfs... */ ++ memset(subsys, 0, sizeof(*subsys)); ++ memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name, ++ sizeof(subsys->kset.kobj.name)); ++ subsys->kset.ktype = class_obj_subsys.kset.ktype; ++ subsys->kset.hotplug_ops = class_obj_subsys.kset.hotplug_ops; ++ subsystem_init(subsys); ++ if (!subsys->kset.subsys) ++ subsys->kset.subsys = subsys; ++ ve->class_obj_subsys = subsys; ++ ++ err = -ENOMEM; ++ subsys = kmalloc(sizeof(*subsys), GFP_KERNEL); ++ if (subsys == NULL) ++ goto out_class_subsys; ++ /* ick, this is ugly, the things we go through to keep from showing up ++ * in sysfs... 
*/ ++ memset(subsys, 0, sizeof(*subsys)); ++ memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name, ++ sizeof(subsys->kset.kobj.name)); ++ subsys->kset.ktype = class_subsys.kset.ktype; ++ subsys->kset.hotplug_ops = class_subsys.kset.hotplug_ops; ++ ve->class_subsys = subsys; ++ err = subsystem_register(subsys); ++ if (err != 0) ++ goto out_register; ++ ++ err = -ENOMEM; ++ nc = kmalloc(sizeof(*nc), GFP_KERNEL); ++ if (nc == NULL) ++ goto out_nc; ++ memset(nc, 0, sizeof(*nc)); ++ nc->name = net_class.name; ++ nc->release = net_class.release; ++ nc->hotplug = net_class.hotplug; ++ err = class_register(nc); ++ if (err != 0) ++ goto out_class_register; ++ ve->net_class = nc; ++ ++ return err; ++ ++out_class_register: ++ kfree(nc); ++out_nc: ++ subsystem_unregister(subsys); ++out_register: ++ kfree(ve->class_subsys); ++out_class_subsys: ++ kfree(ve->class_obj_subsys); ++out_class_obj: ++#ifdef CONFIG_SYSFS ++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt); ++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */ ++out_fs_type: ++#endif ++ ve->class_subsys = NULL; ++ ve->class_obj_subsys = NULL; ++ return err; ++} ++ ++static void fini_ve_sysfs(struct ve_struct *ve) ++{ ++ class_unregister(ve->net_class); ++ subsystem_unregister(ve->class_subsys); ++ ++ kfree(ve->net_class); ++ kfree(ve->class_subsys); ++ kfree(ve->class_obj_subsys); ++ ++ ve->net_class = NULL; ++ ve->class_subsys = NULL; ++ ve->class_obj_subsys = NULL; ++#ifdef CONFIG_SYSFS ++ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt); ++ ve->sysfs_mnt = NULL; ++ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */ ++#endif ++} ++ ++static void free_ve_filesystems(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSFS ++ kfree(ve->sysfs_fstype); ++ ve->sysfs_fstype = NULL; ++#endif ++ kfree(ve->shmem_fstype); ++ ve->shmem_fstype = NULL; ++ ++ kfree(ve->devpts_fstype); ++ ve->devpts_fstype = NULL; ++ ++ free_ve_proc(ve); ++} ++ ++static int init_printk(struct ve_struct *ve) ++{ ++ struct ve_prep_printk { ++ wait_queue_head_t log_wait; ++ unsigned long log_start; ++ unsigned long log_end; ++ unsigned long logged_chars; ++ } *tmp; ++ ++ tmp = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ memset(tmp, 0, sizeof(struct ve_prep_printk)); ++ init_waitqueue_head(&tmp->log_wait); ++ ve->_log_wait = &tmp->log_wait; ++ ve->_log_start = &tmp->log_start; ++ ve->_log_end = &tmp->log_end; ++ ve->_logged_chars = &tmp->logged_chars; ++ /* ve->log_buf will be initialized later by ve_log_init() */ ++ return 0; ++} ++ ++static void fini_printk(struct ve_struct *ve) ++{ ++ /* ++ * there is no spinlock protection here because nobody can use ++ * log_buf at the moments when this code is called. ++ */ ++ kfree(ve->log_buf); ++ kfree(ve->_log_wait); ++} ++ ++static void fini_venet(struct ve_struct *ve) ++{ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ tcp_v4_kill_ve_sockets(ve); ++#endif ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ve_mapped_devs_cleanup(ve); ++#endif ++} ++ ++static int init_ve_sched(struct ve_struct *ve) ++{ ++#ifdef CONFIG_FAIRSCHED ++ int err; ++ ++ /* ++ * We refuse to switch to an already existing node since nodes ++ * keep a pointer to their ve_struct... 
++ */ ++ err = sys_fairsched_mknod(0, 1, ve->veid); ++ if (err < 0) { ++ printk(KERN_WARNING "Can't create fairsched node %d\n", ++ ve->veid); ++ return err; ++ } ++ err = sys_fairsched_mvpr(current->pid, ve->veid); ++ if (err) { ++ printk(KERN_WARNING "Can't switch to fairsched node %d\n", ++ ve->veid); ++ if (sys_fairsched_rmnod(ve->veid)) ++ printk(KERN_ERR "Can't clean fairsched node %d\n", ++ ve->veid); ++ return err; ++ } ++#endif ++ ve_sched_attach(ve); ++ return 0; ++} ++ ++static void fini_ve_sched(struct ve_struct *ve) ++{ ++#ifdef CONFIG_FAIRSCHED ++ if (task_vsched_id(current) == ve->veid) ++ if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id)) ++ printk(KERN_WARNING "Can't leave fairsched node %d\n", ++ ve->veid); ++ if (sys_fairsched_rmnod(ve->veid)) ++ printk(KERN_ERR "Can't remove fairsched node %d\n", ++ ve->veid); ++#endif ++} ++ ++static int init_ve_struct(struct ve_struct *ve, envid_t veid, ++ u32 class_id, env_create_param_t *data, ++ struct task_struct *init_tsk) ++{ ++ int n; ++ ++ memset(ve, 0, sizeof(*ve)); ++ (void)get_ve(ve); ++ ve->veid = veid; ++ ve->class_id = class_id; ++ ve->init_entry = init_tsk; ++ ve->features = data->feature_mask; ++ INIT_LIST_HEAD(&ve->vetask_lh); ++ init_rwsem(&ve->op_sem); ++ ve->ifindex = -1; ++ ++ for(n = 0; n < UIDHASH_SZ_VE; ++n) ++ INIT_LIST_HEAD(&ve->uidhash_table[n]); ++ ++ do_posix_clock_monotonic_gettime(&ve->start_timespec); ++ ve->start_jiffies = jiffies; ++ ve->start_cycles = get_cycles(); ++ ve->virt_pids = glob_virt_pids; ++ ++ return 0; ++} ++ ++static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk) ++{ ++ read_lock(&tsk->fs->lock); ++ ve->fs_rootmnt = tsk->fs->rootmnt; ++ ve->fs_root = tsk->fs->root; ++ read_unlock(&tsk->fs->lock); ++ mark_tree_virtual(ve->fs_rootmnt, ve->fs_root); ++} ++ ++static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk) ++{ ++ /* required for real_setdevperms from register_ve_<fs> above */ ++ memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t)); ++ cap_lower(ve->cap_default, CAP_SETVEID); ++} ++ ++static int ve_list_add(struct ve_struct *ve) ++{ ++ write_lock_irq(&ve_list_guard); ++ if (__find_ve_by_id(ve->veid) != NULL) ++ goto err_exists; ++ ++ ve->prev = NULL; ++ ve->next = ve_list_head; ++ if (ve_list_head) ++ ve_list_head->prev = ve; ++ ve_list_head = ve; ++ nr_ve++; ++ write_unlock_irq(&ve_list_guard); ++ return 0; ++ ++err_exists: ++ write_unlock_irq(&ve_list_guard); ++ return -EEXIST; ++} ++ ++static void ve_list_del(struct ve_struct *ve) ++{ ++ write_lock_irq(&ve_list_guard); ++ if (ve->prev) ++ ve->prev->next = ve->next; ++ else ++ ve_list_head = ve->next; ++ if (ve->next) ++ ve->next->prev = ve->prev; ++ nr_ve--; ++ write_unlock_irq(&ve_list_guard); ++} ++ ++static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve) ++{ ++ spin_lock(&task_capability_lock); ++ cap_mask(tsk->cap_effective, ve->cap_default); ++ cap_mask(tsk->cap_inheritable, ve->cap_default); ++ cap_mask(tsk->cap_permitted, ve->cap_default); ++ spin_unlock(&task_capability_lock); ++} ++ ++static void move_task(struct task_struct *tsk, struct ve_struct *new, ++ struct ve_struct *old) ++{ ++ /* this probihibts ptracing of task entered to VPS from host system */ ++ tsk->mm->vps_dumpable = 0; ++ /* setup capabilities before enter */ ++ set_task_ve_caps(tsk, new); ++ ++ write_lock_irq(&tasklist_lock); ++ VE_TASK_INFO(tsk)->owner_env = new; ++ VE_TASK_INFO(tsk)->exec_env = new; ++ REMOVE_VE_LINKS(tsk); ++ SET_VE_LINKS(tsk); ++ ++ 
atomic_dec(&old->pcounter); ++ atomic_inc(&new->pcounter); ++ real_put_ve(old); ++ get_ve(new); ++ write_unlock_irq(&tasklist_lock); ++} ++ ++#if (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)) && \ ++ defined(CONFIG_NETFILTER) && defined(CONFIG_VE_IPTABLES) ++extern int init_netfilter(void); ++extern void fini_netfilter(void); ++#define init_ve_netfilter() init_netfilter() ++#define fini_ve_netfilter() fini_netfilter() ++#else ++#define init_ve_netfilter() (0) ++#define fini_ve_netfilter() do { } while (0) ++#endif ++ ++#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \ ++({ \ ++ int ret = 0; \ ++ if (VE_IPT_CMP(mask, full_mask) && \ ++ VE_IPT_CMP((ve)->_iptables_modules, \ ++ full_mask & ~(full_mask##_MOD))) { \ ++ ret = KSYMERRCALL(1, mod, name, args); \ ++ if (ret == 0) \ ++ (ve)->_iptables_modules |= \ ++ full_mask##_MOD; \ ++ if (ret == 1) \ ++ ret = 0; \ ++ } \ ++ ret; \ ++}) ++ ++#define KSYMIPTFINI(mask, full_mask, mod, name, args) \ ++({ \ ++ if (VE_IPT_CMP(mask, full_mask##_MOD)) \ ++ KSYMSAFECALL_VOID(mod, name, args); \ ++}) ++ ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask, ++ int init_or_cleanup) ++{ ++ int err; ++ ++ err = 0; ++ if (!init_or_cleanup) ++ goto cleanup; ++ ++ /* init part */ ++#if defined(CONFIG_IP_NF_IPTABLES) || \ ++ defined(CONFIG_IP_NF_IPTABLES_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES, ++ ip_tables, init_iptables, ()); ++ if (err < 0) ++ goto err_iptables; ++#endif ++#if defined(CONFIG_IP_NF_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_CONNTRACK_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK, ++ ip_conntrack, init_iptable_conntrack, ()); ++ if (err < 0) ++ goto err_iptable_conntrack; ++#endif ++#if defined(CONFIG_IP_NF_FTP) || \ ++ defined(CONFIG_IP_NF_FTP_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP, ++ ip_conntrack_ftp, init_iptable_ftp, ()); ++ if (err < 0) ++ goto err_iptable_ftp; ++#endif ++#if defined(CONFIG_IP_NF_IRC) || \ ++ defined(CONFIG_IP_NF_IRC_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC, ++ ip_conntrack_irc, init_iptable_irc, ()); ++ if (err < 0) ++ goto err_iptable_irc; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK, ++ ipt_conntrack, init_iptable_conntrack_match, ()); ++ if (err < 0) ++ goto err_iptable_conntrack_match; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_STATE) || \ ++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE, ++ ipt_state, init_iptable_state, ()); ++ if (err < 0) ++ goto err_iptable_state; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \ ++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER, ++ ipt_helper, init_iptable_helper, ()); ++ if (err < 0) ++ goto err_iptable_helper; ++#endif ++#if defined(CONFIG_IP_NF_NAT) || \ ++ defined(CONFIG_IP_NF_NAT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT, ++ iptable_nat, init_iptable_nat, ()); ++ if (err < 0) ++ goto err_iptable_nat; ++#endif ++#if defined(CONFIG_IP_NF_NAT_FTP) || \ ++ defined(CONFIG_IP_NF_NAT_FTP_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP, ++ ip_nat_ftp, init_iptable_nat_ftp, ()); ++ if (err < 0) ++ goto err_iptable_nat_ftp; ++#endif ++#if defined(CONFIG_IP_NF_NAT_IRC) || \ ++ defined(CONFIG_IP_NF_NAT_IRC_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, 
VE_IP_NAT_IRC, ++ ip_nat_irc, init_iptable_nat_irc, ()); ++ if (err < 0) ++ goto err_iptable_nat_irc; ++#endif ++#if defined(CONFIG_IP_NF_FILTER) || \ ++ defined(CONFIG_IP_NF_FILTER_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER, ++ iptable_filter, init_iptable_filter, ()); ++ if (err < 0) ++ goto err_iptable_filter; ++#endif ++#if defined(CONFIG_IP_NF_MANGLE) || \ ++ defined(CONFIG_IP_NF_MANGLE_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE, ++ iptable_mangle, init_iptable_mangle, ()); ++ if (err < 0) ++ goto err_iptable_mangle; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \ ++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT, ++ ipt_limit, init_iptable_limit, ()); ++ if (err < 0) ++ goto err_iptable_limit; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT, ++ ipt_multiport, init_iptable_multiport, ()); ++ if (err < 0) ++ goto err_iptable_multiport; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TOS) || \ ++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS, ++ ipt_tos, init_iptable_tos, ()); ++ if (err < 0) ++ goto err_iptable_tos; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TOS) || \ ++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS, ++ ipt_TOS, init_iptable_TOS, ()); ++ if (err < 0) ++ goto err_iptable_TOS; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT, ++ ipt_REJECT, init_iptable_REJECT, ()); ++ if (err < 0) ++ goto err_iptable_REJECT; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \ ++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS, ++ ipt_TCPMSS, init_iptable_TCPMSS, ()); ++ if (err < 0) ++ goto err_iptable_TCPMSS; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \ ++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS, ++ ipt_tcpmss, init_iptable_tcpmss, ()); ++ if (err < 0) ++ goto err_iptable_tcpmss; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TTL) || \ ++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL, ++ ipt_ttl, init_iptable_ttl, ()); ++ if (err < 0) ++ goto err_iptable_ttl; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG, ++ ipt_LOG, init_iptable_LOG, ()); ++ if (err < 0) ++ goto err_iptable_LOG; ++#endif ++#if defined(CONFIG_IP_NF_MATCH_LENGTH) || \ ++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH, ++ ipt_length, init_iptable_length, ()); ++ if (err < 0) ++ goto err_iptable_length; ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE) ++ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT, ++ ipt_REDIRECT, init_iptable_REDIRECT, ()); ++ if (err < 0) ++ goto err_iptable_REDIRECT; ++#endif ++ return 0; ++ ++/* ------------------------------------------------------------------------- */ ++ ++cleanup: ++#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT, ++ ipt_REDIRECT, fini_iptable_REDIRECT, ()); ++err_iptable_REDIRECT: ++#endif ++#if 
defined(CONFIG_IP_NF_MATCH_LENGTH) || \ ++ defined(CONFIG_IP_NF_MATCH_LENGTH_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH, ++ ipt_length, fini_iptable_length, ()); ++err_iptable_length: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_LOG) || \ ++ defined(CONFIG_IP_NF_TARGET_LOG_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG, ++ ipt_LOG, fini_iptable_LOG, ()); ++err_iptable_LOG: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TTL) || \ ++ defined(CONFIG_IP_NF_MATCH_TTL_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL, ++ ipt_ttl, fini_iptable_ttl, ()); ++err_iptable_ttl: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TCPMSS) || \ ++ defined(CONFIG_IP_NF_MATCH_TCPMSS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS, ++ ipt_tcpmss, fini_iptable_tcpmss, ()); ++err_iptable_tcpmss: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \ ++ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS, ++ ipt_TCPMSS, fini_iptable_TCPMSS, ()); ++err_iptable_TCPMSS: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_REJECT) || \ ++ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT, ++ ipt_REJECT, fini_iptable_REJECT, ()); ++err_iptable_REJECT: ++#endif ++#if defined(CONFIG_IP_NF_TARGET_TOS) || \ ++ defined(CONFIG_IP_NF_TARGET_TOS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS, ++ ipt_TOS, fini_iptable_TOS, ()); ++err_iptable_TOS: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_TOS) || \ ++ defined(CONFIG_IP_NF_MATCH_TOS_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS, ++ ipt_tos, fini_iptable_tos, ()); ++err_iptable_tos: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \ ++ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT, ++ ipt_multiport, fini_iptable_multiport, ()); ++err_iptable_multiport: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_LIMIT) || \ ++ defined(CONFIG_IP_NF_MATCH_LIMIT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT, ++ ipt_limit, fini_iptable_limit, ()); ++err_iptable_limit: ++#endif ++#if defined(CONFIG_IP_NF_MANGLE) || \ ++ defined(CONFIG_IP_NF_MANGLE_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ++ iptable_mangle, fini_iptable_mangle, ()); ++err_iptable_mangle: ++#endif ++#if defined(CONFIG_IP_NF_FILTER) || \ ++ defined(CONFIG_IP_NF_FILTER_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ++ iptable_filter, fini_iptable_filter, ()); ++err_iptable_filter: ++#endif ++#if defined(CONFIG_IP_NF_NAT_IRC) || \ ++ defined(CONFIG_IP_NF_NAT_IRC_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC, ++ ip_nat_irc, fini_iptable_nat_irc, ()); ++err_iptable_nat_irc: ++#endif ++#if defined(CONFIG_IP_NF_NAT_FTP) || \ ++ defined(CONFIG_IP_NF_NAT_FTP_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP, ++ ip_nat_ftp, fini_iptable_nat_ftp, ()); ++err_iptable_nat_ftp: ++#endif ++#if defined(CONFIG_IP_NF_NAT) || \ ++ defined(CONFIG_IP_NF_NAT_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ++ iptable_nat, fini_iptable_nat, ()); ++err_iptable_nat: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_HELPER) || \ ++ defined(CONFIG_IP_NF_MATCH_HELPER_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER, ++ ipt_helper, fini_iptable_helper, ()); ++err_iptable_helper: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_STATE) || \ ++ defined(CONFIG_IP_NF_MATCH_STATE_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE, ++ 
ipt_state, fini_iptable_state, ()); ++err_iptable_state: ++#endif ++#if defined(CONFIG_IP_NF_MATCH_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_MATCH_CONNTRACK_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK, ++ ipt_conntrack, fini_iptable_conntrack_match, ()); ++err_iptable_conntrack_match: ++#endif ++#if defined(CONFIG_IP_NF_IRC) || \ ++ defined(CONFIG_IP_NF_IRC_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC, ++ ip_conntrack_irc, fini_iptable_irc, ()); ++err_iptable_irc: ++#endif ++#if defined(CONFIG_IP_NF_FTP) || \ ++ defined(CONFIG_IP_NF_FTP_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP, ++ ip_conntrack_ftp, fini_iptable_ftp, ()); ++err_iptable_ftp: ++#endif ++#if defined(CONFIG_IP_NF_CONNTRACK) || \ ++ defined(CONFIG_IP_NF_CONNTRACK_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK, ++ ip_conntrack, fini_iptable_conntrack, ()); ++err_iptable_conntrack: ++#endif ++#if defined(CONFIG_IP_NF_IPTABLES) || \ ++ defined(CONFIG_IP_NF_IPTABLES_MODULE) ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES, ++ ip_tables, fini_iptables, ()); ++err_iptables: ++#endif ++ ve->_iptables_modules = 0; ++ ++ return err; ++} ++#else ++#define do_ve_iptables(ve, initmask, init) (0) ++#endif ++ ++static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask) ++{ ++ return do_ve_iptables(ve, init_mask, 1); ++} ++ ++static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask) ++{ ++ (void)do_ve_iptables(ve, init_mask, 0); ++} ++ ++static void flush_ve_iptables(struct ve_struct *ve) ++{ ++ /* ++ * flush all rule tables first, ++ * this helps us to avoid refs to freed objs ++ */ ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables, ++ ipt_flush_table, (ve->_ipt_mangle_table)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables, ++ ipt_flush_table, (ve->_ve_ipt_filter_pf)); ++ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables, ++ ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table)); ++} ++ ++static struct list_head ve_hooks[VE_MAX_HOOKS]; ++static DECLARE_RWSEM(ve_hook_sem); ++ ++int ve_hook_register(struct ve_hook *vh) ++{ ++ struct list_head *lh; ++ struct ve_hook *tmp; ++ ++ down_write(&ve_hook_sem); ++ list_for_each(lh, &ve_hooks[vh->hooknum]) { ++ tmp = list_entry(lh, struct ve_hook, list); ++ if (vh->priority < tmp->priority) ++ break; ++ } ++ list_add_tail(&vh->list, lh); ++ up_write(&ve_hook_sem); ++ return 0; ++} ++EXPORT_SYMBOL(ve_hook_register); ++ ++void ve_hook_unregister(struct ve_hook *vh) ++{ ++ down_write(&ve_hook_sem); ++ list_del(&vh->list); ++ up_write(&ve_hook_sem); ++} ++EXPORT_SYMBOL(ve_hook_unregister); ++ ++static int ve_hook_iterate(unsigned int hooknum, void *data) ++{ ++ struct ve_hook *vh; ++ int err; ++ ++ err = 0; ++ down_read(&ve_hook_sem); ++ list_for_each_entry(vh, &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ err = vh->hook(hooknum, data); ++ module_put(vh->owner); ++ if (err) ++ break; ++ } ++ ++ if (err) { ++ list_for_each_entry_continue_reverse(vh, ++ &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ if (vh->undo) ++ vh->undo(hooknum, data); ++ module_put(vh->owner); ++ } ++ } ++ up_read(&ve_hook_sem); ++ return err; ++} ++ ++static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data) ++{ ++ struct ve_hook *vh; ++ ++ down_read(&ve_hook_sem); ++ list_for_each_entry_reverse(vh, &ve_hooks[hooknum], list) { ++ if (!try_module_get(vh->owner)) ++ continue; ++ (void)vh->hook(hooknum, data); ++ 
module_put(vh->owner); ++ } ++ up_read(&ve_hook_sem); ++} ++ ++static int do_env_create(envid_t veid, unsigned int flags, u32 class_id, ++ env_create_param_t *data, int datalen) ++{ ++ struct task_struct *tsk; ++ struct ve_struct *old; ++ struct ve_struct *old_exec; ++ struct ve_struct *ve; ++ __u64 init_mask; ++ int err; ++ ++ tsk = current; ++ old = VE_TASK_INFO(tsk)->owner_env; ++ ++ if (!thread_group_leader(tsk)) ++ return -EINVAL; ++ ++ if (tsk->signal->tty) { ++ printk("ERR: VE init has controlling terminal\n"); ++ return -EINVAL; ++ } ++ if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) { ++ int may_setsid; ++ read_lock(&tasklist_lock); ++ may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL); ++ read_unlock(&tasklist_lock); ++ if (!may_setsid) { ++ printk("ERR: VE init is process group leader\n"); ++ return -EINVAL; ++ } ++ } ++ ++ ++ VZTRACE("%s: veid=%d classid=%d pid=%d\n", ++ __FUNCTION__, veid, class_id, current->pid); ++ ++ err = -ENOMEM; ++ ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL); ++ if (ve == NULL) ++ goto err_struct; ++ ++ init_ve_struct(ve, veid, class_id, data, tsk); ++ __module_get(THIS_MODULE); ++ down_write(&ve->op_sem); ++ if (flags & VE_LOCK) ++ ve->is_locked = 1; ++ if ((err = ve_list_add(ve)) < 0) ++ goto err_exist; ++ ++ /* this should be done before context switching */ ++ if ((err = init_printk(ve)) < 0) ++ goto err_log_wait; ++ ++ old_exec = set_exec_env(ve); ++ ++ if ((err = init_ve_sched(ve)) < 0) ++ goto err_sched; ++ ++ /* move user to VE */ ++ if ((err = set_user(0, 0)) < 0) ++ goto err_set_user; ++ ++ set_ve_root(ve, tsk); ++ ++ if ((err = init_ve_utsname(ve))) ++ goto err_utsname; ++ ++ if ((err = init_ve_mibs(ve))) ++ goto err_mibs; ++ ++ if ((err = init_ve_proc(ve))) ++ goto err_proc; ++ ++ if ((err = init_ve_sysctl(ve))) ++ goto err_sysctl; ++ ++ if ((err = init_ve_sysfs(ve))) ++ goto err_sysfs; ++ ++ if ((err = init_ve_netdev())) ++ goto err_dev; ++ ++ if ((err = init_ve_tty_drivers(ve)) < 0) ++ goto err_tty; ++ ++ if ((err = init_ve_shmem(ve))) ++ goto err_shmem; ++ ++ if ((err = init_ve_devpts(ve))) ++ goto err_devpts; ++ ++ /* init SYSV IPC variables */ ++ if ((err = init_ve_ipc(ve)) < 0) ++ goto err_ipc; ++ ++ set_ve_caps(ve, tsk); ++ ++ /* It is safe to initialize netfilter here as routing initialization and ++ interface setup will be done below. This means that NO skb can be ++ passed inside. Den */ ++ /* iptables ve initialization for non ve0; ++ ve0 init is in module_init */ ++ if ((err = init_ve_netfilter()) < 0) ++ goto err_netfilter; ++ ++ init_mask = data ? 
data->iptables_mask : VE_IP_DEFAULT; ++ if ((err = init_ve_iptables(ve, init_mask)) < 0) ++ goto err_iptables; ++ ++ if ((err = init_ve_route(ve)) < 0) ++ goto err_route; ++ ++ if ((err = alloc_vpid(tsk->pid, 1)) < 0) ++ goto err_vpid; ++ ++ if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0) ++ goto err_ve_hook; ++ ++ /* finally: set vpids and move inside */ ++ move_task(tsk, ve, old); ++ ++ set_virt_pid(tsk, 1); ++ set_virt_tgid(tsk, 1); ++ ++ set_special_pids(tsk->pid, tsk->pid); ++ current->signal->tty_old_pgrp = 0; ++ set_virt_pgid(tsk, 1); ++ set_virt_sid(tsk, 1); ++ ++ ve->is_running = 1; ++ up_write(&ve->op_sem); ++ ++ printk(KERN_INFO "VPS: %d: started\n", veid); ++ return veid; ++ ++err_ve_hook: ++ free_vpid(1, ve); ++err_vpid: ++ fini_venet(ve); ++ fini_ve_route(ve); ++err_route: ++ fini_ve_iptables(ve, init_mask); ++err_iptables: ++ fini_ve_netfilter(); ++err_netfilter: ++ fini_ve_ipc(ve); ++err_ipc: ++ fini_ve_devpts(ve); ++err_devpts: ++ fini_ve_shmem(ve); ++err_shmem: ++ fini_ve_tty_drivers(ve); ++err_tty: ++ fini_ve_netdev(); ++err_dev: ++ fini_ve_sysfs(ve); ++err_sysfs: ++ fini_ve_sysctl(ve); ++err_sysctl: ++ fini_ve_proc(ve); ++err_proc: ++ do_clean_devperms(ve->veid); /* register procfs adds devperms */ ++ fini_ve_mibs(ve); ++err_mibs: ++ /* free_ve_utsname() is called inside real_put_ve() */ ; ++err_utsname: ++ /* It is safe to restore current->envid here because ++ * ve_fairsched_detach does not use current->envid. */ ++ /* Really fairsched code uses current->envid in sys_fairsched_mknod ++ * only. It is correct if sys_fairsched_mknod is called from ++ * userspace. If sys_fairsched_mknod is called from ++ * ve_fairsched_attach, then node->envid and node->parent_node->envid ++ * are explicitly set to valid value after the call. 
*/ ++ /* FIXME */ ++ VE_TASK_INFO(tsk)->owner_env = old; ++ VE_TASK_INFO(tsk)->exec_env = old_exec; ++ /* move user back */ ++ if (set_user(0, 0) < 0) ++ printk(KERN_WARNING"Can't restore UID\n"); ++ ++err_set_user: ++ fini_ve_sched(ve); ++err_sched: ++ (void)set_exec_env(old_exec); ++ ++ /* we can jump here having incorrect envid */ ++ VE_TASK_INFO(tsk)->owner_env = old; ++ fini_printk(ve); ++err_log_wait: ++ ve_list_del(ve); ++ up_write(&ve->op_sem); ++ ++ real_put_ve(ve); ++err_struct: ++ printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err); ++ return err; ++ ++err_exist: ++ kfree(ve); ++ goto err_struct; ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE start/stop callbacks ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++int real_env_create(envid_t veid, unsigned flags, u32 class_id, ++ env_create_param_t *data, int datalen) ++{ ++ int status; ++ struct ve_struct *ve; ++ ++ if (!flags) { ++ status = get_exec_env()->veid; ++ goto out; ++ } ++ ++ status = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ ++ status = -EINVAL; ++ if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE))) ++ goto out; ++ ++ status = -EINVAL; ++ ve = get_ve_by_id(veid); ++ if (ve) { ++ if (flags & VE_TEST) { ++ status = 0; ++ goto out_put; ++ } ++ if (flags & VE_EXCLUSIVE) { ++ status = -EACCES; ++ goto out_put; ++ } ++ if (flags & VE_CREATE) { ++ flags &= ~VE_CREATE; ++ flags |= VE_ENTER; ++ } ++ } else { ++ if (flags & (VE_TEST|VE_ENTER)) { ++ status = -ESRCH; ++ goto out; ++ } ++ } ++ ++ if (flags & VE_CREATE) { ++ status = do_env_create(veid, flags, class_id, data, datalen); ++ goto out; ++ } else if (flags & VE_ENTER) ++ status = do_env_enter(ve, flags); ++ ++ /* else: returning EINVAL */ ++ ++out_put: ++ real_put_ve(ve); ++out: ++ return status; ++} ++ ++static int do_env_enter(struct ve_struct *ve, unsigned int flags) ++{ ++ struct task_struct *tsk = current; ++ int err; ++ ++ VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid); ++ ++ err = -EBUSY; ++ down_read(&ve->op_sem); ++ if (!ve->is_running) ++ goto out_up; ++ if (ve->is_locked && !(flags & VE_SKIPLOCK)) ++ goto out_up; ++ ++#ifdef CONFIG_FAIRSCHED ++ err = sys_fairsched_mvpr(current->pid, ve->veid); ++ if (err) ++ goto out_up; ++#endif ++ ++ ve_sched_attach(ve); ++ move_task(current, ve, VE_TASK_INFO(tsk)->owner_env); ++ err = VE_TASK_INFO(tsk)->owner_env->veid; ++ ++out_up: ++ up_read(&ve->op_sem); ++ return err; ++} ++ ++static void env_cleanup(struct ve_struct *ve) ++{ ++ struct ve_struct *old_ve; ++ ++ VZTRACE("real_do_env_cleanup\n"); ++ ++ down_read(&ve->op_sem); ++ old_ve = set_exec_env(ve); ++ ++ ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve); ++ ++ fini_venet(ve); ++ fini_ve_route(ve); ++ ++ /* no new packets in flight beyond this point */ ++ synchronize_net(); ++ /* skb hold dst_entry, and in turn lies in the ip fragment queue */ ++ ip_fragment_cleanup(ve); ++ ++ fini_ve_netdev(); ++ ++ /* kill iptables */ ++ /* No skb belonging to VE can exist at this point as unregister_netdev ++ is an operation awaiting until ALL skb's gone */ ++ flush_ve_iptables(ve); ++ fini_ve_iptables(ve, ve->_iptables_modules); ++ fini_ve_netfilter(); ++ ++ ve_ipc_cleanup(); ++ ++ fini_ve_sched(ve); ++ do_clean_devperms(ve->veid); ++ ++ fini_ve_devpts(ve); ++ fini_ve_shmem(ve); ++ fini_ve_sysfs(ve); ++ 
unregister_ve_tty_drivers(ve); ++ fini_ve_sysctl(ve); ++ fini_ve_proc(ve); ++ ++ fini_ve_mibs(ve); ++ ++ (void)set_exec_env(old_ve); ++ fini_printk(ve); /* no printk can happen in ve context anymore */ ++ ++ ve_list_del(ve); ++ up_read(&ve->op_sem); ++ ++ real_put_ve(ve); ++} ++ ++static struct list_head ve_cleanup_list; ++static spinlock_t ve_cleanup_lock; ++ ++static DECLARE_COMPLETION(vzmond_complete); ++static struct task_struct *vzmond_thread; ++static volatile int stop_vzmond; ++ ++void real_do_env_cleanup(struct ve_struct *ve) ++{ ++ spin_lock(&ve_cleanup_lock); ++ list_add_tail(&ve->cleanup_list, &ve_cleanup_list); ++ spin_unlock(&ve_cleanup_lock); ++ wake_up_process(vzmond_thread); ++} ++ ++static void do_pending_env_cleanups(void) ++{ ++ struct ve_struct *ve; ++ ++ spin_lock(&ve_cleanup_lock); ++ while (1) { ++ if (list_empty(&ve_cleanup_list) || need_resched()) ++ break; ++ ve = list_first_entry(&ve_cleanup_list, struct ve_struct, ++ cleanup_list); ++ list_del(&ve->cleanup_list); ++ spin_unlock(&ve_cleanup_lock); ++ env_cleanup(ve); ++ spin_lock(&ve_cleanup_lock); ++ } ++ spin_unlock(&ve_cleanup_lock); ++} ++ ++static int have_pending_cleanups(void) ++{ ++ return !list_empty(&ve_cleanup_list); ++} ++ ++static int vzmond(void *arg) ++{ ++ daemonize("vzmond"); ++ vzmond_thread = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!stop_vzmond) { ++ schedule(); ++ if (signal_pending(current)) ++ flush_signals(current); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); ++ ++ do_pending_env_cleanups(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ if (have_pending_cleanups()) ++ __set_current_state(TASK_RUNNING); ++ } ++ ++ __set_task_state(current, TASK_RUNNING); ++ complete_and_exit(&vzmond_complete, 0); ++} ++ ++static int __init init_vzmond(void) ++{ ++ INIT_LIST_HEAD(&ve_cleanup_list); ++ spin_lock_init(&ve_cleanup_lock); ++ stop_vzmond = 0; ++ return kernel_thread(vzmond, NULL, 0); ++} ++ ++static void fini_vzmond(void) ++{ ++ stop_vzmond = 1; ++ wake_up_process(vzmond_thread); ++ wait_for_completion(&vzmond_complete); ++ WARN_ON(!list_empty(&ve_cleanup_list)); ++} ++ ++void real_do_env_free(struct ve_struct *ve) ++{ ++ VZTRACE("real_do_env_free\n"); ++ ++ ve_ipc_free(ve); /* free SYSV IPC resources */ ++ free_ve_tty_drivers(ve); ++ free_ve_utsname(ve); ++ free_ve_sysctl(ve); /* free per ve sysctl data */ ++ free_ve_filesystems(ve); ++ printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve)); ++ kfree(ve); ++ ++ module_put(THIS_MODULE); ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE TTY handling ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ()) ++ ++static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base, ++ struct ve_struct *ve) ++{ ++ size_t size; ++ struct tty_driver *driver; ++ ++ driver = kmalloc(sizeof(struct tty_driver), GFP_KERNEL); ++ if (!driver) ++ goto out; ++ ++ memcpy(driver, base, sizeof(struct tty_driver)); ++ ++ driver->driver_state = NULL; ++ ++ size = base->num * 3 * sizeof(void *); ++ if (!(driver->flags & TTY_DRIVER_DEVPTS_MEM)) { ++ void **p; ++ p = kmalloc(size, GFP_KERNEL); ++ if (!p) ++ goto out_free; ++ memset(p, 0, size); ++ driver->ttys = (struct tty_struct **)p; ++ driver->termios = (struct termios **)(p + driver->num); ++ 
driver->termios_locked = (struct termios **)(p + driver->num * 2); ++ } else { ++ driver->ttys = NULL; ++ driver->termios = NULL; ++ driver->termios_locked = NULL; ++ } ++ ++ SET_VE_OWNER_TTYDRV(driver, ve); ++ driver->flags |= TTY_DRIVER_INSTALLED; ++ ++ return driver; ++ ++out_free: ++ kfree(driver); ++out: ++ return NULL; ++} ++ ++static void free_ve_tty_driver(struct tty_driver *driver) ++{ ++ if (!driver) ++ return; ++ ++ clear_termios(driver); ++ kfree(driver->ttys); ++ kfree(driver); ++} ++ ++static int alloc_ve_tty_drivers(struct ve_struct* ve) ++{ ++#ifdef CONFIG_LEGACY_PTYS ++ extern struct tty_driver *get_pty_driver(void); ++ extern struct tty_driver *get_pty_slave_driver(void); ++ ++ /* Traditional BSD devices */ ++ ve->pty_driver = alloc_ve_tty_driver(get_pty_driver(), ve); ++ if (!ve->pty_driver) ++ goto out_mem; ++ ++ ve->pty_slave_driver = alloc_ve_tty_driver( ++ get_pty_slave_driver(), ve); ++ if (!ve->pty_slave_driver) ++ goto out_mem; ++ ++ ve->pty_driver->other = ve->pty_slave_driver; ++ ve->pty_slave_driver->other = ve->pty_driver; ++#endif ++ ++#ifdef CONFIG_UNIX98_PTYS ++ ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve); ++ if (!ve->ptm_driver) ++ goto out_mem; ++ ++ ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve); ++ if (!ve->pts_driver) ++ goto out_mem; ++ ++ ve->ptm_driver->other = ve->pts_driver; ++ ve->pts_driver->other = ve->ptm_driver; ++ ++ ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL); ++ if (!ve->allocated_ptys) ++ goto out_mem; ++ idr_init(ve->allocated_ptys); ++#endif ++ return 0; ++ ++out_mem: ++ free_ve_tty_drivers(ve); ++ return -ENOMEM; ++} ++ ++static void free_ve_tty_drivers(struct ve_struct* ve) ++{ ++#ifdef CONFIG_LEGACY_PTYS ++ free_ve_tty_driver(ve->pty_driver); ++ free_ve_tty_driver(ve->pty_slave_driver); ++ ve->pty_driver = ve->pty_slave_driver = NULL; ++#endif ++#ifdef CONFIG_UNIX98_PTYS ++ free_ve_tty_driver(ve->ptm_driver); ++ free_ve_tty_driver(ve->pts_driver); ++ kfree(ve->allocated_ptys); ++ ve->ptm_driver = ve->pts_driver = NULL; ++ ve->allocated_ptys = NULL; ++#endif ++} ++ ++static inline void __register_tty_driver(struct tty_driver *driver) ++{ ++ list_add(&driver->tty_drivers, &tty_drivers); ++} ++ ++static inline void __unregister_tty_driver(struct tty_driver *driver) ++{ ++ if (!driver) ++ return; ++ list_del(&driver->tty_drivers); ++} ++ ++static int register_ve_tty_drivers(struct ve_struct* ve) ++{ ++ write_lock_irq(&tty_driver_guard); ++#ifdef CONFIG_UNIX98_PTYS ++ __register_tty_driver(ve->ptm_driver); ++ __register_tty_driver(ve->pts_driver); ++#endif ++#ifdef CONFIG_LEGACY_PTYS ++ __register_tty_driver(ve->pty_driver); ++ __register_tty_driver(ve->pty_slave_driver); ++#endif ++ write_unlock_irq(&tty_driver_guard); ++ ++ return 0; ++} ++ ++static void unregister_ve_tty_drivers(struct ve_struct* ve) ++{ ++ VZTRACE("unregister_ve_tty_drivers\n"); ++ ++ write_lock_irq(&tty_driver_guard); ++ __unregister_tty_driver(ve->pty_driver); ++ __unregister_tty_driver(ve->pty_slave_driver); ++#ifdef CONFIG_UNIX98_PTYS ++ __unregister_tty_driver(ve->ptm_driver); ++ __unregister_tty_driver(ve->pts_driver); ++#endif ++ write_unlock_irq(&tty_driver_guard); ++} ++ ++static int init_ve_tty_drivers(struct ve_struct *ve) ++{ ++ int err; ++ ++ if ((err = alloc_ve_tty_drivers(ve))) ++ goto err_ttyalloc; ++ if ((err = register_ve_tty_drivers(ve))) ++ goto err_ttyreg; ++ return 0; ++ ++err_ttyreg: ++ free_ve_tty_drivers(ve); ++err_ttyalloc: ++ return err; ++} ++ ++static void fini_ve_tty_drivers(struct ve_struct *ve) 
++{ ++ unregister_ve_tty_drivers(ve); ++ free_ve_tty_drivers(ve); ++} ++ ++/* ++ * Free the termios and termios_locked structures because ++ * we don't want to get memory leaks when modular tty ++ * drivers are removed from the kernel. ++ */ ++static void clear_termios(struct tty_driver *driver) ++{ ++ int i; ++ struct termios *tp; ++ ++ if (driver->termios == NULL) ++ return; ++ for (i = 0; i < driver->num; i++) { ++ tp = driver->termios[i]; ++ if (tp) { ++ driver->termios[i] = NULL; ++ kfree(tp); ++ } ++ tp = driver->termios_locked[i]; ++ if (tp) { ++ driver->termios_locked[i] = NULL; ++ kfree(tp); ++ } ++ } ++} ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * Pieces of VE network ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#include <asm/uaccess.h> ++#include <net/sock.h> ++#include <linux/netlink.h> ++#include <linux/rtnetlink.h> ++#include <net/route.h> ++#include <net/ip_fib.h> ++#endif ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static void ve_del_ip_addrs(struct net_device *dev) ++{ ++ struct in_device *in_dev; ++ ++ in_dev = in_dev_get(dev); ++ if (in_dev == NULL) ++ return; ++ ++ while (in_dev->ifa_list != NULL) { ++ inet_del_ifa(in_dev, &in_dev->ifa_list, 1); ++ } ++ in_dev_put(in_dev); ++} ++ ++static int ve_netdev_cleanup(struct net_device *dev, int to_ve) ++{ ++ int err; ++ ++ err = 0; ++ ve_del_ip_addrs(dev); ++ if ((dev->flags & IFF_UP) != 0) ++ err = dev_close(dev); ++ synchronize_net(); ++ dev_shutdown(dev); ++ dev_mc_discard(dev); ++ free_divert_blk(dev); ++ synchronize_net(); ++ ++ if (to_ve) ++ dev->orig_mtu = dev->mtu; ++ else { ++ int rc = dev_set_mtu(dev, dev->orig_mtu); ++ if (err == 0) ++ err = rc; ++ } ++ ++ return err; ++} ++ ++static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src, ++ struct ve_struct *ve_dst, struct user_beancounter *exec_ub) ++{ ++ struct net_device **dp, *d; ++ struct user_beancounter *ub; ++ ++ for (d = ve_src->_net_dev_base, dp = NULL; d != NULL; ++ dp = &d->next, d = d->next) { ++ if (d == dev) { ++ hlist_del(&dev->name_hlist); ++ hlist_del(&dev->index_hlist); ++ if (ve_src->_net_dev_tail == &dev->next) ++ ve_src->_net_dev_tail = dp; ++ if (dp) ++ *dp = dev->next; ++ dev->next = NULL; ++ break; ++ } ++ } ++ *ve_dst->_net_dev_tail = dev; ++ ve_dst->_net_dev_tail = &dev->next; ++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst)); ++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst)); ++ dev->owner_env = ve_dst; ++ ++ ub = netdev_bc(dev)->exec_ub; ++ netdev_bc(dev)->exec_ub = get_beancounter(exec_ub); ++ put_beancounter(ub); ++} ++ ++static int ve_dev_add(envid_t veid, char *dev_name) ++{ ++ int err; ++ struct net_device *dev; ++ struct ve_struct *ve; ++ struct hlist_node *p; ++ ++ dev = NULL; ++ err = -ESRCH; ++ ++ ve = get_ve_by_id(veid); ++ if (ve == NULL) ++ goto out; ++ ++ rtnl_lock(); ++ ++ read_lock(&dev_base_lock); ++ hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) { ++ struct net_device *d = hlist_entry(p, struct net_device, ++ name_hlist); ++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) { ++ dev = d; ++ break; ++ } ++ } ++ read_unlock(&dev_base_lock); ++ if (dev == NULL) ++ goto out_unlock; ++ ++ err = -EPERM; ++ if (!ve_is_dev_movable(dev)) ++ 
goto out_unlock; ++ ++ err = -EINVAL; ++ if (dev->flags & (IFF_SLAVE|IFF_MASTER)) ++ goto out_unlock; ++ ++ ve_netdev_cleanup(dev, 1); ++ ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, get_ve0(), ve, get_exec_ub()); ++ write_unlock_bh(&dev_base_lock); ++ ++ err = 0; ++ ++out_unlock: ++ rtnl_unlock(); ++ real_put_ve(ve); ++ ++ if (dev == NULL) ++ printk(KERN_WARNING "Device %s not found\n", dev_name); ++ ++out: ++ return err; ++} ++ ++static int ve_dev_del(envid_t veid, char *dev_name) ++{ ++ int err; ++ struct net_device *dev; ++ struct ve_struct *ve, *old_exec; ++ struct hlist_node *p; ++ ++ dev = NULL; ++ err = -ESRCH; ++ ++ ve = get_ve_by_id(veid); ++ if (ve == NULL) ++ goto out; ++ ++ rtnl_lock(); ++ ++ read_lock(&dev_base_lock); ++ hlist_for_each(p, dev_name_hash(dev_name, ve)) { ++ struct net_device *d = hlist_entry(p, struct net_device, ++ name_hlist); ++ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) { ++ dev = d; ++ break; ++ } ++ } ++ read_unlock(&dev_base_lock); ++ if (dev == NULL) ++ goto out_unlock; ++ ++ err = -EPERM; ++ if (!ve_is_dev_movable(dev)) ++ goto out_unlock; ++ ++ old_exec = set_exec_env(ve); ++ ve_netdev_cleanup(dev, 0); ++ (void)set_exec_env(old_exec); ++ ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub); ++ write_unlock_bh(&dev_base_lock); ++ ++ err = 0; ++ ++out_unlock: ++ rtnl_unlock(); ++ real_put_ve(ve); ++ ++ if (dev == NULL) ++ printk(KERN_WARNING "Device %s not found\n", dev_name); ++ ++out: ++ return err; ++} ++ ++int real_ve_dev_map(envid_t veid, int op, char *dev_name) ++{ ++ int err; ++ err = -EPERM; ++ if (!capable(CAP_SETVEID)) ++ goto out; ++ switch (op) ++ { ++ case VE_NETDEV_ADD: ++ err = ve_dev_add(veid, dev_name); ++ break; ++ case VE_NETDEV_DEL: ++ err = ve_dev_del(veid, dev_name); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++out: ++ return err; ++} ++ ++static void ve_mapped_devs_cleanup(struct ve_struct *ve) ++{ ++ struct net_device *dev; ++ ++ rtnl_lock(); ++ write_lock_bh(&dev_base_lock); ++restart: ++ for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next) ++ { ++ if ((dev->features & NETIF_F_VENET) || ++ (dev == ve->_loopback_dev)) /* Skip loopback dev */ ++ continue; ++ write_unlock_bh(&dev_base_lock); ++ ve_netdev_cleanup(dev, 0); ++ write_lock_bh(&dev_base_lock); ++ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub); ++ goto restart; ++ } ++ write_unlock_bh(&dev_base_lock); ++ rtnl_unlock(); ++} ++#endif ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * VE information via /proc ++ * ++ ********************************************************************** ++ **********************************************************************/ ++#ifdef CONFIG_PROC_FS ++static int devperms_seq_show(struct seq_file *m, void *v) ++{ ++ struct devperms_struct *dp; ++ char dev_s[32], type_c; ++ unsigned use, type; ++ dev_t dev; ++ ++ dp = (struct devperms_struct *)v; ++ if (dp == (struct devperms_struct *)1L) { ++ seq_printf(m, "Version: 2.7\n"); ++ return 0; ++ } ++ ++ use = dp->type & VE_USE_MASK; ++ type = dp->type & S_IFMT; ++ dev = dp->dev; ++ ++ if ((use | VE_USE_MINOR) == use) ++ snprintf(dev_s, sizeof(dev_s), "%d:%d", MAJOR(dev), MINOR(dev)); ++ else if ((use | VE_USE_MAJOR) == use) ++ snprintf(dev_s, sizeof(dev_s), "%d:*", MAJOR(dp->dev)); ++ else ++ snprintf(dev_s, sizeof(dev_s), "*:*"); ++ ++ if (type == S_IFCHR) ++ type_c = 'c'; ++ else if (type == 
S_IFBLK) ++ type_c = 'b'; ++ else ++ type_c = '?'; ++ ++ seq_printf(m, "%10u %c %03o %s\n", dp->veid, type_c, dp->mask, dev_s); ++ return 0; ++} ++ ++static void *devperms_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t cpos; ++ long slot; ++ struct devperms_struct *dp; ++ ++ cpos = *pos; ++ read_lock(&devperms_hash_guard); ++ if (cpos-- == 0) ++ return (void *)1L; ++ ++ for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++) ++ for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next) ++ if (cpos-- == 0) { ++ m->private = (void *)slot; ++ return dp; ++ } ++ return NULL; ++} ++ ++static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ long slot; ++ struct devperms_struct *dp; ++ ++ dp = (struct devperms_struct *)v; ++ ++ if (dp == (struct devperms_struct *)1L) ++ slot = 0; ++ else if (dp->devhash_next == NULL) ++ slot = (long)m->private + 1; ++ else { ++ (*pos)++; ++ return dp->devhash_next; ++ } ++ ++ for (; slot < DEVPERMS_HASH_SZ; slot++) ++ if (devperms_hash[slot]) { ++ (*pos)++; ++ m->private = (void *)slot; ++ return devperms_hash[slot]; ++ } ++ return NULL; ++} ++ ++static void devperms_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&devperms_hash_guard); ++} ++ ++static struct seq_operations devperms_seq_op = { ++ .start = devperms_seq_start, ++ .next = devperms_seq_next, ++ .stop = devperms_seq_stop, ++ .show = devperms_seq_show, ++}; ++ ++static int devperms_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &devperms_seq_op); ++} ++ ++static struct file_operations proc_devperms_ops = { ++ .open = devperms_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++#if BITS_PER_LONG == 32 ++#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21) ++#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n" ++#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n" ++#else ++#define VESTAT_LINE_WIDTH (12 * 21) ++#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n" ++#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n" ++#endif ++ ++static int vestat_seq_show(struct seq_file *m, void *v) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ struct ve_struct *curve; ++ int cpu; ++ unsigned long user_ve, nice_ve, system_ve, uptime; ++ cycles_t uptime_cycles, idle_time, strv_time, used; ++ ++ curve = get_exec_env(); ++ if (ve == ve_list_head || ++ (!ve_is_super(curve) && ve == curve)) { ++ /* print header */ ++ seq_printf(m, "%-*s\n", ++ VESTAT_LINE_WIDTH - 1, ++ "Version: 2.2"); ++ seq_printf(m, VESTAT_HEAD_FMT, "VEID", ++ "user", "nice", "system", ++ "uptime", "idle", ++ "strv", "uptime", "used", ++ "maxlat", "totlat", "numsched"); ++ } ++ ++ if (ve == get_ve0()) ++ return 0; ++ ++ user_ve = nice_ve = system_ve = 0; ++ idle_time = strv_time = used = 0; ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ user_ve += VE_CPU_STATS(ve, cpu)->user; ++ nice_ve += VE_CPU_STATS(ve, cpu)->nice; ++ system_ve += VE_CPU_STATS(ve, cpu)->system; ++ used += VE_CPU_STATS(ve, cpu)->used_time; ++ idle_time += ve_sched_get_idle_time(ve, cpu); ++ } ++ uptime_cycles = get_cycles() - ve->start_cycles; ++ uptime = jiffies - ve->start_jiffies; ++ ++ seq_printf(m, VESTAT_LINE_FMT, ve->veid, ++ user_ve, nice_ve, system_ve, ++ uptime, idle_time, ++ strv_time, uptime_cycles, used, ++ ve->sched_lat_ve.last.maxlat, ++ ve->sched_lat_ve.last.totlat, ++ ve->sched_lat_ve.last.count); ++ return 0; 
++} ++ ++static void *ve_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ve_struct *ve, *curve; ++ loff_t l; ++ ++ curve = get_exec_env(); ++ read_lock(&ve_list_guard); ++ if (!ve_is_super(curve)) { ++ if (*pos != 0) ++ return NULL; ++ return curve; ++ } ++ for (ve = ve_list_head, l = *pos; ++ ve != NULL && l > 0; ++ ve = ve->next, l--); ++ return ve; ++} ++ ++static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ve_struct *ve = (struct ve_struct *)v; ++ ++ if (!ve_is_super(get_exec_env())) ++ return NULL; ++ (*pos)++; ++ return ve->next; ++} ++ ++static void ve_seq_stop(struct seq_file *m, void *v) ++{ ++ read_unlock(&ve_list_guard); ++} ++ ++static struct seq_operations vestat_seq_op = { ++ start: ve_seq_start, ++ next: ve_seq_next, ++ stop: ve_seq_stop, ++ show: vestat_seq_show ++}; ++ ++static int vestat_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &vestat_seq_op); ++} ++ ++static struct file_operations proc_vestat_operations = { ++ open: vestat_open, ++ read: seq_read, ++ llseek: seq_lseek, ++ release: seq_release ++}; ++ ++static int __init init_vecalls_proc(void) ++{ ++ struct proc_dir_entry *de; ++ ++ de = create_proc_glob_entry("vz/vestat", ++ S_IFREG|S_IRUSR, NULL); ++ if (de == NULL) { ++ /* create "vz" subdirectory, if not exist */ ++ (void) create_proc_glob_entry("vz", ++ S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ de = create_proc_glob_entry("vz/vestat", ++ S_IFREG|S_IRUSR, NULL); ++ } ++ if (de) ++ de->proc_fops = &proc_vestat_operations; ++ else ++ printk(KERN_WARNING ++ "VZMON: can't make vestat proc entry\n"); ++ ++ de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL); ++ if (de) ++ de->proc_fops = &proc_devperms_ops; ++ else ++ printk(KERN_WARNING ++ "VZMON: can't make devperms proc entry\n"); ++ return 0; ++} ++ ++static void fini_vecalls_proc(void) ++{ ++ remove_proc_entry("vz/devperms", NULL); ++ remove_proc_entry("vz/vestat", NULL); ++} ++#else ++#define init_vecalls_proc() (0) ++#define fini_vecalls_proc() do { } while (0) ++#endif /* CONFIG_PROC_FS */ ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * User ctl ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long); ++static struct vzioctlinfo vzcalls = { ++ type: VZCTLTYPE, ++ func: vzcalls_ioctl, ++ owner: THIS_MODULE, ++}; ++ ++int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ ++ err = -ENOTTY; ++ switch(cmd) { ++ case VZCTL_MARK_ENV_TO_DOWN: { ++ /* Compatibility issue */ ++ err = 0; ++ } ++ break; ++ case VZCTL_SETDEVPERMS: { ++ /* Device type was mistakenly declared as dev_t ++ * in the old user-kernel interface. ++ * That's wrong, dev_t is a kernel internal type. ++ * I use `unsigned' not having anything better in mind. 
++ * 2001/08/11 SAW */ ++ struct vzctl_setdevperms s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_setdevperms(s.veid, s.type, ++ new_decode_dev(s.dev), s.mask); ++ } ++ break; ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ case VZCTL_VE_NETDEV: { ++ struct vzctl_ve_netdev d; ++ char *s; ++ err = -EFAULT; ++ if (copy_from_user(&d, (void *)arg, sizeof(d))) ++ break; ++ err = -ENOMEM; ++ s = kmalloc(IFNAMSIZ+1, GFP_KERNEL); ++ if (s == NULL) ++ break; ++ strncpy_from_user(s, d.dev_name, IFNAMSIZ); ++ s[IFNAMSIZ] = 0; ++ err = real_ve_dev_map(d.veid, d.op, s); ++ kfree(s); ++ } ++ break; ++#endif ++ case VZCTL_ENV_CREATE: { ++ struct vzctl_env_create s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = real_env_create(s.veid, s.flags, s.class_id, ++ NULL, 0); ++ } ++ break; ++ case VZCTL_ENV_CREATE_DATA: { ++ struct vzctl_env_create_data s; ++ env_create_param_t *data; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err=-EINVAL; ++ if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN || ++ s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN || ++ s.data == 0) ++ break; ++ err = -ENOMEM; ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ break; ++ memset(data, 0, sizeof(*data)); ++ err = -EFAULT; ++ if (copy_from_user(data, (void *)s.data, s.datalen)) ++ goto free_data; ++ err = real_env_create(s.veid, s.flags, s.class_id, ++ data, s.datalen); ++free_data: ++ kfree(data); ++ } ++ break; ++ case VZCTL_GET_CPU_STAT: { ++ struct vzctl_cpustatctl s; ++ err = -EFAULT; ++ if (copy_from_user(&s, (void *)arg, sizeof(s))) ++ break; ++ err = ve_get_cpu_stat(s.veid, s.cpustat); ++ } ++ break; ++ } ++ return err; ++} ++EXPORT_SYMBOL(real_env_create); ++ ++ ++/********************************************************************** ++ ********************************************************************** ++ * ++ * Init/exit stuff ++ * ++ ********************************************************************** ++ **********************************************************************/ ++ ++#ifdef CONFIG_VE_CALLS_MODULE ++static int __init init_vecalls_symbols(void) ++{ ++ KSYMRESOLVE(real_get_device_perms_ve); ++ KSYMRESOLVE(real_do_env_cleanup); ++ KSYMRESOLVE(real_do_env_free); ++ KSYMRESOLVE(real_update_load_avg_ve); ++ KSYMMODRESOLVE(vzmon); ++ return 0; ++} ++ ++static void fini_vecalls_symbols(void) ++{ ++ KSYMMODUNRESOLVE(vzmon); ++ KSYMUNRESOLVE(real_get_device_perms_ve); ++ KSYMUNRESOLVE(real_do_env_cleanup); ++ KSYMUNRESOLVE(real_do_env_free); ++ KSYMUNRESOLVE(real_update_load_avg_ve); ++} ++#else ++#define init_vecalls_symbols() (0) ++#define fini_vecalls_symbols() do { } while (0) ++#endif ++ ++static inline __init int init_vecalls_ioctls(void) ++{ ++ vzioctl_register(&vzcalls); ++ return 0; ++} ++ ++static inline void fini_vecalls_ioctls(void) ++{ ++ vzioctl_unregister(&vzcalls); ++} ++ ++static int __init vecalls_init(void) ++{ ++ int err; ++ int i; ++ ++ ve_list_head = get_ve0(); ++ ++ err = init_vzmond(); ++ if (err < 0) ++ goto out_vzmond; ++ ++ err = init_devperms_hash(); ++ if (err < 0) ++ goto out_perms; ++ ++ err = init_vecalls_symbols(); ++ if (err < 0) ++ goto out_sym; ++ ++ err = init_vecalls_proc(); ++ if (err < 0) ++ goto out_proc; ++ ++ err = init_vecalls_ioctls(); ++ if (err < 0) ++ goto out_ioctls; ++ ++ for (i = 0; i < VE_MAX_HOOKS; i++) ++ INIT_LIST_HEAD(&ve_hooks[i]); ++ ++ return 0; ++ ++out_ioctls: ++ fini_vecalls_proc(); ++out_proc: ++ 
fini_vecalls_symbols(); ++out_sym: ++ fini_devperms_hash(); ++out_perms: ++ fini_vzmond(); ++out_vzmond: ++ return err; ++} ++ ++static void vecalls_exit(void) ++{ ++ fini_vecalls_ioctls(); ++ fini_vecalls_proc(); ++ fini_vecalls_symbols(); ++ fini_devperms_hash(); ++ fini_vzmond(); ++} ++ ++EXPORT_SYMBOL(get_ve_by_id); ++EXPORT_SYMBOL(__find_ve_by_id); ++EXPORT_SYMBOL(ve_list_guard); ++EXPORT_SYMBOL(ve_list_head); ++EXPORT_SYMBOL(nr_ve); ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Control"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(vecalls_init) ++module_exit(vecalls_exit) +diff -uprN linux-2.6.8.1.orig/kernel/veowner.c linux-2.6.8.1-ve022stab078/kernel/veowner.c +--- linux-2.6.8.1.orig/kernel/veowner.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/veowner.c 2006-05-11 13:05:42.000000000 +0400 +@@ -0,0 +1,300 @@ ++/* ++ * kernel/veowner.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/sched.h> ++#include <linux/ve.h> ++#include <linux/ve_owner.h> ++#include <linux/ve_proto.h> ++#include <linux/ipc.h> ++#include <linux/fs.h> ++#include <linux/proc_fs.h> ++#include <linux/file.h> ++#include <linux/mm.h> ++#include <linux/delay.h> ++#include <linux/vmalloc.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/list.h> ++#include <asm/system.h> ++#include <asm/io.h> ++ ++#include <net/tcp.h> ++ ++void prepare_ve0_process(struct task_struct *tsk) ++{ ++ set_virt_pid(tsk, tsk->pid); ++ set_virt_tgid(tsk, tsk->tgid); ++ if (tsk->signal) { ++ set_virt_pgid(tsk, tsk->signal->pgrp); ++ set_virt_sid(tsk, tsk->signal->session); ++ } ++ VE_TASK_INFO(tsk)->exec_env = get_ve0(); ++ VE_TASK_INFO(tsk)->owner_env = get_ve0(); ++ VE_TASK_INFO(tsk)->sleep_time = 0; ++ VE_TASK_INFO(tsk)->wakeup_stamp = 0; ++ VE_TASK_INFO(tsk)->sched_time = 0; ++ seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock); ++ ++ if (tsk->pid) { ++ SET_VE_LINKS(tsk); ++ atomic_inc(&get_ve0()->pcounter); ++ } ++} ++ ++void prepare_ve0_loopback(void) ++{ ++ get_ve0()->_loopback_dev = &loopback_dev; ++} ++ ++/* ++ * ------------------------------------------------------------------------ ++ * proc entries ++ * ------------------------------------------------------------------------ ++ */ ++ ++static void proc_move(struct proc_dir_entry *ddir, ++ struct proc_dir_entry *sdir, ++ const char *name) ++{ ++ struct proc_dir_entry **p, *q; ++ int len; ++ ++ len = strlen(name); ++ for (p = &sdir->subdir, q = *p; q != NULL; p = &q->next, q = *p) ++ if (proc_match(len, name, q)) ++ break; ++ if (q == NULL) ++ return; ++ *p = q->next; ++ q->parent = ddir; ++ q->next = ddir->subdir; ++ ddir->subdir = q; ++} ++static void prepare_proc_misc(void) ++{ ++ static char *table[] = { ++ "loadavg", ++ "uptime", ++ "meminfo", ++ "version", ++ "stat", ++ "filesystems", ++ "locks", ++ "swaps", ++ "mounts", ++ "cpuinfo", ++ "net", ++ "sysvipc", ++ "sys", ++ "fs", ++ "vz", ++ "user_beancounters", ++ "cmdline", ++ "vmstat", ++ "modules", ++ "kmsg", ++ NULL, ++ }; ++ char **p; ++ ++ for (p = table; *p != NULL; p++) ++ proc_move(&proc_root, ve0.proc_root, *p); ++} ++int prepare_proc(void) ++{ ++ struct ve_struct *envid; ++ struct proc_dir_entry *de; ++ struct proc_dir_entry *ve_root; ++ ++ envid = set_exec_env(&ve0); ++ ve_root = ve0.proc_root->subdir; ++ /* move the whole tree to be visible in VE0 only */ ++ ve0.proc_root->subdir = proc_root.subdir; ++ for (de = 
ve0.proc_root->subdir; de->next != NULL; de = de->next) ++ de->parent = ve0.proc_root; ++ de->parent = ve0.proc_root; ++ de->next = ve_root; ++ ++ /* move back into the global scope some specific entries */ ++ proc_root.subdir = NULL; ++ prepare_proc_misc(); ++ proc_mkdir("net", 0); ++ proc_mkdir("vz", 0); ++#ifdef CONFIG_SYSVIPC ++ proc_mkdir("sysvipc", 0); ++#endif ++ proc_root_fs = proc_mkdir("fs", 0); ++ /* XXX proc_tty_init(); */ ++ ++ /* XXX process inodes */ ++ ++ (void)set_exec_env(envid); ++ ++ (void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL); ++ return 0; ++} ++ ++static struct proc_dir_entry ve0_proc_root = { ++ .name = "/proc", ++ .namelen = 5, ++ .mode = S_IFDIR | S_IRUGO | S_IXUGO, ++ .nlink = 2 ++}; ++ ++void prepare_ve0_proc_root(void) ++{ ++ ve0.proc_root = &ve0_proc_root; ++} ++ ++/* ++ * ------------------------------------------------------------------------ ++ * Virtualized sysctl ++ * ------------------------------------------------------------------------ ++ */ ++ ++static int semmin[4] = { 1, 1, 1, 1 }; ++static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI }; ++static ctl_table kern_table[] = { ++ {KERN_NODENAME, "hostname", system_utsname.nodename, 64, ++ 0644, NULL, &proc_doutsstring, &sysctl_string}, ++ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64, ++ 0644, NULL, &proc_doutsstring, &sysctl_string}, ++#ifdef CONFIG_SYSVIPC ++#define get_ve0_field(fname) &ve0._##fname ++ {KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t), ++ 0644, NULL, &proc_doulongvec_minmax }, ++ {KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t), ++ 0644, NULL, &proc_doulongvec_minmax }, ++ {KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int), ++ 0644, NULL, &proc_dointvec_minmax, NULL, ++ NULL, &semmin[0], &semmax[3] }, ++ {KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++ {KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int), ++ 0644, NULL, &proc_dointvec_minmax, NULL, ++ NULL, &semmin[0], &semmax[3] }, ++ {KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++ {KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int), ++ 0644, NULL, &proc_dointvec }, ++#endif ++ {0} ++}; ++static ctl_table root_table[] = { ++ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, ++ {0} ++}; ++extern int ip_rt_src_check; ++extern int ve_area_access_check; ++static ctl_table ipv4_route_table[] = { ++ { ++ ctl_name: NET_IPV4_ROUTE_SRC_CHECK, ++ procname: "src_check", ++ data: &ip_rt_src_check, ++ maxlen: sizeof(int), ++ mode: 0644, ++ proc_handler: &proc_dointvec, ++ }, ++ { 0 } ++}; ++static ctl_table ipv4_table[] = { ++ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, ipv4_route_table}, ++ { 0 } ++}; ++static ctl_table net_table[] = { ++ {NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_table}, ++ { 0 } ++}; ++static ctl_table fs_table[] = { ++ { ++ ctl_name: 226, ++ procname: "ve-area-access-check", ++ data: &ve_area_access_check, ++ maxlen: sizeof(int), ++ mode: 0644, ++ proc_handler: &proc_dointvec, ++ }, ++ { 0 } ++}; ++static ctl_table root_table2[] = { ++ {CTL_NET, "net", NULL, 0, 0555, net_table}, ++ {CTL_FS, "fs", NULL, 0, 0555, fs_table}, ++ { 0 } ++}; ++int prepare_sysctl(void) ++{ ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(&ve0); ++ ve0.kern_header = register_sysctl_table(root_table, 1); ++ register_sysctl_table(root_table2, 0); ++ (void)set_exec_env(envid); ++ return 0; ++} ++ ++void prepare_ve0_sysctl(void) ++{ ++ 
INIT_LIST_HEAD(&ve0.sysctl_lh); ++#ifdef CONFIG_SYSCTL ++ ve0.proc_sys_root = proc_mkdir("sys", 0); ++#endif ++} ++ ++/* ++ * ------------------------------------------------------------------------ ++ * XXX init_ve_system ++ * ------------------------------------------------------------------------ ++ */ ++ ++extern struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void); ++ ++void init_ve_system(void) ++{ ++ struct task_struct *init_entry, *p, *tsk; ++ struct ve_struct *ptr; ++ unsigned long flags; ++ int i; ++ ++ ptr = get_ve0(); ++ (void)get_ve(ptr); ++ atomic_set(&ptr->pcounter, 1); ++ ++ /* Don't forget about idle tasks */ ++ write_lock_irqsave(&tasklist_lock, flags); ++ for (i = 0; i < NR_CPUS; i++) { ++ tsk = idle_task(i); ++ if (tsk == NULL) ++ continue; ++ ++ prepare_ve0_process(tsk); ++ } ++ do_each_thread_all(p, tsk) { ++ prepare_ve0_process(tsk); ++ } while_each_thread_all(p, tsk); ++ write_unlock_irqrestore(&tasklist_lock, flags); ++ ++ init_entry = child_reaper; ++ ptr->init_entry = init_entry; ++ /* XXX: why? */ ++ cap_set_full(ptr->cap_default); ++ ++ ptr->_ipv4_devconf = &ipv4_devconf; ++ ptr->_ipv4_devconf_dflt = get_ipv4_devconf_dflt_addr(); ++ ++ read_lock(&init_entry->fs->lock); ++ ptr->fs_rootmnt = init_entry->fs->rootmnt; ++ ptr->fs_root = init_entry->fs->root; ++ read_unlock(&init_entry->fs->lock); ++ ++ /* common prepares */ ++ prepare_proc(); ++ prepare_sysctl(); ++ prepare_ipc(); ++} +diff -uprN linux-2.6.8.1.orig/kernel/vzdev.c linux-2.6.8.1-ve022stab078/kernel/vzdev.c +--- linux-2.6.8.1.orig/kernel/vzdev.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/vzdev.c 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,97 @@ ++/* ++ * kernel/vzdev.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/fs.h> ++#include <linux/list.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/vzctl.h> ++#include <linux/slab.h> ++#include <linux/vmalloc.h> ++#include <linux/vzcalluser.h> ++#include <asm/uaccess.h> ++#include <asm/pgalloc.h> ++ ++#define VZCTL_MAJOR 126 ++#define VZCTL_NAME "vzctl" ++ ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo Interface"); ++MODULE_LICENSE("GPL v2"); ++ ++static LIST_HEAD(ioctls); ++static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED; ++ ++int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err; ++ struct list_head *p; ++ struct vzioctlinfo *inf; ++ ++ err = -ENOTTY; ++ spin_lock(&ioctl_lock); ++ list_for_each(p, &ioctls) { ++ inf = list_entry(p, struct vzioctlinfo, list); ++ if (inf->type != _IOC_TYPE(cmd)) ++ continue; ++ ++ err = try_module_get(inf->owner) ? 0 : -EBUSY; ++ spin_unlock(&ioctl_lock); ++ if (!err) { ++ err = (*inf->func)(ino, file, cmd, arg); ++ module_put(inf->owner); ++ } ++ return err; ++ } ++ spin_unlock(&ioctl_lock); ++ return err; ++} ++ ++void vzioctl_register(struct vzioctlinfo *inf) ++{ ++ spin_lock(&ioctl_lock); ++ list_add(&inf->list, &ioctls); ++ spin_unlock(&ioctl_lock); ++} ++ ++void vzioctl_unregister(struct vzioctlinfo *inf) ++{ ++ spin_lock(&ioctl_lock); ++ list_del_init(&inf->list); ++ spin_unlock(&ioctl_lock); ++} ++ ++EXPORT_SYMBOL(vzioctl_register); ++EXPORT_SYMBOL(vzioctl_unregister); ++ ++/* ++ * Init/exit stuff. 
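The /dev/vzctl character device registered in kernel/vzdev.c above (major 126) is a small ioctl multiplexer: each subsystem registers a struct vzioctlinfo keyed by the _IOC_TYPE of its commands, and vzctl_ioctl() pins the owning module with try_module_get() before dispatching, so a handler module cannot be unloaded mid-call. A sketch of how a hypothetical client module would plug into this interface; FOOCTLTYPE and the handler are invented for illustration, and struct vzioctlinfo with the registration helpers is assumed to come from the patch's <linux/vzctl.h>, as in vzdev.c:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/vzctl.h>

#define FOOCTLTYPE 0xf7		/* hypothetical ioctl type, not part of the patch */

static int foo_ioctl(struct inode *ino, struct file *file,
		unsigned int cmd, unsigned long arg)
{
	/* every ioctl whose _IOC_TYPE(cmd) equals FOOCTLTYPE lands here */
	return -ENOTTY;
}

static struct vzioctlinfo foo_calls = {
	.type	= FOOCTLTYPE,
	.func	= foo_ioctl,
	.owner	= THIS_MODULE,
};

static int __init foo_init(void)
{
	vzioctl_register(&foo_calls);
	return 0;
}

static void __exit foo_exit(void)
{
	vzioctl_unregister(&foo_calls);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

The VZCTL_* handler earlier in this patch hooks itself in the same way through its vzcalls descriptor.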
++ */ ++static struct file_operations vzctl_fops = { ++ .owner = THIS_MODULE, ++ .ioctl = vzctl_ioctl, ++}; ++ ++static void __exit vzctl_exit(void) ++{ ++ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME); ++} ++ ++static int __init vzctl_init(void) ++{ ++ int ret; ++ ++ ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops); ++ return ret; ++} ++ ++module_init(vzctl_init) ++module_exit(vzctl_exit); +diff -uprN linux-2.6.8.1.orig/kernel/vzwdog.c linux-2.6.8.1-ve022stab078/kernel/vzwdog.c +--- linux-2.6.8.1.orig/kernel/vzwdog.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/kernel/vzwdog.c 2006-05-11 13:05:40.000000000 +0400 +@@ -0,0 +1,278 @@ ++/* ++ * kernel/vzwdog.c ++ * ++ * Copyright (C) 2000-2005 SWsoft ++ * All rights reserved. ++ * ++ * Licensing governed by "linux/COPYING.SWsoft" file. ++ * ++ */ ++ ++#include <linux/sched.h> ++#include <linux/fs.h> ++#include <linux/list.h> ++#include <linux/ctype.h> ++#include <linux/kobject.h> ++#include <linux/genhd.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/kernel.h> ++#include <linux/kernel_stat.h> ++#include <linux/smp_lock.h> ++#include <linux/errno.h> ++#include <linux/suspend.h> ++#include <linux/ve.h> ++#include <linux/vzstat.h> ++ ++/* Staff regading kernel thread polling VE validity */ ++static int sleep_timeout = 60; ++static pid_t wdog_thread_pid; ++static int wdog_thread_continue = 1; ++static DECLARE_COMPLETION(license_thread_exited); ++ ++extern void show_mem(void); ++extern struct ve_struct *ve_list_head; ++ ++#if 0 ++static char page[PAGE_SIZE]; ++ ++static void parse_irq_list(int len) ++{ ++ int i, k, skip; ++ for (i = 0; i < len; ) { ++ k = i; ++ while (i < len && page[i] != '\n' && page[i] != ':') ++ i++; ++ skip = 0; ++ if (i < len && page[i] != '\n') { ++ i++; /* skip ':' */ ++ while (i < len && (page[i] == ' ' || page[i] == '0')) ++ i++; ++ skip = (i < len && (page[i] < '0' || page[i] > '9')); ++ while (i < len && page[i] != '\n') ++ i++; ++ } ++ if (!skip) ++ printk("\n%.*s", i - k, page + k); ++ if (i < len) ++ i++; /* skip '\n' */ ++ } ++} ++#endif ++ ++static void show_irq_list(void) ++{ ++#if 0 ++ i = KSYMSAFECALL(int, get_irq_list, (page)); ++ parse_irq_list(i); /* Safe, zero was returned if unassigned */ ++#endif ++} ++ ++static void show_alloc_latency(void) ++{ ++ static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = { ++ "A0", ++ "L0", ++ "H0", ++ "L1", ++ "H1" ++ }; ++ int i; ++ ++ printk("lat: "); ++ for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) { ++ struct kstat_lat_struct *p; ++ cycles_t maxlat, avg0, avg1, avg2; ++ ++ p = &kstat_glob.alloc_lat[i]; ++ spin_lock_irq(&kstat_glb_lock); ++ maxlat = p->last.maxlat; ++ avg0 = p->avg[0]; ++ avg1 = p->avg[1]; ++ avg2 = p->avg[2]; ++ spin_unlock_irq(&kstat_glb_lock); ++ ++ printk("%s %Lu (%Lu %Lu %Lu)", ++ alloc_descr[i], ++ maxlat, ++ avg0, ++ avg1, ++ avg2); ++ } ++ printk("\n"); ++} ++ ++static void show_schedule_latency(void) ++{ ++ struct kstat_lat_pcpu_struct *p; ++ cycles_t maxlat, totlat, avg0, avg1, avg2; ++ unsigned long count; ++ ++ p = &kstat_glob.sched_lat; ++ spin_lock_irq(&kstat_glb_lock); ++ maxlat = p->last.maxlat; ++ totlat = p->last.totlat; ++ count = p->last.count; ++ avg0 = p->avg[0]; ++ avg1 = p->avg[1]; ++ avg2 = p->avg[2]; ++ spin_unlock_irq(&kstat_glb_lock); ++ ++ printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n", ++ maxlat, ++ totlat, ++ count, ++ avg0, ++ avg1, ++ avg2); ++} ++ ++static void show_header(void) ++{ ++ struct timeval tv; ++ ++ do_gettimeofday(&tv); ++ printk("*** VZWDOG 1.14: time 
%lu.%06lu uptime %Lu CPU %d ***\n", ++ tv.tv_sec, tv.tv_usec, ++ get_jiffies_64(), smp_processor_id()); ++ printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n", ++ cycles_per_jiffy, HZ); ++} ++ ++static void show_pgdatinfo(void) ++{ ++ pg_data_t *pgdat; ++ ++ printk("pgdat:"); ++ for_each_pgdat(pgdat) { ++ printk(" %d: %lu,%lu,%lu,%p", ++ pgdat->node_id, ++ pgdat->node_start_pfn, ++ pgdat->node_present_pages, ++ pgdat->node_spanned_pages, ++ pgdat->node_mem_map); ++ } ++ printk("\n"); ++} ++ ++extern struct subsystem *get_block_subsys(void); ++static void show_diskio(void) ++{ ++ struct gendisk *gd; ++ struct subsystem *block_subsys; ++ char buf[BDEVNAME_SIZE]; ++ ++ printk("disk_io: "); ++ ++ block_subsys = get_block_subsys(); ++ down_read(&block_subsys->rwsem); ++ list_for_each_entry(gd, &block_subsys->kset.list, kobj.entry) { ++ char *name; ++ name = disk_name(gd, 0, buf); ++ if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) && ++ isdigit(name[4])) ++ continue; ++ if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) && ++ isdigit(name[3])) ++ continue; ++ printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n", ++ gd->major, gd->first_minor, ++ name, ++ disk_stat_read(gd, reads), ++ disk_stat_read(gd, read_sectors), ++ disk_stat_read(gd, read_merges), ++ disk_stat_read(gd, writes), ++ disk_stat_read(gd, write_sectors), ++ disk_stat_read(gd, write_merges)); ++ } ++ up_read(&block_subsys->rwsem); ++ ++ printk("\n"); ++} ++ ++static void show_nrprocs(void) ++{ ++ unsigned long _nr_running, _nr_sleeping, ++ _nr_unint, _nr_zombie, _nr_dead, _nr_stopped; ++ ++ _nr_running = nr_running(); ++ _nr_unint = nr_uninterruptible(); ++ _nr_sleeping = nr_sleeping(); ++ _nr_zombie = nr_zombie; ++ _nr_dead = nr_dead; ++ _nr_stopped = nr_stopped(); ++ ++ printk("VEnum: %d, proc R %lu, S %lu, D %lu, " ++ "Z %lu, X %lu, T %lu (tot %d)\n", ++ nr_ve, _nr_running, _nr_sleeping, _nr_unint, ++ _nr_zombie, _nr_dead, _nr_stopped, nr_threads); ++} ++ ++static void wdog_print(void) ++{ ++ show_header(); ++ show_irq_list(); ++ show_pgdatinfo(); ++ show_mem(); ++ show_diskio(); ++ show_schedule_latency(); ++ show_alloc_latency(); ++ show_nrprocs(); ++} ++ ++static int wdog_loop(void* data) ++{ ++ struct task_struct *tsk = current; ++ DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue); ++ ++ /* ++ * This thread doesn't need any user-level access, ++ * so get rid of all our resources ++ */ ++ daemonize("wdogd"); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sigfillset(&tsk->blocked); ++ sigdelset(&tsk->blocked, SIGHUP); ++ recalc_sigpending(); ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ while (wdog_thread_continue) { ++ wdog_print(); ++ interruptible_sleep_on_timeout(&thread_wait_queue, ++ sleep_timeout*HZ); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); ++ /* clear all signals */ ++ if (signal_pending(tsk)) ++ flush_signals(tsk); ++ } ++ ++ complete_and_exit(&license_thread_exited, 0); ++} ++ ++static int __init wdog_init(void) ++{ ++ wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0); ++ if (wdog_thread_pid < 0) ++ return wdog_thread_pid; ++ ++ return 0; ++} ++ ++static void __exit wdog_exit(void) ++{ ++ wdog_thread_continue = 0; ++ if (wdog_thread_pid > 0) { ++ kill_proc(wdog_thread_pid, SIGHUP, 1); ++ wait_for_completion(&license_thread_exited); ++ } ++} ++ ++MODULE_PARM(sleep_timeout, "i"); ++MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); ++MODULE_DESCRIPTION("Virtuozzo WDOG"); ++MODULE_LICENSE("GPL v2"); ++ ++module_init(wdog_init) ++module_exit(wdog_exit) +diff -uprN 
linux-2.6.8.1.orig/lib/bust_spinlocks.c linux-2.6.8.1-ve022stab078/lib/bust_spinlocks.c +--- linux-2.6.8.1.orig/lib/bust_spinlocks.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/lib/bust_spinlocks.c 2006-05-11 13:05:24.000000000 +0400 +@@ -14,26 +14,15 @@ + #include <linux/wait.h> + #include <linux/vt_kern.h> + +- + void bust_spinlocks(int yes) + { + if (yes) { + oops_in_progress = 1; + } else { +- int loglevel_save = console_loglevel; + #ifdef CONFIG_VT + unblank_screen(); + #endif + oops_in_progress = 0; +- /* +- * OK, the message is on the console. Now we call printk() +- * without oops_in_progress set so that printk() will give klogd +- * and the blanked console a poke. Hold onto your hats... +- */ +- console_loglevel = 15; /* NMI oopser may have shut the console up */ +- printk(" "); +- console_loglevel = loglevel_save; ++ wake_up_klogd(); + } + } +- +- +diff -uprN linux-2.6.8.1.orig/lib/inflate.c linux-2.6.8.1-ve022stab078/lib/inflate.c +--- linux-2.6.8.1.orig/lib/inflate.c 2004-08-14 14:55:31.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/lib/inflate.c 2006-05-11 13:05:34.000000000 +0400 +@@ -322,7 +322,7 @@ DEBG("huft1 "); + { + *t = (struct huft *)NULL; + *m = 0; +- return 0; ++ return 2; + } + + DEBG("huft2 "); +@@ -370,6 +370,7 @@ DEBG("huft5 "); + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); ++ n = x[g]; /* set n to length of v */ + + DEBG("h6 "); + +@@ -406,12 +407,13 @@ DEBG1("1 "); + DEBG1("2 "); + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; +- while (++j < z) /* try smaller tables up to z bits */ +- { +- if ((f <<= 1) <= *++xp) +- break; /* enough codes to use up j bits */ +- f -= *xp; /* else deduct codes from patterns */ +- } ++ if (j < z) ++ while (++j < z) /* try smaller tables up to z bits */ ++ { ++ if ((f <<= 1) <= *++xp) ++ break; /* enough codes to use up j bits */ ++ f -= *xp; /* else deduct codes from patterns */ ++ } + } + DEBG1("3 "); + z = 1 << j; /* table entries for j-bit table */ +diff -uprN linux-2.6.8.1.orig/lib/rwsem-spinlock.c linux-2.6.8.1-ve022stab078/lib/rwsem-spinlock.c +--- linux-2.6.8.1.orig/lib/rwsem-spinlock.c 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/lib/rwsem-spinlock.c 2006-05-11 13:05:25.000000000 +0400 +@@ -140,12 +140,12 @@ void fastcall __sched __down_read(struct + + rwsemtrace(sem, "Entering __down_read"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irq(&sem->wait_lock); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irq(&sem->wait_lock); + goto out; + } + +@@ -160,7 +160,7 @@ void fastcall __sched __down_read(struct + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + for (;;) { +@@ -181,10 +181,12 @@ void fastcall __sched __down_read(struct + */ + int fastcall __down_read_trylock(struct rw_semaphore *sem) + { ++ unsigned long flags; + int ret = 0; ++ + rwsemtrace(sem, "Entering __down_read_trylock"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ +@@ -192,7 +194,7 @@ int fastcall __down_read_trylock(struct + ret = 1; + } + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving __down_read_trylock"); + return ret; +@@ -209,12 
+211,12 @@ void fastcall __sched __down_write(struc + + rwsemtrace(sem, "Entering __down_write"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irq(&sem->wait_lock); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity = -1; +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irq(&sem->wait_lock); + goto out; + } + +@@ -229,7 +231,7 @@ void fastcall __sched __down_write(struc + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + for (;;) { +@@ -250,10 +252,12 @@ void fastcall __sched __down_write(struc + */ + int fastcall __down_write_trylock(struct rw_semaphore *sem) + { ++ unsigned long flags; + int ret = 0; ++ + rwsemtrace(sem, "Entering __down_write_trylock"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ +@@ -261,7 +265,7 @@ int fastcall __down_write_trylock(struct + ret = 1; + } + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving __down_write_trylock"); + return ret; +@@ -272,14 +276,16 @@ int fastcall __down_write_trylock(struct + */ + void fastcall __up_read(struct rw_semaphore *sem) + { ++ unsigned long flags; ++ + rwsemtrace(sem, "Entering __up_read"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + sem = __rwsem_wake_one_writer(sem); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving __up_read"); + } +@@ -289,15 +295,17 @@ void fastcall __up_read(struct rw_semaph + */ + void fastcall __up_write(struct rw_semaphore *sem) + { ++ unsigned long flags; ++ + rwsemtrace(sem, "Entering __up_write"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 0; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 1); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving __up_write"); + } +@@ -308,15 +316,17 @@ void fastcall __up_write(struct rw_semap + */ + void fastcall __downgrade_write(struct rw_semaphore *sem) + { ++ unsigned long flags; ++ + rwsemtrace(sem, "Entering __downgrade_write"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 1; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 0); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving __downgrade_write"); + } +diff -uprN linux-2.6.8.1.orig/lib/rwsem.c linux-2.6.8.1-ve022stab078/lib/rwsem.c +--- linux-2.6.8.1.orig/lib/rwsem.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/lib/rwsem.c 2006-05-11 13:05:25.000000000 +0400 +@@ -150,7 +150,7 @@ rwsem_down_failed_common(struct rw_semap + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ +- spin_lock(&sem->wait_lock); ++ spin_lock_irq(&sem->wait_lock); + waiter->task = tsk; + get_task_struct(tsk); + +@@ -163,7 +163,7 @@ rwsem_down_failed_common(struct rw_semap + if (!(count & RWSEM_ACTIVE_MASK)) + sem = __rwsem_do_wake(sem, 0); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + for (;;) { +@@ -219,15 +219,17 @@ 
rwsem_down_write_failed(struct rw_semaph + */ + struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) + { ++ unsigned long flags; ++ + rwsemtrace(sem, "Entering rwsem_wake"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 0); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving rwsem_wake"); + +@@ -241,15 +243,17 @@ struct rw_semaphore fastcall *rwsem_wake + */ + struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) + { ++ unsigned long flags; ++ + rwsemtrace(sem, "Entering rwsem_downgrade_wake"); + +- spin_lock(&sem->wait_lock); ++ spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 1); + +- spin_unlock(&sem->wait_lock); ++ spin_unlock_irqrestore(&sem->wait_lock, flags); + + rwsemtrace(sem, "Leaving rwsem_downgrade_wake"); + return sem; +diff -uprN linux-2.6.8.1.orig/mm/Makefile linux-2.6.8.1-ve022stab078/mm/Makefile +--- linux-2.6.8.1.orig/mm/Makefile 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/Makefile 2006-05-11 13:05:38.000000000 +0400 +@@ -13,5 +13,6 @@ obj-y := bootmem.o filemap.o mempool.o + $(mmu-y) + + obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o ++obj-$(CONFIG_X86_4G) += usercopy.o + obj-$(CONFIG_HUGETLBFS) += hugetlb.o + obj-$(CONFIG_NUMA) += mempolicy.o +diff -uprN linux-2.6.8.1.orig/mm/filemap.c linux-2.6.8.1-ve022stab078/mm/filemap.c +--- linux-2.6.8.1.orig/mm/filemap.c 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/filemap.c 2006-05-11 13:05:40.000000000 +0400 +@@ -127,20 +127,6 @@ void remove_from_page_cache(struct page + spin_unlock_irq(&mapping->tree_lock); + } + +-static inline int sync_page(struct page *page) +-{ +- struct address_space *mapping; +- +- /* +- * FIXME, fercrissake. What is this barrier here for? +- */ +- smp_mb(); +- mapping = page_mapping(page); +- if (mapping && mapping->a_ops && mapping->a_ops->sync_page) +- return mapping->a_ops->sync_page(page); +- return 0; +-} +- + /** + * filemap_fdatawrite - start writeback against all of a mapping's dirty pages + * @mapping: address space structure to write +@@ -828,6 +814,8 @@ int file_read_actor(read_descriptor_t *d + if (size > count) + size = count; + ++ left = size; ++#ifndef CONFIG_X86_UACCESS_INDIRECT + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. 
+@@ -836,20 +824,21 @@ int file_read_actor(read_descriptor_t *d + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); +- if (left == 0) +- goto success; + } ++#endif + +- /* Do it the slow way */ +- kaddr = kmap(page); +- left = __copy_to_user(desc->arg.buf, kaddr + offset, size); +- kunmap(page); +- +- if (left) { +- size -= left; +- desc->error = -EFAULT; ++ if (left != 0) { ++ /* Do it the slow way */ ++ kaddr = kmap(page); ++ left = __copy_to_user(desc->arg.buf, kaddr + offset, size); ++ kunmap(page); ++ ++ if (left) { ++ size -= left; ++ desc->error = -EFAULT; ++ } + } +-success: ++ + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; +@@ -926,8 +915,8 @@ __generic_file_aio_read(struct kiocb *io + desc.error = 0; + do_generic_file_read(filp,ppos,&desc,file_read_actor); + retval += desc.written; +- if (!retval) { +- retval = desc.error; ++ if (desc.error) { ++ retval = retval ?: desc.error; + break; + } + } +@@ -1629,9 +1618,13 @@ filemap_copy_from_user(struct page *page + char *kaddr; + int left; + ++#ifndef CONFIG_X86_UACCESS_INDIRECT + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_from_user(kaddr + offset, buf, bytes); + kunmap_atomic(kaddr, KM_USER0); ++#else ++ left = bytes; ++#endif + + if (left != 0) { + /* Do it the slow way */ +@@ -1682,10 +1675,14 @@ filemap_copy_from_user_iovec(struct page + char *kaddr; + size_t copied; + ++#ifndef CONFIG_X86_UACCESS_INDIRECT + kaddr = kmap_atomic(page, KM_USER0); + copied = __filemap_copy_from_user_iovec(kaddr + offset, iov, + base, bytes); + kunmap_atomic(kaddr, KM_USER0); ++#else ++ copied = 0; ++#endif + if (copied != bytes) { + kaddr = kmap(page); + copied = __filemap_copy_from_user_iovec(kaddr + offset, iov, +diff -uprN linux-2.6.8.1.orig/mm/fremap.c linux-2.6.8.1-ve022stab078/mm/fremap.c +--- linux-2.6.8.1.orig/mm/fremap.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/fremap.c 2006-05-11 13:05:39.000000000 +0400 +@@ -19,6 +19,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) + { +@@ -37,8 +39,11 @@ static inline void zap_pte(struct mm_str + if (pte_dirty(pte)) + set_page_dirty(page); + page_remove_rmap(page); ++ pb_remove_ref(page, mm_ub(mm)); + page_cache_release(page); + mm->rss--; ++ vma->vm_rss--; ++ ub_unused_privvm_inc(mm_ub(mm), 1, vma); + } + } + } else { +@@ -62,7 +67,10 @@ int install_page(struct mm_struct *mm, s + pgd_t *pgd; + pmd_t *pmd; + pte_t pte_val; ++ struct page_beancounter *pbc; + ++ if (pb_alloc(&pbc)) ++ goto err_pb; + pgd = pgd_offset(mm, addr); + spin_lock(&mm->page_table_lock); + +@@ -87,6 +95,9 @@ int install_page(struct mm_struct *mm, s + zap_pte(mm, vma, addr, pte); + + mm->rss++; ++ vma->vm_rss++; ++ pb_add_ref(page, mm_ub(mm), &pbc); ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); + flush_icache_page(vma, page); + set_pte(pte, mk_pte(page, prot)); + page_add_file_rmap(page); +@@ -97,6 +108,8 @@ int install_page(struct mm_struct *mm, s + err = 0; + err_unlock: + spin_unlock(&mm->page_table_lock); ++ pb_free(&pbc); ++err_pb: + return err; + } + EXPORT_SYMBOL(install_page); +diff -uprN linux-2.6.8.1.orig/mm/highmem.c linux-2.6.8.1-ve022stab078/mm/highmem.c +--- linux-2.6.8.1.orig/mm/highmem.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/highmem.c 2006-05-11 13:05:28.000000000 +0400 +@@ 
-284,7 +284,7 @@ static void copy_to_high_bio_irq(struct + struct bio_vec *tovec, *fromvec; + int i; + +- bio_for_each_segment(tovec, to, i) { ++ __bio_for_each_segment(tovec, to, i, 0) { + fromvec = from->bi_io_vec + i; + + /* +@@ -316,7 +316,7 @@ static void bounce_end_io(struct bio *bi + /* + * free up bounce indirect pages used + */ +- bio_for_each_segment(bvec, bio, i) { ++ __bio_for_each_segment(bvec, bio, i, 0) { + org_vec = bio_orig->bi_io_vec + i; + if (bvec->bv_page == org_vec->bv_page) + continue; +@@ -423,7 +423,7 @@ static void __blk_queue_bounce(request_q + * at least one page was bounced, fill in possible non-highmem + * pages + */ +- bio_for_each_segment(from, *bio_orig, i) { ++ __bio_for_each_segment(from, *bio_orig, i, 0) { + to = bio_iovec_idx(bio, i); + if (!to->bv_page) { + to->bv_page = from->bv_page; +diff -uprN linux-2.6.8.1.orig/mm/memory.c linux-2.6.8.1-ve022stab078/mm/memory.c +--- linux-2.6.8.1.orig/mm/memory.c 2004-08-14 14:55:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/memory.c 2006-05-11 13:05:49.000000000 +0400 +@@ -40,6 +40,7 @@ + #include <linux/mm.h> + #include <linux/hugetlb.h> + #include <linux/mman.h> ++#include <linux/virtinfo.h> + #include <linux/swap.h> + #include <linux/highmem.h> + #include <linux/pagemap.h> +@@ -56,6 +57,9 @@ + #include <linux/swapops.h> + #include <linux/elf.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ + #ifndef CONFIG_DISCONTIGMEM + /* use the per-pgdat data instead for discontigmem - mbligh */ + unsigned long max_mapnr; +@@ -117,7 +121,8 @@ static inline void free_one_pmd(struct m + pte_free_tlb(tlb, page); + } + +-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) ++static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir, ++ int pgd_idx) + { + int j; + pmd_t * pmd; +@@ -131,8 +136,11 @@ static inline void free_one_pgd(struct m + } + pmd = pmd_offset(dir, 0); + pgd_clear(dir); +- for (j = 0; j < PTRS_PER_PMD ; j++) ++ for (j = 0; j < PTRS_PER_PMD ; j++) { ++ if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE) ++ break; + free_one_pmd(tlb, pmd+j); ++ } + pmd_free_tlb(tlb, pmd); + } + +@@ -145,11 +153,13 @@ static inline void free_one_pgd(struct m + void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr) + { + pgd_t * page_dir = tlb->mm->pgd; ++ int pgd_idx = first; + + page_dir += first; + do { +- free_one_pgd(tlb, page_dir); ++ free_one_pgd(tlb, page_dir, pgd_idx); + page_dir++; ++ pgd_idx++; + } while (--nr); + } + +@@ -205,6 +215,8 @@ out: + } + #define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t)) + #define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) ++#define pb_list_size(addr) \ ++ (PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + + /* + * copy one vm_area from one task to the other. Assumes the page tables +@@ -217,13 +229,15 @@ out: + * dst->page_table_lock is held on entry and exit, + * but may be dropped within pmd_alloc() and pte_alloc_map(). 
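A pattern that recurs throughout the mm/memory.c changes below: page_beancounter records are preallocated with pb_alloc()/pb_alloc_list() before page_table_lock is taken (the allocation may block), consumed under the lock with pb_add_ref()/pb_add_list_ref() once the pte is known to still need setting, and any unused preallocation is released afterwards with pb_free()/pb_free_list(). A small stand-alone illustration of that preallocate/consume/release ordering, using a plain mutex and an invented record type in place of the kernel primitives:

#include <stdlib.h>
#include <pthread.h>

struct demo_record { int attached; };

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for page_table_lock */
static struct demo_record *demo_page_owner;	/* the "page" a consumed record is attached to */

static struct demo_record *demo_prealloc(void)
{
	/* may block, so it must happen before demo_lock is taken */
	return calloc(1, sizeof(struct demo_record));
}

static int demo_fault(int pte_still_unset)
{
	struct demo_record *rec;

	rec = demo_prealloc();
	if (rec == NULL)
		return -1;

	pthread_mutex_lock(&demo_lock);
	if (pte_still_unset) {
		/* consume the preallocation: ownership moves to the "page" */
		rec->attached = 1;
		demo_page_owner = rec;
	}
	pthread_mutex_unlock(&demo_lock);

	if (!rec->attached)
		free(rec);	/* release the unused remainder, like pb_free() */
	return 0;
}

int main(void)
{
	demo_fault(1);	/* raced with nobody: the record is consumed */
	demo_fault(0);	/* pte changed under us: the record is freed again */
	free(demo_page_owner);
	return 0;
}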
+ */ +-int copy_page_range(struct mm_struct *dst, struct mm_struct *src, +- struct vm_area_struct *vma) ++int __copy_page_range(struct vm_area_struct *vma, struct mm_struct *src, ++ unsigned long address, size_t size) + { ++ struct mm_struct *dst = vma->vm_mm; + pgd_t * src_pgd, * dst_pgd; +- unsigned long address = vma->vm_start; +- unsigned long end = vma->vm_end; ++ unsigned long end = address + size; + unsigned long cow; ++ struct page_beancounter *pbc; ++ int need_pbc; + + if (is_vm_hugetlb_page(vma)) + return copy_hugetlb_page_range(dst, src, vma); +@@ -231,6 +245,8 @@ int copy_page_range(struct mm_struct *ds + cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + src_pgd = pgd_offset(src, address)-1; + dst_pgd = pgd_offset(dst, address)-1; ++ pbc = NULL; ++ need_pbc = (mm_ub(dst) != mm_ub(src)); + + for (;;) { + pmd_t * src_pmd, * dst_pmd; +@@ -272,6 +288,10 @@ skip_copy_pte_range: + goto cont_copy_pmd_range; + } + ++ if (need_pbc && ++ pb_alloc_list(&pbc, pb_list_size(address), dst)) ++ goto nomem; ++ + dst_pte = pte_alloc_map(dst, dst_pmd, address); + if (!dst_pte) + goto nomem; +@@ -326,6 +346,9 @@ skip_copy_pte_range: + pte = pte_mkold(pte); + get_page(page); + dst->rss++; ++ vma->vm_rss++; ++ ub_unused_privvm_dec(mm_ub(dst), 1, vma); ++ pb_add_list_ref(page, mm_ub(dst), &pbc); + set_pte(dst_pte, pte); + page_dup_rmap(page); + cont_copy_pte_range_noset: +@@ -350,11 +373,21 @@ cont_copy_pmd_range: + out_unlock: + spin_unlock(&src->page_table_lock); + out: ++ pb_free_list(&pbc); + return 0; + nomem: ++ pb_free_list(&pbc); + return -ENOMEM; + } + ++int copy_page_range(struct mm_struct *dst, struct mm_struct *src, ++ struct vm_area_struct *vma) ++{ ++ if (vma->vm_mm != dst) ++ BUG(); ++ return __copy_page_range(vma, src, vma->vm_start, vma->vm_end-vma->vm_start); ++} ++ + static void zap_pte_range(struct mmu_gather *tlb, + pmd_t *pmd, unsigned long address, + unsigned long size, struct zap_details *details) +@@ -420,6 +453,7 @@ static void zap_pte_range(struct mmu_gat + mark_page_accessed(page); + tlb->freed++; + page_remove_rmap(page); ++ pb_remove_ref(page, mm_ub(tlb->mm)); + tlb_remove_page(tlb, page); + continue; + } +@@ -441,7 +475,7 @@ static void zap_pmd_range(struct mmu_gat + unsigned long size, struct zap_details *details) + { + pmd_t * pmd; +- unsigned long end; ++ unsigned long end, pgd_boundary; + + if (pgd_none(*dir)) + return; +@@ -452,8 +486,9 @@ static void zap_pmd_range(struct mmu_gat + } + pmd = pmd_offset(dir, address); + end = address + size; +- if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) +- end = ((address + PGDIR_SIZE) & PGDIR_MASK); ++ pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK); ++ if (pgd_boundary && (end > pgd_boundary)) ++ end = pgd_boundary; + do { + zap_pte_range(tlb, pmd, address, end - address, details); + address = (address + PMD_SIZE) & PMD_MASK; +@@ -461,20 +496,63 @@ static void zap_pmd_range(struct mmu_gat + } while (address && (address < end)); + } + ++static void warn_bad_zap(struct vm_area_struct *vma, unsigned long freed) ++{ ++#ifdef CONFIG_USER_RESOURCE ++ static struct ub_rate_info ri = { ++ .burst = 10, ++ .interval = 40 * HZ, ++ }; ++ struct user_beancounter *ub; ++ char ubuid[64] = "No UB"; ++ ++ if (!ub_ratelimit(&ri)) ++ return; ++ ++ ub = mm_ub(vma->vm_mm); ++ if (ub) ++ print_ub_uid(ub, ubuid, sizeof(ubuid)); ++ ++#else ++ const char ubuid[] = "0"; ++#endif ++ ++ printk(KERN_WARNING ++ "%s vm_rss: process pid %d comm %.20s flags %lx, " ++ "vma %p %08lx-%08lx %p rss %lu freed %lu\n flags %lx, " ++ "ub 
%s\n", ++ vma->vm_rss > freed ? "Positive" : "Negative", ++ current->pid, current->comm, current->flags, ++ vma, vma->vm_start, vma->vm_end, vma->vm_file, ++ vma->vm_rss, freed, vma->vm_flags, ubuid); ++ dump_stack(); ++} ++ + static void unmap_page_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long address, + unsigned long end, struct zap_details *details) + { ++ unsigned long freed; + pgd_t * dir; + + BUG_ON(address >= end); + dir = pgd_offset(vma->vm_mm, address); + tlb_start_vma(tlb, vma); ++ freed = tlb->freed; + do { + zap_pmd_range(tlb, dir, address, end - address, details); + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); ++ freed = tlb->freed - freed; ++ if (freed) { ++ ub_unused_privvm_inc(mm_ub(tlb->mm), freed, vma); ++ if (vma->vm_rss < freed) { ++ warn_bad_zap(vma, freed); ++ freed = vma->vm_rss; ++ } ++ vma->vm_rss -= freed; ++ } + tlb_end_vma(tlb, vma); + } + +@@ -596,6 +674,7 @@ void zap_page_range(struct vm_area_struc + unsigned long nr_accounted = 0; + + if (is_vm_hugetlb_page(vma)) { ++ /* ub acct is performed in unmap_hugepage_range */ + zap_hugepage_range(vma, address, size); + return; + } +@@ -604,6 +683,8 @@ void zap_page_range(struct vm_area_struc + spin_lock(&mm->page_table_lock); + tlb = tlb_gather_mmu(mm, 0); + unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); ++ if (vma->vm_rss && address == vma->vm_start && end == vma->vm_end) ++ warn_bad_zap(vma, 0); + tlb_finish_mmu(tlb, address, end); + spin_unlock(&mm->page_table_lock); + } +@@ -612,21 +693,98 @@ void zap_page_range(struct vm_area_struc + * Do a quick page-table lookup for a single page. + * mm->page_table_lock must be held. + */ +-struct page * +-follow_page(struct mm_struct *mm, unsigned long address, int write) ++static struct page * ++pgd_follow_page(struct mm_struct *mm, pgd_t *pgd, unsigned long address, ++ int write) + { +- pgd_t *pgd; + pmd_t *pmd; + pte_t *ptep, pte; + unsigned long pfn; + struct page *page; + ++ pmd = pmd_offset(pgd, address); ++ if (pmd_none(*pmd)) ++ goto out; ++ if (pmd_huge(*pmd)) ++ return follow_huge_pmd(mm, address, pmd, write); ++ if (unlikely(pmd_bad(*pmd))) ++ goto out; ++ ++ ptep = pte_offset_map(pmd, address); ++ if (!ptep) ++ goto out; ++ ++ pte = *ptep; ++ pte_unmap(ptep); ++ if (pte_present(pte)) { ++ if (write && !pte_write(pte)) ++ goto out; ++ pfn = pte_pfn(pte); ++ if (pfn_valid(pfn)) { ++ page = pfn_to_page(pfn); ++ if (write && !pte_dirty(pte) && !PageDirty(page)) ++ set_page_dirty(page); ++ mark_page_accessed(page); ++ return page; ++ } ++ } ++ ++out: ++ return NULL; ++} ++ ++struct page * ++follow_page(struct mm_struct *mm, unsigned long address, int write) ++{ ++ pgd_t *pgd; ++ struct page *page; ++ + page = follow_huge_addr(mm, address, write); + if (! IS_ERR(page)) + return page; + + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) ++ return NULL; ++ ++ return pgd_follow_page(mm, pgd, address, write); ++} ++ ++struct page * ++follow_page_k(unsigned long address, int write) ++{ ++ pgd_t *pgd; ++ struct page *page; ++ ++ page = follow_huge_addr(&init_mm, address, write); ++ if (! 
IS_ERR(page)) ++ return page; ++ ++ pgd = pgd_offset_k(address); ++ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) ++ return NULL; ++ ++ return pgd_follow_page(&init_mm, pgd, address, write); ++} ++ ++struct page * ++follow_page_pte(struct mm_struct *mm, unsigned long address, int write, ++ pte_t *page_pte) ++{ ++ pgd_t *pgd; ++ pmd_t *pmd; ++ pte_t *ptep, pte; ++ unsigned long pfn; ++ struct page *page; ++ ++ ++ memset(page_pte, 0, sizeof(*page_pte)); ++ page = follow_huge_addr(mm, address, write); ++ if (!IS_ERR(page)) ++ return page; ++ ++ pgd = pgd_offset(mm, address); ++ if (pgd_none(*pgd) || pgd_bad(*pgd)) + goto out; + + pmd = pmd_offset(pgd, address); +@@ -634,7 +792,7 @@ follow_page(struct mm_struct *mm, unsign + goto out; + if (pmd_huge(*pmd)) + return follow_huge_pmd(mm, address, pmd, write); +- if (unlikely(pmd_bad(*pmd))) ++ if (pmd_bad(*pmd)) + goto out; + + ptep = pte_offset_map(pmd, address); +@@ -643,16 +801,23 @@ follow_page(struct mm_struct *mm, unsign + + pte = *ptep; + pte_unmap(ptep); +- if (pte_present(pte)) { ++ if (pte_present(pte) && pte_read(pte)) { + if (write && !pte_write(pte)) + goto out; ++ if (write && !pte_dirty(pte)) { ++ struct page *page = pte_page(pte); ++ if (!PageDirty(page)) ++ set_page_dirty(page); ++ } + pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { +- page = pfn_to_page(pfn); +- if (write && !pte_dirty(pte) && !PageDirty(page)) +- set_page_dirty(page); ++ struct page *page = pfn_to_page(pfn); ++ + mark_page_accessed(page); + return page; ++ } else { ++ *page_pte = pte; ++ return NULL; + } + } + +@@ -660,6 +825,7 @@ out: + return NULL; + } + ++ + /* + * Given a physical address, is there a useful struct page pointing to + * it? This may become more complex in the future if we start dealing +@@ -674,6 +840,7 @@ static inline struct page *get_page_map( + } + + ++#ifndef CONFIG_X86_4G + static inline int + untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma, + unsigned long address) +@@ -698,6 +865,7 @@ untouched_anonymous_page(struct mm_struc + /* There is a pte slot for 'address' in 'mm'. */ + return 0; + } ++#endif + + + int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +@@ -727,16 +895,16 @@ int get_user_pages(struct task_struct *t + pte_t *pte; + if (write) /* user gate pages are read-only */ + return i ? : -EFAULT; +- pgd = pgd_offset_gate(mm, pg); +- if (!pgd) +- return i ? : -EFAULT; ++ if (pg > TASK_SIZE) ++ pgd = pgd_offset_k(pg); ++ else ++ pgd = pgd_offset_gate(mm, pg); ++ BUG_ON(pgd_none(*pgd)); + pmd = pmd_offset(pgd, pg); +- if (!pmd) ++ if (pmd_none(*pmd)) + return i ? : -EFAULT; + pte = pte_offset_map(pmd, pg); +- if (!pte) +- return i ? : -EFAULT; +- if (!pte_present(*pte)) { ++ if (pte_none(*pte)) { + pte_unmap(pte); + return i ? : -EFAULT; + } +@@ -773,12 +941,21 @@ int get_user_pages(struct task_struct *t + * insanly big anonymously mapped areas that + * nobody touched so far. This is important + * for doing a core dump for these mappings. ++ * ++ * disable this for 4:4 - it prevents ++ * follow_page() from ever seeing these pages. ++ * ++ * (The 'fix' is dubious anyway, there's ++ * nothing that this code avoids which couldnt ++ * be triggered from userspace anyway.) 
+ */ ++#ifndef CONFIG_X86_4G + if (!lookup_write && + untouched_anonymous_page(mm,vma,start)) { + map = ZERO_PAGE(start); + break; + } ++#endif + spin_unlock(&mm->page_table_lock); + switch (handle_mm_fault(mm,vma,start,write)) { + case VM_FAULT_MINOR: +@@ -968,6 +1145,15 @@ int remap_page_range(struct vm_area_stru + if (from >= end) + BUG(); + ++ /* ++ * Physically remapped pages are special. Tell the ++ * rest of the world about it: ++ * VM_IO tells people not to look at these pages ++ * (accesses can have side effects). ++ * VM_RESERVED tells swapout not to try to touch ++ * this region. ++ */ ++ vma->vm_flags |= VM_IO | VM_RESERVED; + spin_lock(&mm->page_table_lock); + do { + pmd_t *pmd = pmd_alloc(mm, dir, from); +@@ -1016,6 +1202,7 @@ static inline void break_cow(struct vm_a + vma); + ptep_establish(vma, address, page_table, entry); + update_mmu_cache(vma, address, entry); ++ lazy_mmu_prot_update(entry); + } + + /* +@@ -1042,6 +1229,7 @@ static int do_wp_page(struct mm_struct * + unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte) + { + struct page *old_page, *new_page; ++ struct page_beancounter *pbc; + unsigned long pfn = pte_pfn(pte); + pte_t entry; + +@@ -1068,6 +1256,7 @@ static int do_wp_page(struct mm_struct * + vma); + ptep_set_access_flags(vma, address, page_table, entry, 1); + update_mmu_cache(vma, address, entry); ++ lazy_mmu_prot_update(entry); + pte_unmap(page_table); + spin_unlock(&mm->page_table_lock); + return VM_FAULT_MINOR; +@@ -1082,6 +1271,9 @@ static int do_wp_page(struct mm_struct * + page_cache_get(old_page); + spin_unlock(&mm->page_table_lock); + ++ if (pb_alloc(&pbc)) ++ goto out; ++ + if (unlikely(anon_vma_prepare(vma))) + goto no_new_page; + new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); +@@ -1095,10 +1287,16 @@ static int do_wp_page(struct mm_struct * + spin_lock(&mm->page_table_lock); + page_table = pte_offset_map(pmd, address); + if (likely(pte_same(*page_table, pte))) { +- if (PageReserved(old_page)) ++ if (PageReserved(old_page)) { + ++mm->rss; +- else ++ ++vma->vm_rss; ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); ++ } else { + page_remove_rmap(old_page); ++ pb_remove_ref(old_page, mm_ub(mm)); ++ } ++ ++ pb_add_ref(new_page, mm_ub(mm), &pbc); + break_cow(vma, new_page, address, page_table); + lru_cache_add_active(new_page); + page_add_anon_rmap(new_page, vma, address); +@@ -1113,6 +1311,8 @@ static int do_wp_page(struct mm_struct * + return VM_FAULT_MINOR; + + no_new_page: ++ pb_free(&pbc); ++out: + page_cache_release(old_page); + return VM_FAULT_OOM; + } +@@ -1322,12 +1522,21 @@ static int do_swap_page(struct mm_struct + pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access) + { + struct page *page; ++ struct page_beancounter *pbc; + swp_entry_t entry = pte_to_swp_entry(orig_pte); + pte_t pte; +- int ret = VM_FAULT_MINOR; ++ int ret; ++ cycles_t start; + + pte_unmap(page_table); + spin_unlock(&mm->page_table_lock); ++ start = get_cycles(); ++ pbc = NULL; ++ ret = VM_FAULT_OOM; ++ if (pb_alloc(&pbc)) ++ goto out_nopbc; ++ ++ ret = VM_FAULT_MINOR; + page = lookup_swap_cache(entry); + if (!page) { + swapin_readahead(entry, address, vma); +@@ -1363,21 +1572,25 @@ static int do_swap_page(struct mm_struct + spin_lock(&mm->page_table_lock); + page_table = pte_offset_map(pmd, address); + if (unlikely(!pte_same(*page_table, orig_pte))) { +- pte_unmap(page_table); +- spin_unlock(&mm->page_table_lock); +- unlock_page(page); +- page_cache_release(page); + ret = VM_FAULT_MINOR; +- goto out; ++ goto out_nomap; ++ } ++ ++ if 
(unlikely(!PageUptodate(page))) { ++ ret = VM_FAULT_SIGBUS; ++ goto out_nomap; + } + + /* The page isn't present yet, go ahead with the fault. */ + + swap_free(entry); +- if (vm_swap_full()) +- remove_exclusive_swap_page(page); ++ try_to_remove_exclusive_swap_page(page); + + mm->rss++; ++ vma->vm_rss++; ++ mm_ub(mm)->ub_perfstat[smp_processor_id()].swapin++; ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); ++ pb_add_ref(page, mm_ub(mm), &pbc); + pte = mk_pte(page, vma->vm_page_prot); + if (write_access && can_share_swap_page(page)) { + pte = maybe_mkwrite(pte_mkdirty(pte), vma); +@@ -1398,10 +1611,23 @@ static int do_swap_page(struct mm_struct + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, pte); ++ lazy_mmu_prot_update(pte); + pte_unmap(page_table); + spin_unlock(&mm->page_table_lock); + out: ++ pb_free(&pbc); ++ spin_lock_irq(&kstat_glb_lock); ++ KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start); ++ spin_unlock_irq(&kstat_glb_lock); ++out_nopbc: + return ret; ++ ++out_nomap: ++ pte_unmap(page_table); ++ spin_unlock(&mm->page_table_lock); ++ unlock_page(page); ++ page_cache_release(page); ++ goto out; + } + + /* +@@ -1416,16 +1642,20 @@ do_anonymous_page(struct mm_struct *mm, + { + pte_t entry; + struct page * page = ZERO_PAGE(addr); ++ struct page_beancounter *pbc; + + /* Read-only mapping of ZERO_PAGE. */ + entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); + + /* ..except if it's a write access */ ++ pbc = NULL; + if (write_access) { + /* Allocate our own private page. */ + pte_unmap(page_table); + spin_unlock(&mm->page_table_lock); + ++ if (pb_alloc(&pbc)) ++ goto no_mem; + if (unlikely(anon_vma_prepare(vma))) + goto no_mem; + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); +@@ -1443,6 +1673,9 @@ do_anonymous_page(struct mm_struct *mm, + goto out; + } + mm->rss++; ++ vma->vm_rss++; ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); ++ pb_add_ref(page, mm_ub(mm), &pbc); + entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, + vma->vm_page_prot)), + vma); +@@ -1456,10 +1689,13 @@ do_anonymous_page(struct mm_struct *mm, + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, addr, entry); ++ lazy_mmu_prot_update(entry); + spin_unlock(&mm->page_table_lock); + out: ++ pb_free(&pbc); + return VM_FAULT_MINOR; + no_mem: ++ pb_free(&pbc); + return VM_FAULT_OOM; + } + +@@ -1480,6 +1716,7 @@ do_no_page(struct mm_struct *mm, struct + unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd) + { + struct page * new_page; ++ struct page_beancounter *pbc; + struct address_space *mapping = NULL; + pte_t entry; + int sequence = 0; +@@ -1492,6 +1729,9 @@ do_no_page(struct mm_struct *mm, struct + pte_unmap(page_table); + spin_unlock(&mm->page_table_lock); + ++ if (pb_alloc(&pbc)) ++ return VM_FAULT_OOM; ++ + if (vma->vm_file) { + mapping = vma->vm_file->f_mapping; + sequence = atomic_read(&mapping->truncate_count); +@@ -1501,10 +1741,14 @@ retry: + new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + + /* no page was available -- either SIGBUS or OOM */ +- if (new_page == NOPAGE_SIGBUS) ++ if (new_page == NOPAGE_SIGBUS) { ++ pb_free(&pbc); + return VM_FAULT_SIGBUS; +- if (new_page == NOPAGE_OOM) ++ } ++ if (new_page == NOPAGE_OOM) { ++ pb_free(&pbc); + return VM_FAULT_OOM; ++ } + + /* + * Should we do an early C-O-W break? +@@ -1550,8 +1794,12 @@ retry: + */ + /* Only go through if we didn't race with anybody else... 
*/ + if (pte_none(*page_table)) { +- if (!PageReserved(new_page)) ++ if (!PageReserved(new_page)) { + ++mm->rss; ++ ++vma->vm_rss; ++ ub_unused_privvm_dec(mm_ub(mm), 1, vma); ++ pb_add_ref(new_page, mm_ub(mm), &pbc); ++ } + flush_icache_page(vma, new_page); + entry = mk_pte(new_page, vma->vm_page_prot); + if (write_access) +@@ -1573,8 +1821,10 @@ retry: + + /* no need to invalidate: a not-present page shouldn't be cached */ + update_mmu_cache(vma, address, entry); ++ lazy_mmu_prot_update(entry); + spin_unlock(&mm->page_table_lock); + out: ++ pb_free(&pbc); + return ret; + oom: + page_cache_release(new_page); +@@ -1667,6 +1917,7 @@ static inline int handle_pte_fault(struc + entry = pte_mkyoung(entry); + ptep_set_access_flags(vma, address, pte, entry, write_access); + update_mmu_cache(vma, address, entry); ++ lazy_mmu_prot_update(entry); + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); + return VM_FAULT_MINOR; +@@ -1681,6 +1932,18 @@ int handle_mm_fault(struct mm_struct *mm + pgd_t *pgd; + pmd_t *pmd; + ++#if CONFIG_VZ_GENCALLS ++ if (test_bit(UB_AFLAG_NOTIF_PAGEIN, &mm_ub(mm)->ub_aflags)) { ++ int ret; ++ ret = virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_PAGEIN, ++ (void *)1); ++ if (ret & NOTIFY_FAIL) ++ return VM_FAULT_SIGBUS; ++ if (ret & NOTIFY_OK) ++ return VM_FAULT_MINOR; /* retry */ ++ } ++#endif ++ + __set_current_state(TASK_RUNNING); + pgd = pgd_offset(mm, address); + +diff -uprN linux-2.6.8.1.orig/mm/mempolicy.c linux-2.6.8.1-ve022stab078/mm/mempolicy.c +--- linux-2.6.8.1.orig/mm/mempolicy.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mempolicy.c 2006-05-11 13:05:34.000000000 +0400 +@@ -136,6 +136,8 @@ static int get_nodes(unsigned long *node + bitmap_zero(nodes, MAX_NUMNODES); + if (maxnode == 0 || !nmask) + return 0; ++ if (maxnode > PAGE_SIZE*8 /*BITS_PER_BYTE*/) ++ return -EINVAL; + + nlongs = BITS_TO_LONGS(maxnode); + if ((maxnode % BITS_PER_LONG) == 0) +@@ -210,6 +212,10 @@ static struct mempolicy *mpol_new(int mo + switch (mode) { + case MPOL_INTERLEAVE: + bitmap_copy(policy->v.nodes, nodes, MAX_NUMNODES); ++ if (bitmap_weight(nodes, MAX_NUMNODES) == 0) { ++ kmem_cache_free(policy_cache, policy); ++ return ERR_PTR(-EINVAL); ++ } + break; + case MPOL_PREFERRED: + policy->v.preferred_node = find_first_bit(nodes, MAX_NUMNODES); +@@ -388,7 +394,7 @@ asmlinkage long sys_set_mempolicy(int mo + struct mempolicy *new; + DECLARE_BITMAP(nodes, MAX_NUMNODES); + +- if (mode > MPOL_MAX) ++ if (mode < 0 || mode > MPOL_MAX) + return -EINVAL; + err = get_nodes(nodes, nmask, maxnode, mode); + if (err) +@@ -508,9 +514,13 @@ asmlinkage long sys_get_mempolicy(int __ + } else + pval = pol->policy; + +- err = -EFAULT; ++ if (vma) { ++ up_read(¤t->mm->mmap_sem); ++ vma = NULL; ++ } ++ + if (policy && put_user(pval, policy)) +- goto out; ++ return -EFAULT; + + err = 0; + if (nmask) { +diff -uprN linux-2.6.8.1.orig/mm/mempool.c linux-2.6.8.1-ve022stab078/mm/mempool.c +--- linux-2.6.8.1.orig/mm/mempool.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mempool.c 2006-05-11 13:05:39.000000000 +0400 +@@ -10,6 +10,7 @@ + + #include <linux/mm.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/module.h> + #include <linux/mempool.h> + #include <linux/blkdev.h> +@@ -72,6 +73,9 @@ mempool_t * mempool_create(int min_nr, m + pool->alloc = alloc_fn; + pool->free = free_fn; + ++ if (alloc_fn == mempool_alloc_slab) ++ kmem_mark_nocharge((kmem_cache_t *)pool_data); ++ + /* + * First pre-allocate the guaranteed number of 
buffers. + */ +@@ -112,6 +116,7 @@ int mempool_resize(mempool_t *pool, int + unsigned long flags; + + BUG_ON(new_min_nr <= 0); ++ gfp_mask &= ~__GFP_UBC; + + spin_lock_irqsave(&pool->lock, flags); + if (new_min_nr < pool->min_nr) { +@@ -194,6 +199,9 @@ void * mempool_alloc(mempool_t *pool, in + DEFINE_WAIT(wait); + int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); + ++ gfp_mask &= ~__GFP_UBC; ++ gfp_nowait &= ~__GFP_UBC; ++ + repeat_alloc: + element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data); + if (likely(element != NULL)) +diff -uprN linux-2.6.8.1.orig/mm/mlock.c linux-2.6.8.1-ve022stab078/mm/mlock.c +--- linux-2.6.8.1.orig/mm/mlock.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mlock.c 2006-05-11 13:05:39.000000000 +0400 +@@ -8,6 +8,8 @@ + #include <linux/mman.h> + #include <linux/mm.h> + ++#include <ub/ub_vmpages.h> ++ + + static int mlock_fixup(struct vm_area_struct * vma, + unsigned long start, unsigned long end, unsigned int newflags) +@@ -19,17 +21,23 @@ static int mlock_fixup(struct vm_area_st + if (newflags == vma->vm_flags) + goto out; + ++ if (newflags & VM_LOCKED) { ++ ret = ub_locked_mem_charge(mm_ub(mm), end - start); ++ if (ret < 0) ++ goto out; ++ } ++ + if (start != vma->vm_start) { + if (split_vma(mm, vma, start, 1)) { + ret = -EAGAIN; +- goto out; ++ goto out_uncharge; + } + } + + if (end != vma->vm_end) { + if (split_vma(mm, vma, end, 0)) { + ret = -EAGAIN; +- goto out; ++ goto out_uncharge; + } + } + +@@ -47,9 +55,17 @@ static int mlock_fixup(struct vm_area_st + if (newflags & VM_LOCKED) { + pages = -pages; + ret = make_pages_present(start, end); ++ } else { ++ /* uncharge this memory, since it was unlocked */ ++ ub_locked_mem_uncharge(mm_ub(mm), end - start); + } + + vma->vm_mm->locked_vm -= pages; ++ return ret; ++ ++out_uncharge: ++ if (newflags & VM_LOCKED) ++ ub_locked_mem_uncharge(mm_ub(mm), end - start); + out: + return ret; + } +diff -uprN linux-2.6.8.1.orig/mm/mmap.c linux-2.6.8.1-ve022stab078/mm/mmap.c +--- linux-2.6.8.1.orig/mm/mmap.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mmap.c 2006-05-11 13:05:40.000000000 +0400 +@@ -28,6 +28,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlb.h> + ++#include <ub/ub_vmpages.h> ++ + /* + * WARNING: the debugging will use recursive algorithms so never enable this + * unless you know what you are doing. +@@ -90,6 +92,8 @@ static void remove_vm_struct(struct vm_a + { + struct file *file = vma->vm_file; + ++ ub_memory_uncharge(mm_ub(vma->vm_mm), vma->vm_end - vma->vm_start, ++ vma->vm_flags, vma->vm_file); + if (file) { + struct address_space *mapping = file->f_mapping; + spin_lock(&mapping->i_mmap_lock); +@@ -105,6 +109,7 @@ static void remove_vm_struct(struct vm_a + kmem_cache_free(vm_area_cachep, vma); + } + ++static unsigned long __do_brk(unsigned long, unsigned long, int); + /* + * sys_brk() for the most part doesn't need the global kernel + * lock, except when an application is doing something nasty +@@ -144,7 +149,7 @@ asmlinkage unsigned long sys_brk(unsigne + goto out; + + /* Ok, looks good - let it rip. 
*/ +- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) ++ if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk) + goto out; + set_brk: + mm->brk = brk; +@@ -607,6 +612,7 @@ struct vm_area_struct *vma_merge(struct + { + pgoff_t pglen = (end - addr) >> PAGE_SHIFT; + struct vm_area_struct *area, *next; ++ unsigned long extra_rss; + + /* + * We later require that vma->vm_flags == vm_flags, +@@ -620,8 +626,12 @@ struct vm_area_struct *vma_merge(struct + else + next = mm->mmap; + area = next; +- if (next && next->vm_end == end) /* cases 6, 7, 8 */ ++ extra_rss = 0; ++ spin_lock(&mm->page_table_lock); ++ if (next && next->vm_end == end) { /* cases 6, 7, 8 */ + next = next->vm_next; ++ extra_rss = area->vm_rss; /* asterix below */ ++ } + + /* + * Can it merge with the predecessor? +@@ -640,11 +650,28 @@ struct vm_area_struct *vma_merge(struct + is_mergeable_anon_vma(prev->anon_vma, + next->anon_vma)) { + /* cases 1, 6 */ ++ /* case 1 : prev->vm_rss += next->vm_rss ++ * case 6*: prev->vm_rss += area->vm_rss + next->vm_rss ++ */ ++ prev->vm_rss += next->vm_rss + extra_rss; ++ spin_unlock(&mm->page_table_lock); + vma_adjust(prev, prev->vm_start, + next->vm_end, prev->vm_pgoff, NULL); +- } else /* cases 2, 5, 7 */ ++ } else { /* cases 2, 5, 7 */ ++ /* case 2 : nothing ++ * case 5 : prev->vm_rss += pages_in(addr, end) ++ * next->vm_rss -= pages_in(addr, end) ++ * case 7*: prev->vm_rss += area->vm_rss ++ */ ++ if (next && addr == next->vm_start) { /* case 5 */ ++ extra_rss = pages_in_vma_range(next, addr, end); ++ next->vm_rss -= extra_rss; ++ } ++ prev->vm_rss += extra_rss; ++ spin_unlock(&mm->page_table_lock); + vma_adjust(prev, prev->vm_start, + end, prev->vm_pgoff, NULL); ++ } + return prev; + } + +@@ -655,15 +682,29 @@ struct vm_area_struct *vma_merge(struct + mpol_equal(policy, vma_policy(next)) && + can_vma_merge_before(next, vm_flags, + anon_vma, file, pgoff+pglen)) { +- if (prev && addr < prev->vm_end) /* case 4 */ ++ if (prev && addr < prev->vm_end) { /* case 4 */ ++ /* case 4 : prev->vm_rss -= pages_in(addr, end) ++ * next->vm_rss += pages_in(addr, end) ++ */ ++ extra_rss = pages_in_vma_range(prev, addr, end); ++ prev->vm_rss -= extra_rss; ++ next->vm_rss += extra_rss; ++ spin_unlock(&mm->page_table_lock); + vma_adjust(prev, prev->vm_start, + addr, prev->vm_pgoff, NULL); +- else /* cases 3, 8 */ ++ } else { /* cases 3, 8 */ ++ /* case 3 : nothing ++ * case 8*: next->vm_rss += area->vm_rss ++ */ ++ next->vm_rss += extra_rss; ++ spin_unlock(&mm->page_table_lock); + vma_adjust(area, addr, next->vm_end, + next->vm_pgoff - pglen, NULL); ++ } + return area; + } + ++ spin_unlock(&mm->page_table_lock); + return NULL; + } + +@@ -785,6 +826,12 @@ unsigned long do_mmap_pgoff(struct file + if (mm->map_count > sysctl_max_map_count) + return -ENOMEM; + ++ if (file && (prot & PROT_EXEC)) { ++ error = check_area_execute_ve(file->f_dentry, file->f_vfsmnt); ++ if (error) ++ return error; ++ } ++ + /* Obtain the address to map to. we verify (or select) it and ensure + * that it represents a valid section of the address space. + */ +@@ -897,6 +944,11 @@ munmap_back: + } + } + ++ error = -ENOMEM; ++ if (ub_memory_charge(mm_ub(mm), len, vm_flags, file, ++ (flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD))) ++ goto uncharge_error; ++ + /* + * Can we just expand an old private anonymous mapping? + * The VM_SHARED test is necessary because shmem_zero_setup +@@ -912,7 +964,8 @@ munmap_back: + * specific mapper. the address has already been validated, but + * not unmapped, but the maps are removed from the list. 
+ */ +- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | ++ (flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0)); + if (!vma) { + error = -ENOMEM; + goto unacct_error; +@@ -923,6 +976,7 @@ munmap_back: + vma->vm_start = addr; + vma->vm_end = addr + len; + vma->vm_flags = vm_flags; ++ vma->vm_rss = 0; + vma->vm_page_prot = protection_map[vm_flags & 0x0f]; + vma->vm_pgoff = pgoff; + +@@ -1001,6 +1055,8 @@ unmap_and_free_vma: + free_vma: + kmem_cache_free(vm_area_cachep, vma); + unacct_error: ++ ub_memory_uncharge(mm_ub(mm), len, vm_flags, file); ++uncharge_error: + if (charged) + vm_unacct_memory(charged); + return error; +@@ -1210,15 +1266,28 @@ int expand_stack(struct vm_area_struct * + address &= PAGE_MASK; + grow = (address - vma->vm_end) >> PAGE_SHIFT; + ++ /* Somebody else might have raced and expanded it already */ ++ if (address <= vma->vm_end) ++ goto raced; ++ + /* Overcommit.. */ + if (security_vm_enough_memory(grow)) { + anon_vma_unlock(vma); + return -ENOMEM; + } + ++ if ((vma->vm_flags & VM_LOCKED) && ++ ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > ++ current->rlim[RLIMIT_MEMLOCK].rlim_cur) ++ goto nomem; ++ + if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur || + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > +- current->rlim[RLIMIT_AS].rlim_cur) { ++ current->rlim[RLIMIT_AS].rlim_cur || ++ ub_memory_charge(mm_ub(vma->vm_mm), ++ address - vma->vm_end, ++ vma->vm_flags, vma->vm_file, UB_SOFT)) { ++nomem: + anon_vma_unlock(vma); + vm_unacct_memory(grow); + return -ENOMEM; +@@ -1227,6 +1296,7 @@ int expand_stack(struct vm_area_struct * + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; ++raced: + anon_vma_unlock(vma); + return 0; + } +@@ -1271,15 +1341,28 @@ int expand_stack(struct vm_area_struct * + address &= PAGE_MASK; + grow = (vma->vm_start - address) >> PAGE_SHIFT; + ++ /* Somebody else might have raced and expanded it already */ ++ if (address >= vma->vm_start) ++ goto raced; ++ + /* Overcommit.. */ + if (security_vm_enough_memory(grow)) { + anon_vma_unlock(vma); + return -ENOMEM; + } + ++ if ((vma->vm_flags & VM_LOCKED) && ++ ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > ++ current->rlim[RLIMIT_MEMLOCK].rlim_cur) ++ goto nomem; ++ + if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > +- current->rlim[RLIMIT_AS].rlim_cur) { ++ current->rlim[RLIMIT_AS].rlim_cur || ++ ub_memory_charge(mm_ub(vma->vm_mm), ++ vma->vm_start - address, ++ vma->vm_flags, vma->vm_file, UB_SOFT)) { ++nomem: + anon_vma_unlock(vma); + vm_unacct_memory(grow); + return -ENOMEM; +@@ -1289,6 +1372,7 @@ int expand_stack(struct vm_area_struct * + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; ++raced: + anon_vma_unlock(vma); + return 0; + } +@@ -1517,6 +1601,11 @@ int split_vma(struct mm_struct * mm, str + else + vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + ++ spin_lock(&mm->page_table_lock); ++ new->vm_rss = pages_in_vma(new); ++ vma->vm_rss = pages_in_vma(vma); ++ spin_unlock(&mm->page_table_lock); ++ + return 0; + } + +@@ -1611,7 +1700,7 @@ asmlinkage long sys_munmap(unsigned long + * anonymous maps. eventually we may be able to do some + * brk-specific accounting here. 
+ */ +-unsigned long do_brk(unsigned long addr, unsigned long len) ++static unsigned long __do_brk(unsigned long addr, unsigned long len, int lowpri) + { + struct mm_struct * mm = current->mm; + struct vm_area_struct * vma, * prev; +@@ -1637,6 +1726,12 @@ unsigned long do_brk(unsigned long addr, + } + + /* ++ * mm->mmap_sem is required to protect against another thread ++ * changing the mappings in case we sleep. ++ */ ++ WARN_ON(down_read_trylock(&mm->mmap_sem)); ++ ++ /* + * Clear old maps. this also does some error checking for us + */ + munmap_back: +@@ -1660,6 +1755,10 @@ unsigned long do_brk(unsigned long addr, + + flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; + ++ if (ub_memory_charge(mm_ub(mm), len, flags, NULL, lowpri)) ++ goto out_unacct; ++ ++ + /* Can we just expand an old private anonymous mapping? */ + if (vma_merge(mm, prev, addr, addr + len, flags, + NULL, NULL, pgoff, NULL)) +@@ -1668,8 +1767,11 @@ unsigned long do_brk(unsigned long addr, + /* + * create a vma struct for an anonymous mapping + */ +- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); ++ vma = kmem_cache_alloc(vm_area_cachep, ++ SLAB_KERNEL | (lowpri ? 0 : __GFP_SOFT_UBC)); + if (!vma) { ++ ub_memory_uncharge(mm_ub(mm), len, flags, NULL); ++out_unacct: + vm_unacct_memory(len >> PAGE_SHIFT); + return -ENOMEM; + } +@@ -1680,6 +1782,7 @@ unsigned long do_brk(unsigned long addr, + vma->vm_end = addr + len; + vma->vm_pgoff = pgoff; + vma->vm_flags = flags; ++ vma->vm_rss = 0; + vma->vm_page_prot = protection_map[flags & 0x0f]; + vma_link(mm, vma, prev, rb_link, rb_parent); + out: +@@ -1691,6 +1794,11 @@ out: + return addr; + } + ++unsigned long do_brk(unsigned long addr, unsigned long len) ++{ ++ return __do_brk(addr, len, UB_SOFT); ++} ++ + EXPORT_SYMBOL(do_brk); + + /* Release all mmaps. */ +@@ -1740,7 +1848,7 @@ void exit_mmap(struct mm_struct *mm) + * and into the inode's i_mmap tree. If vm_file is non-NULL + * then i_mmap_lock is taken here. + */ +-void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) ++int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) + { + struct vm_area_struct * __vma, * prev; + struct rb_node ** rb_link, * rb_parent; +@@ -1763,8 +1871,9 @@ void insert_vm_struct(struct mm_struct * + } + __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent); + if (__vma && __vma->vm_start < vma->vm_end) +- BUG(); ++ return -ENOMEM; + vma_link(mm, vma, prev, rb_link, rb_parent); ++ return 0; + } + + /* +@@ -1812,6 +1921,7 @@ struct vm_area_struct *copy_vma(struct v + new_vma->vm_start = addr; + new_vma->vm_end = addr + len; + new_vma->vm_pgoff = pgoff; ++ new_vma->vm_rss = 0; + if (new_vma->vm_file) + get_file(new_vma->vm_file); + if (new_vma->vm_ops && new_vma->vm_ops->open) +diff -uprN linux-2.6.8.1.orig/mm/mprotect.c linux-2.6.8.1-ve022stab078/mm/mprotect.c +--- linux-2.6.8.1.orig/mm/mprotect.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mprotect.c 2006-05-11 13:05:39.000000000 +0400 +@@ -24,6 +24,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static inline void + change_pte_range(pmd_t *pmd, unsigned long address, + unsigned long size, pgprot_t newprot) +@@ -51,8 +53,9 @@ change_pte_range(pmd_t *pmd, unsigned lo + * bits by wiping the pte and then setting the new pte + * into place. 
+ */ +- entry = ptep_get_and_clear(pte); +- set_pte(pte, pte_modify(entry, newprot)); ++ entry = pte_modify(ptep_get_and_clear(pte), newprot); ++ set_pte(pte, entry); ++ lazy_mmu_prot_update(entry); + } + address += PAGE_SIZE; + pte++; +@@ -114,6 +117,8 @@ mprotect_fixup(struct vm_area_struct *vm + { + struct mm_struct * mm = vma->vm_mm; + unsigned long charged = 0; ++ unsigned long vma_rss; ++ int prot_dir; + pgprot_t newprot; + pgoff_t pgoff; + int error; +@@ -123,6 +128,17 @@ mprotect_fixup(struct vm_area_struct *vm + return 0; + } + ++ spin_lock(&mm->page_table_lock); ++ vma_rss = pages_in_vma_range(vma, start, end); ++ spin_unlock(&mm->page_table_lock); ++ charged = ((end - start) >> PAGE_SHIFT); ++ ++ prot_dir = ub_protected_charge(mm_ub(mm), charged - vma_rss, ++ newflags, vma); ++ error = -ENOMEM; ++ if (prot_dir == PRIVVM_ERROR) ++ goto fail_nocharge; ++ + /* + * If we make a private mapping writable we increase our commit; + * but (without finer accounting) cannot reduce our commit if we +@@ -133,9 +149,8 @@ mprotect_fixup(struct vm_area_struct *vm + */ + if (newflags & VM_WRITE) { + if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) { +- charged = (end - start) >> PAGE_SHIFT; + if (security_vm_enough_memory(charged)) +- return -ENOMEM; ++ goto fail_noacct; + newflags |= VM_ACCOUNT; + } + } +@@ -178,10 +193,16 @@ success: + vma->vm_flags = newflags; + vma->vm_page_prot = newprot; + change_protection(vma, start, end, newprot); ++ if (prot_dir == PRIVVM_TO_SHARED) ++ __ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss); + return 0; + + fail: + vm_unacct_memory(charged); ++fail_noacct: ++ if (prot_dir == PRIVVM_TO_PRIVATE) ++ __ub_unused_privvm_dec(mm_ub(mm), charged - vma_rss); ++fail_nocharge: + return error; + } + +diff -uprN linux-2.6.8.1.orig/mm/mremap.c linux-2.6.8.1-ve022stab078/mm/mremap.c +--- linux-2.6.8.1.orig/mm/mremap.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/mremap.c 2006-05-11 13:05:39.000000000 +0400 +@@ -21,6 +21,8 @@ + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr) + { + pgd_t *pgd; +@@ -81,6 +83,7 @@ static inline pte_t *alloc_one_pte_map(s + + static int + move_one_page(struct vm_area_struct *vma, unsigned long old_addr, ++ struct vm_area_struct *new_vma, + unsigned long new_addr) + { + struct address_space *mapping = NULL; +@@ -129,6 +132,8 @@ move_one_page(struct vm_area_struct *vma + pte_t pte; + pte = ptep_clear_flush(vma, old_addr, src); + set_pte(dst, pte); ++ vma->vm_rss--; ++ new_vma->vm_rss++; + } else + error = -ENOMEM; + pte_unmap_nested(src); +@@ -143,6 +148,7 @@ move_one_page(struct vm_area_struct *vma + } + + static unsigned long move_page_tables(struct vm_area_struct *vma, ++ struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long old_addr, + unsigned long len) + { +@@ -156,7 +162,8 @@ static unsigned long move_page_tables(st + * only a few pages.. This also makes error recovery easier. 
+ */ + for (offset = 0; offset < len; offset += PAGE_SIZE) { +- if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0) ++ if (move_one_page(vma, old_addr+offset, ++ new_vma, new_addr+offset) < 0) + break; + cond_resched(); + } +@@ -175,26 +182,29 @@ static unsigned long move_vma(struct vm_ + unsigned long excess = 0; + int split = 0; + ++ if (ub_memory_charge(mm_ub(mm), new_len, vma->vm_flags, ++ vma->vm_file, UB_HARD)) ++ return -ENOMEM; + /* + * We'd prefer to avoid failure later on in do_munmap: + * which may split one vma into three before unmapping. + */ + if (mm->map_count >= sysctl_max_map_count - 3) +- return -ENOMEM; ++ goto out_nomem; + + new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); + new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); + if (!new_vma) +- return -ENOMEM; ++ goto out_nomem; + +- moved_len = move_page_tables(vma, new_addr, old_addr, old_len); ++ moved_len = move_page_tables(vma, new_vma, new_addr, old_addr, old_len); + if (moved_len < old_len) { + /* + * On error, move entries back from new area to old, + * which will succeed since page tables still there, + * and then proceed to unmap new area instead of old. + */ +- move_page_tables(new_vma, old_addr, new_addr, moved_len); ++ move_page_tables(new_vma, vma, old_addr, new_addr, moved_len); + vma = new_vma; + old_len = new_len; + old_addr = new_addr; +@@ -231,7 +241,12 @@ static unsigned long move_vma(struct vm_ + new_addr + new_len); + } + +- return new_addr; ++ if (new_addr != -ENOMEM) ++ return new_addr; ++ ++out_nomem: ++ ub_memory_uncharge(mm_ub(mm), new_len, vma->vm_flags, vma->vm_file); ++ return -ENOMEM; + } + + /* +@@ -354,6 +369,12 @@ unsigned long do_mremap(unsigned long ad + if (max_addr - addr >= new_len) { + int pages = (new_len - old_len) >> PAGE_SHIFT; + ++ ret = ub_memory_charge(mm_ub(vma->vm_mm), ++ new_len - old_len, vma->vm_flags, ++ vma->vm_file, UB_HARD); ++ if (ret != 0) ++ goto out; ++ + vma_adjust(vma, vma->vm_start, + addr + new_len, vma->vm_pgoff, NULL); + +diff -uprN linux-2.6.8.1.orig/mm/oom_kill.c linux-2.6.8.1-ve022stab078/mm/oom_kill.c +--- linux-2.6.8.1.orig/mm/oom_kill.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/oom_kill.c 2006-05-11 13:05:48.000000000 +0400 +@@ -15,12 +15,22 @@ + * kernel subsystems and hints as to where to find out what things do. + */ + ++#include <linux/bitops.h> + #include <linux/mm.h> + #include <linux/sched.h> ++#include <linux/virtinfo.h> ++#include <linux/module.h> + #include <linux/swap.h> + #include <linux/timex.h> + #include <linux/jiffies.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++ ++spinlock_t oom_generation_lock = SPIN_LOCK_UNLOCKED; ++int oom_kill_counter; ++int oom_generation; ++ + /* #define DEBUG */ + + /** +@@ -106,23 +116,47 @@ static int badness(struct task_struct *p + * + * (not docbooked, we don't want this one cluttering up the manual) + */ +-static struct task_struct * select_bad_process(void) ++static struct task_struct * select_bad_process(struct user_beancounter *ub) + { ++ int points; + int maxpoints = 0; + struct task_struct *g, *p; + struct task_struct *chosen = NULL; ++ struct user_beancounter *mub; ++ ++ do_each_thread_all(g, p) { ++ if (!p->pid) ++ continue; ++ if (!p->mm) ++ continue; ++ ++#if 0 ++ /* ++ * swapoff check. ++ * Pro: do not let opportunistic swapoff kill the whole system; ++ * if the system enter OOM state, better stop swapoff. 
++ * Contra: essential services must survive without swap ++ * (otherwise, the system is grossly misconfigured), ++ * and disabling swapoff completely, with cryptic diagnostic ++ * "interrupted system call", looks like a bad idea. ++ * 2006/02/28 SAW ++ */ ++ if (!(p->flags & PF_MEMDIE) && (p->flags & PF_SWAPOFF)) ++ return p; ++#endif + +- do_each_thread(g, p) +- if (p->pid) { +- int points = badness(p); +- if (points > maxpoints) { +- chosen = p; +- maxpoints = points; +- } +- if (p->flags & PF_SWAPOFF) +- return p; ++ for (mub = mm_ub(p->mm); mub != NULL; mub = mub->parent) ++ if (mub == ub) ++ break; ++ if (mub != ub) /* wrong beancounter */ ++ continue; ++ ++ points = badness(p); ++ if (points > maxpoints) { ++ chosen = p; ++ maxpoints = points; + } +- while_each_thread(g, p); ++ } while_each_thread_all(g, p); + return chosen; + } + +@@ -141,7 +175,8 @@ static void __oom_kill_task(task_t *p) + return; + } + task_unlock(p); +- printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm); ++ printk(KERN_ERR "Out of Memory: Killing process %d (%.20s), flags=%lx, " ++ "mm=%p.\n", p->pid, p->comm, p->flags, p->mm); + + /* + * We give our sacrificial lamb high priority and access to +@@ -149,7 +184,10 @@ static void __oom_kill_task(task_t *p) + * exit() and clear out its resources quickly... + */ + p->time_slice = HZ; +- p->flags |= PF_MEMALLOC | PF_MEMDIE; ++ /* flag should be set atomically since p != current */ ++ set_bit(generic_ffs(PF_MEMDIE) - 1, &p->flags); ++ /* oom_generation_lock must be held */ ++ oom_kill_counter++; + + /* This process has hardware access, be more careful. */ + if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) { +@@ -159,53 +197,55 @@ static void __oom_kill_task(task_t *p) + } + } + +-static struct mm_struct *oom_kill_task(task_t *p) +-{ +- struct mm_struct *mm = get_task_mm(p); +- if (!mm || mm == &init_mm) +- return NULL; +- __oom_kill_task(p); +- return mm; +-} +- +- + /** +- * oom_kill - kill the "best" process when we run out of memory ++ * oom_kill - do a complete job of killing a process + * +- * If we run out of memory, we have the choice between either +- * killing a random task (bad), letting the system crash (worse) +- * OR try to be smart about which process to kill. Note that we +- * don't have to be perfect here, we just have to be good. ++ * Returns TRUE if selected process is unkillable. ++ * Called with oom_generation_lock and tasklist_lock held, drops them. + */ +-static void oom_kill(void) ++static int oom_kill(struct task_struct *p, ++ struct user_beancounter *ub, long ub_maxover) + { + struct mm_struct *mm; +- struct task_struct *g, *p, *q; +- +- read_lock(&tasklist_lock); +-retry: +- p = select_bad_process(); +- +- /* Found nothing?!?! Either we hang forever, or we panic. */ +- if (!p) { +- show_free_areas(); +- panic("Out of memory and no killable processes...\n"); ++ struct task_struct *g, *q; ++ uid_t ub_uid; ++ int suicide; ++ ++ mm = get_task_mm(p); ++ if (mm == &init_mm) { ++ mmput(mm); ++ mm = NULL; + } ++ if (mm == NULL) ++ return -1; ++ ++ /* ++ * The following message showing mm, its size, and free space ++ * should be printed regardless of CONFIG_USER_RESOURCE. ++ */ ++ ub_uid = (ub ? ub->ub_uid : -1); ++ printk(KERN_INFO"MM to kill %p (UB=%d, UBover=%ld, VM=%lu, free=%u).\n", ++ mm, ub_uid, ub_maxover, ++ mm->total_vm, nr_free_pages()); + +- mm = oom_kill_task(p); +- if (!mm) +- goto retry; + /* + * kill all processes that share the ->mm (i.e. 
all threads), + * but are in a different thread group + */ +- do_each_thread(g, q) +- if (q->mm == mm && q->tgid != p->tgid) ++ suicide = 0; ++ __oom_kill_task(p); ++ if (p == current) ++ suicide = 1; ++ do_each_thread_all(g, q) { ++ if (q->mm == mm && q->tgid != p->tgid) { + __oom_kill_task(q); +- while_each_thread(g, q); +- if (!p->mm) +- printk(KERN_INFO "Fixed up OOM kill of mm-less task\n"); ++ if (q == current) ++ suicide = 1; ++ } ++ } while_each_thread_all(g, q); + read_unlock(&tasklist_lock); ++ spin_unlock(&oom_generation_lock); ++ ub_oomkill_task(mm, ub, ub_maxover); /* nonblocking but long */ + mmput(mm); + + /* +@@ -213,81 +253,132 @@ retry: + * killing itself before someone else gets the chance to ask + * for more memory. + */ +- yield(); +- return; ++ if (!suicide) ++ yield(); ++ ++ return 0; + } + + /** +- * out_of_memory - is the system out of memory? ++ * oom_select_and_kill - kill the "best" process when we run out of memory ++ * ++ * If we run out of memory, we have the choice between either ++ * killing a random task (bad), letting the system crash (worse) ++ * OR try to be smart about which process to kill. Note that we ++ * don't have to be perfect here, we just have to be good. ++ * ++ * Called with oom_generation_lock held, drops it. + */ +-void out_of_memory(int gfp_mask) ++static void oom_select_and_kill(void) + { +- /* +- * oom_lock protects out_of_memory()'s static variables. +- * It's a global lock; this is not performance-critical. +- */ +- static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED; +- static unsigned long first, last, count, lastkill; +- unsigned long now, since; +- +- spin_lock(&oom_lock); +- now = jiffies; +- since = now - last; +- last = now; ++ struct user_beancounter *ub; ++ struct task_struct *p; ++ long ub_maxover; ++ int r; + +- /* +- * If it's been a long time since last failure, +- * we're not oom. +- */ +- if (since > 5*HZ) +- goto reset; ++ ub_clear_oom(); + +- /* +- * If we haven't tried for at least one second, +- * we're not really oom. +- */ +- since = now - first; +- if (since < HZ) +- goto out_unlock; ++ read_lock(&tasklist_lock); ++retry: ++ ub = ub_select_worst(&ub_maxover); ++ p = select_bad_process(ub); + +- /* +- * If we have gotten only a few failures, +- * we're not really oom. +- */ +- if (++count < 10) +- goto out_unlock; ++ /* Found nothing?!?! Either we hang forever, or we panic. */ ++ if (!p) { ++ if (!ub) { ++ show_free_areas(); ++ panic("Out of memory and no killable processes...\n"); ++ } + +- /* +- * If we just killed a process, wait a while +- * to give that task a chance to exit. This +- * avoids killing multiple processes needlessly. +- */ +- since = now - lastkill; +- if (since < HZ*5) +- goto out_unlock; ++ goto retry; ++ } + +- /* +- * Ok, really out of memory. Kill something. +- */ +- lastkill = now; ++ r = oom_kill(p, ub, ub_maxover); ++ put_beancounter(ub); ++ if (r) ++ goto retry; ++} + +- printk("oom-killer: gfp_mask=0x%x\n", gfp_mask); +- show_free_areas(); ++void oom_select_and_kill_sc(struct user_beancounter *scope) ++{ ++ struct user_beancounter *ub; ++ struct task_struct *p; + +- /* oom_kill() sleeps */ +- spin_unlock(&oom_lock); +- oom_kill(); +- spin_lock(&oom_lock); ++ ub_clear_oom(); ++ ub = get_beancounter(scope); + +-reset: +- /* +- * We dropped the lock above, so check to be sure the variable +- * first only ever increases to prevent false OOM's. 
+- */ +- if (time_after(now, first)) +- first = now; +- count = 0; ++ read_lock(&tasklist_lock); ++retry: ++ p = select_bad_process(ub); ++ if (!p) { ++ read_unlock(&tasklist_lock); ++ return; ++ } ++ ++ if (oom_kill(p, ub, 0)) ++ goto retry; ++ ++ put_beancounter(ub); ++} ++ ++static void do_out_of_memory(struct oom_freeing_stat *stat) ++{ ++ spin_lock(&oom_generation_lock); ++ if (oom_generation != stat->oom_generation) { ++ /* OOM-killed process has exited */ ++ spin_unlock(&oom_generation_lock); ++ return; ++ } ++ if (oom_kill_counter) { ++ /* OOM in progress */ ++ spin_unlock(&oom_generation_lock); ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(5 * HZ); ++ ++ spin_lock(&oom_generation_lock); ++ if (oom_generation != stat->oom_generation) { ++ spin_unlock(&oom_generation_lock); ++ return; ++ } ++ /* ++ * Some process is stuck exiting. ++ * No choice other than to kill something else. ++ */ ++ oom_kill_counter = 0; ++ } ++ oom_select_and_kill(); ++} ++ ++void do_out_of_memory_sc(struct user_beancounter *ub) ++{ ++ spin_lock(&oom_generation_lock); ++ oom_select_and_kill_sc(ub); ++} ++EXPORT_SYMBOL(do_out_of_memory_sc); ++ ++/** ++ * out_of_memory - is the system out of memory? ++ */ ++void out_of_memory(struct oom_freeing_stat *stat, int gfp_mask) ++{ ++ if (nr_swap_pages > 0) { ++ /* some pages have been freed */ ++ if (stat->freed) ++ return; ++ /* some IO was started */ ++ if (stat->written) ++ return; ++ /* some pages have been swapped out, ref. counter removed */ ++ if (stat->swapped) ++ return; ++ /* some slabs were shrinked */ ++ if (stat->slabs) ++ return; ++ } ++ ++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_OUTOFMEM, stat) ++ & (NOTIFY_OK | NOTIFY_FAIL)) ++ return; + +-out_unlock: +- spin_unlock(&oom_lock); ++ do_out_of_memory(stat); + } +diff -uprN linux-2.6.8.1.orig/mm/page_alloc.c linux-2.6.8.1-ve022stab078/mm/page_alloc.c +--- linux-2.6.8.1.orig/mm/page_alloc.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/page_alloc.c 2006-05-11 13:05:44.000000000 +0400 +@@ -31,9 +31,12 @@ + #include <linux/topology.h> + #include <linux/sysctl.h> + #include <linux/cpu.h> ++#include <linux/kernel_stat.h> + + #include <asm/tlbflush.h> + ++#include <ub/ub_mem.h> ++ + DECLARE_BITMAP(node_online_map, MAX_NUMNODES); + struct pglist_data *pgdat_list; + unsigned long totalram_pages; +@@ -41,7 +44,9 @@ unsigned long totalhigh_pages; + long nr_swap_pages; + int numnodes = 1; + int sysctl_lower_zone_protection = 0; ++int alloc_fail_warn = 0; + ++EXPORT_SYMBOL(pgdat_list); + EXPORT_SYMBOL(totalram_pages); + EXPORT_SYMBOL(nr_swap_pages); + +@@ -284,6 +289,7 @@ void __free_pages_ok(struct page *page, + free_pages_check(__FUNCTION__, page + i); + list_add(&page->lru, &list); + kernel_map_pages(page, 1<<order, 0); ++ ub_page_uncharge(page, order); + free_pages_bulk(page_zone(page), 1, &list, order); + } + +@@ -513,6 +519,7 @@ static void fastcall free_hot_cold_page( + inc_page_state(pgfree); + free_pages_check(__FUNCTION__, page); + pcp = &zone->pageset[get_cpu()].pcp[cold]; ++ ub_page_uncharge(page, 0); + local_irq_save(flags); + if (pcp->count >= pcp->high) + pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); +@@ -578,6 +585,26 @@ buffered_rmqueue(struct zone *zone, int + return page; + } + ++static void __alloc_collect_stats(unsigned int gfp_mask, ++ unsigned int order, struct page *page, cycles_t time) ++{ ++ int ind; ++ unsigned long flags; ++ ++ time = get_cycles() - time; ++ if (!(gfp_mask & __GFP_WAIT)) ++ ind = 0; ++ else if 
(!(gfp_mask & __GFP_HIGHMEM)) ++ ind = (order > 0 ? 2 : 1); ++ else ++ ind = (order > 0 ? 4 : 3); ++ spin_lock_irqsave(&kstat_glb_lock, flags); ++ KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time); ++ if (!page) ++ kstat_glob.alloc_fails[ind]++; ++ spin_unlock_irqrestore(&kstat_glb_lock, flags); ++} ++ + /* + * This is the 'heart' of the zoned buddy allocator. + * +@@ -607,6 +634,7 @@ __alloc_pages(unsigned int gfp_mask, uns + int i; + int alloc_type; + int do_retry; ++ cycles_t start_time; + + might_sleep_if(wait); + +@@ -614,6 +642,7 @@ __alloc_pages(unsigned int gfp_mask, uns + if (zones[0] == NULL) /* no zones in the zonelist */ + return NULL; + ++ start_time = get_cycles(); + alloc_type = zone_idx(zones[0]); + + /* Go through the zonelist once, looking for a zone with enough free */ +@@ -678,6 +707,10 @@ rebalance: + goto got_pg; + } + } ++ if (gfp_mask & __GFP_NOFAIL) { ++ blk_congestion_wait(WRITE, HZ/50); ++ goto rebalance; ++ } + goto nopage; + } + +@@ -730,15 +763,24 @@ rebalance: + } + + nopage: +- if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { ++ if (alloc_fail_warn && !(gfp_mask & __GFP_NOWARN) ++ && printk_ratelimit()) { + printk(KERN_WARNING "%s: page allocation failure." + " order:%d, mode:0x%x\n", + p->comm, order, gfp_mask); + dump_stack(); + } ++ __alloc_collect_stats(gfp_mask, order, NULL, start_time); + return NULL; + got_pg: + kernel_map_pages(page, 1 << order, 1); ++ __alloc_collect_stats(gfp_mask, order, page, start_time); ++ ++ if (ub_page_charge(page, order, gfp_mask)) { ++ __free_pages(page, order); ++ page = NULL; ++ } ++ + return page; + } + +@@ -887,6 +929,17 @@ unsigned int nr_free_highpages (void) + } + #endif + ++unsigned int nr_free_lowpages (void) ++{ ++ pg_data_t *pgdat; ++ unsigned int pages = 0; ++ ++ for_each_pgdat(pgdat) ++ pages += pgdat->node_zones[ZONE_NORMAL].free_pages; ++ ++ return pages; ++} ++ + #ifdef CONFIG_NUMA + static void show_node(struct zone *zone) + { +@@ -1710,7 +1763,10 @@ static void *vmstat_start(struct seq_fil + m->private = ps; + if (!ps) + return ERR_PTR(-ENOMEM); +- get_full_page_state(ps); ++ if (ve_is_super(get_exec_env())) ++ get_full_page_state(ps); ++ else ++ memset(ps, 0, sizeof(*ps)); + ps->pgpgin /= 2; /* sectors -> kbytes */ + ps->pgpgout /= 2; + return (unsigned long *)ps + *pos; +diff -uprN linux-2.6.8.1.orig/mm/pdflush.c linux-2.6.8.1-ve022stab078/mm/pdflush.c +--- linux-2.6.8.1.orig/mm/pdflush.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/pdflush.c 2006-05-11 13:05:25.000000000 +0400 +@@ -106,8 +106,8 @@ static int __pdflush(struct pdflush_work + spin_unlock_irq(&pdflush_lock); + + schedule(); +- if (current->flags & PF_FREEZE) { +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) { ++ refrigerator(); + spin_lock_irq(&pdflush_lock); + continue; + } +diff -uprN linux-2.6.8.1.orig/mm/prio_tree.c linux-2.6.8.1-ve022stab078/mm/prio_tree.c +--- linux-2.6.8.1.orig/mm/prio_tree.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/prio_tree.c 2006-05-11 13:05:38.000000000 +0400 +@@ -81,6 +81,8 @@ static inline unsigned long prio_tree_ma + return index_bits_to_maxindex[bits - 1]; + } + ++static void prio_tree_remove(struct prio_tree_root *, struct prio_tree_node *); ++ + /* + * Extend a priority search tree so that it can store a node with heap_index + * max_heap_index. In the worst case, this algorithm takes O((log n)^2). 
+@@ -90,8 +92,6 @@ static inline unsigned long prio_tree_ma + static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root, + struct prio_tree_node *node, unsigned long max_heap_index) + { +- static void prio_tree_remove(struct prio_tree_root *, +- struct prio_tree_node *); + struct prio_tree_node *first = NULL, *prev, *last = NULL; + + if (max_heap_index > prio_tree_maxindex(root->index_bits)) +@@ -245,7 +245,7 @@ static struct prio_tree_node *prio_tree_ + mask >>= 1; + + if (!mask) { +- mask = 1UL << (root->index_bits - 1); ++ mask = 1UL << (BITS_PER_LONG - 1); + size_flag = 1; + } + } +@@ -336,7 +336,7 @@ static struct prio_tree_node *prio_tree_ + iter->mask = ULONG_MAX; + } else { + iter->size_level = 1; +- iter->mask = 1UL << (root->index_bits - 1); ++ iter->mask = 1UL << (BITS_PER_LONG - 1); + } + } + return iter->cur; +@@ -380,7 +380,7 @@ static struct prio_tree_node *prio_tree_ + iter->mask = ULONG_MAX; + } else { + iter->size_level = 1; +- iter->mask = 1UL << (root->index_bits - 1); ++ iter->mask = 1UL << (BITS_PER_LONG - 1); + } + } + return iter->cur; +diff -uprN linux-2.6.8.1.orig/mm/rmap.c linux-2.6.8.1-ve022stab078/mm/rmap.c +--- linux-2.6.8.1.orig/mm/rmap.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/rmap.c 2006-05-11 13:05:39.000000000 +0400 +@@ -33,6 +33,8 @@ + + #include <asm/tlbflush.h> + ++#include <ub/ub_vmpages.h> ++ + //#define RMAP_DEBUG /* can be enabled only for debugging */ + + kmem_cache_t *anon_vma_cachep; +@@ -160,7 +162,8 @@ static void anon_vma_ctor(void *data, km + void __init anon_vma_init(void) + { + anon_vma_cachep = kmem_cache_create("anon_vma", +- sizeof(struct anon_vma), 0, SLAB_PANIC, anon_vma_ctor, NULL); ++ sizeof(struct anon_vma), 0, SLAB_PANIC | SLAB_UBC, ++ anon_vma_ctor, NULL); + } + + /* this needs the page->flags PG_maplock held */ +@@ -369,8 +372,8 @@ void page_add_anon_rmap(struct page *pag + inc_page_state(nr_mapped); + } else { + BUG_ON(!PageAnon(page)); +- BUG_ON(page->index != index); +- BUG_ON(page->mapping != (struct address_space *) anon_vma); ++ WARN_ON(page->index != index); ++ WARN_ON(page->mapping != (struct address_space *) anon_vma); + } + page->mapcount++; + page_map_unlock(page); +@@ -513,6 +516,10 @@ static int try_to_unmap_one(struct page + } + + mm->rss--; ++ vma->vm_rss--; ++ mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++; ++ ub_unused_privvm_inc(mm_ub(mm), 1, vma); ++ pb_remove_ref(page, mm_ub(mm)); + BUG_ON(!page->mapcount); + page->mapcount--; + page_cache_release(page); +@@ -553,12 +560,13 @@ static int try_to_unmap_cluster(unsigned + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + pmd_t *pmd; +- pte_t *pte; ++ pte_t *pte, *original_pte; + pte_t pteval; + struct page *page; + unsigned long address; + unsigned long end; + unsigned long pfn; ++ unsigned long old_rss; + + /* + * We need the page_table_lock to protect us from page faults, +@@ -582,7 +590,8 @@ static int try_to_unmap_cluster(unsigned + if (!pmd_present(*pmd)) + goto out_unlock; + +- for (pte = pte_offset_map(pmd, address); ++ old_rss = mm->rss; ++ for (original_pte = pte = pte_offset_map(pmd, address); + address < end; pte++, address += PAGE_SIZE) { + + if (!pte_present(*pte)) +@@ -613,12 +622,17 @@ static int try_to_unmap_cluster(unsigned + set_page_dirty(page); + + page_remove_rmap(page); +- page_cache_release(page); + mm->rss--; ++ vma->vm_rss--; ++ mm_ub(mm)->ub_perfstat[smp_processor_id()].unmap++; ++ pb_remove_ref(page, mm_ub(mm)); ++ page_cache_release(page); + (*mapcount)--; + } ++ if (old_rss > 
mm->rss) ++ ub_unused_privvm_inc(mm_ub(mm), old_rss - mm->rss, vma); + +- pte_unmap(pte); ++ pte_unmap(original_pte); + + out_unlock: + spin_unlock(&mm->page_table_lock); +diff -uprN linux-2.6.8.1.orig/mm/shmem.c linux-2.6.8.1-ve022stab078/mm/shmem.c +--- linux-2.6.8.1.orig/mm/shmem.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/shmem.c 2006-05-11 13:05:42.000000000 +0400 +@@ -45,6 +45,9 @@ + #include <asm/div64.h> + #include <asm/pgtable.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_vmpages.h> ++ + /* This magic number is used in glibc for posix shared memory */ + #define TMPFS_MAGIC 0x01021994 + +@@ -204,7 +207,7 @@ static void shmem_free_block(struct inod + * + * It has to be called with the spinlock held. + */ +-static void shmem_recalc_inode(struct inode *inode) ++static void shmem_recalc_inode(struct inode *inode, unsigned long swp_freed) + { + struct shmem_inode_info *info = SHMEM_I(inode); + long freed; +@@ -217,6 +220,9 @@ static void shmem_recalc_inode(struct in + sbinfo->free_blocks += freed; + inode->i_blocks -= freed*BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); ++ if (freed > swp_freed) ++ ub_tmpfs_respages_dec(shm_info_ub(info), ++ freed - swp_freed); + shmem_unacct_blocks(info->flags, freed); + } + } +@@ -321,6 +327,11 @@ static void shmem_swp_set(struct shmem_i + info->swapped += incdec; + if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) + kmap_atomic_to_page(entry)->nr_swapped += incdec; ++ ++ if (incdec == 1) ++ ub_tmpfs_respages_dec(shm_info_ub(info), 1); ++ else ++ ub_tmpfs_respages_inc(shm_info_ub(info), 1); + } + + /* +@@ -337,14 +348,24 @@ static swp_entry_t *shmem_swp_alloc(stru + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct page *page = NULL; + swp_entry_t *entry; ++ unsigned long ub_val; + + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + return ERR_PTR(-EINVAL); + ++ ub_val = 0; ++ if (info->next_index <= index) { ++ ub_val = index + 1 - info->next_index; ++ if (ub_shmpages_charge(shm_info_ub(info), ub_val)) ++ return ERR_PTR(-ENOSPC); ++ } ++ + while (!(entry = shmem_swp_entry(info, index, &page))) { +- if (sgp == SGP_READ) +- return shmem_swp_map(ZERO_PAGE(0)); ++ if (sgp == SGP_READ) { ++ entry = shmem_swp_map(ZERO_PAGE(0)); ++ goto out; ++ } + /* + * Test free_blocks against 1 not 0, since we have 1 data + * page (and perhaps indirect index pages) yet to allocate: +@@ -353,14 +374,16 @@ static swp_entry_t *shmem_swp_alloc(stru + spin_lock(&sbinfo->stat_lock); + if (sbinfo->free_blocks <= 1) { + spin_unlock(&sbinfo->stat_lock); +- return ERR_PTR(-ENOSPC); ++ entry = ERR_PTR(-ENOSPC); ++ goto out; + } + sbinfo->free_blocks--; + inode->i_blocks += BLOCKS_PER_PAGE; + spin_unlock(&sbinfo->stat_lock); + + spin_unlock(&info->lock); +- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping)); ++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | ++ __GFP_UBC); + if (page) { + clear_highpage(page); + page->nr_swapped = 0; +@@ -368,25 +391,36 @@ static swp_entry_t *shmem_swp_alloc(stru + spin_lock(&info->lock); + + if (!page) { +- shmem_free_block(inode); +- return ERR_PTR(-ENOMEM); ++ entry = ERR_PTR(-ENOMEM); ++ goto out_block; + } + if (sgp != SGP_WRITE && + ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + entry = ERR_PTR(-EINVAL); +- break; ++ goto out_page; + } +- if (info->next_index <= index) ++ if (info->next_index <= index) { ++ ub_val = 0; + info->next_index = index + 1; ++ } + } + if (page) { + /* another task gave its 
page, or truncated the file */ + shmem_free_block(inode); + shmem_dir_free(page); + } +- if (info->next_index <= index && !IS_ERR(entry)) ++ if (info->next_index <= index) + info->next_index = index + 1; + return entry; ++ ++out_page: ++ shmem_dir_free(page); ++out_block: ++ shmem_free_block(inode); ++out: ++ if (ub_val) ++ ub_shmpages_uncharge(shm_info_ub(info), ub_val); ++ return entry; + } + + /* +@@ -423,13 +457,16 @@ static void shmem_truncate(struct inode + swp_entry_t *ptr; + int offset; + int freed; ++ unsigned long swp_freed; + ++ swp_freed = 0; + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (idx >= info->next_index) + return; + + spin_lock(&info->lock); ++ ub_shmpages_uncharge(shm_info_ub(info), info->next_index - idx); + info->flags |= SHMEM_TRUNCATE; + limit = info->next_index; + info->next_index = idx; +@@ -438,7 +475,9 @@ static void shmem_truncate(struct inode + size = limit; + if (size > SHMEM_NR_DIRECT) + size = SHMEM_NR_DIRECT; +- info->swapped -= shmem_free_swp(ptr+idx, ptr+size); ++ freed = shmem_free_swp(ptr+idx, ptr+size); ++ swp_freed += freed; ++ info->swapped -= freed; + } + if (!info->i_indirect) + goto done2; +@@ -508,6 +547,7 @@ static void shmem_truncate(struct inode + shmem_swp_unmap(ptr); + info->swapped -= freed; + subdir->nr_swapped -= freed; ++ swp_freed += freed; + BUG_ON(subdir->nr_swapped > offset); + } + if (offset) +@@ -544,7 +584,7 @@ done2: + spin_lock(&info->lock); + } + info->flags &= ~SHMEM_TRUNCATE; +- shmem_recalc_inode(inode); ++ shmem_recalc_inode(inode, swp_freed); + spin_unlock(&info->lock); + } + +@@ -609,6 +649,8 @@ static void shmem_delete_inode(struct in + spin_lock(&sbinfo->stat_lock); + sbinfo->free_inodes++; + spin_unlock(&sbinfo->stat_lock); ++ put_beancounter(shm_info_ub(info)); ++ shm_info_ub(info) = NULL; + clear_inode(inode); + } + +@@ -752,12 +794,11 @@ static int shmem_writepage(struct page * + info = SHMEM_I(inode); + if (info->flags & VM_LOCKED) + goto redirty; +- swap = get_swap_page(); ++ swap = get_swap_page(shm_info_ub(info)); + if (!swap.val) + goto redirty; + + spin_lock(&info->lock); +- shmem_recalc_inode(inode); + if (index >= info->next_index) { + BUG_ON(!(info->flags & SHMEM_TRUNCATE)); + goto unlock; +@@ -890,7 +931,6 @@ repeat: + goto failed; + + spin_lock(&info->lock); +- shmem_recalc_inode(inode); + entry = shmem_swp_alloc(info, idx, sgp); + if (IS_ERR(entry)) { + spin_unlock(&info->lock); +@@ -1051,6 +1091,7 @@ repeat: + clear_highpage(filepage); + flush_dcache_page(filepage); + SetPageUptodate(filepage); ++ ub_tmpfs_respages_inc(shm_info_ub(info), 1); + } + done: + if (!*pagep) { +@@ -1082,6 +1123,8 @@ struct page *shmem_nopage(struct vm_area + idx = (address - vma->vm_start) >> PAGE_SHIFT; + idx += vma->vm_pgoff; + idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; ++ if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) ++ return NOPAGE_SIGBUS; + + error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); + if (error) +@@ -1151,19 +1194,6 @@ shmem_get_policy(struct vm_area_struct * + } + #endif + +-void shmem_lock(struct file *file, int lock) +-{ +- struct inode *inode = file->f_dentry->d_inode; +- struct shmem_inode_info *info = SHMEM_I(inode); +- +- spin_lock(&info->lock); +- if (lock) +- info->flags |= VM_LOCKED; +- else +- info->flags &= ~VM_LOCKED; +- spin_unlock(&info->lock); +-} +- + static int shmem_mmap(struct file *file, struct vm_area_struct *vma) + { + file_accessed(file); +@@ -1198,6 +1228,7 @@ shmem_get_inode(struct 
super_block *sb, + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + info = SHMEM_I(inode); + memset(info, 0, (char *)inode - (char *)info); ++ shm_info_ub(info) = get_beancounter(get_exec_ub()); + spin_lock_init(&info->lock); + mpol_shared_policy_init(&info->policy); + switch (mode & S_IFMT) { +@@ -1317,6 +1348,7 @@ shmem_file_write(struct file *file, cons + break; + + left = bytes; ++#ifndef CONFIG_X86_UACCESS_INDIRECT + if (PageHighMem(page)) { + volatile unsigned char dummy; + __get_user(dummy, buf); +@@ -1326,6 +1358,7 @@ shmem_file_write(struct file *file, cons + left = __copy_from_user(kaddr + offset, buf, bytes); + kunmap_atomic(kaddr, KM_USER0); + } ++#endif + if (left) { + kaddr = kmap(page); + left = __copy_from_user(kaddr + offset, buf, bytes); +@@ -1960,20 +1993,42 @@ static struct vm_operations_struct shmem + #endif + }; + ++int is_shmem_mapping(struct address_space *map) ++{ ++ return (map != NULL && map->a_ops == &shmem_aops); ++} ++ + static struct super_block *shmem_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) + { + return get_sb_nodev(fs_type, flags, data, shmem_fill_super); + } + +-static struct file_system_type tmpfs_fs_type = { ++struct file_system_type tmpfs_fs_type = { + .owner = THIS_MODULE, + .name = "tmpfs", + .get_sb = shmem_get_sb, + .kill_sb = kill_litter_super, + }; ++ ++EXPORT_SYMBOL(tmpfs_fs_type); ++ + static struct vfsmount *shm_mnt; + ++#ifndef CONFIG_VE ++#define visible_shm_mnt shm_mnt ++#else ++#define visible_shm_mnt (get_exec_env()->shmem_mnt) ++#endif ++ ++void prepare_shmmnt(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->shmem_mnt = shm_mnt; ++ shm_mnt = (struct vfsmount *)0x10111213; ++#endif ++} ++ + static int __init init_tmpfs(void) + { + int error; +@@ -1999,6 +2054,7 @@ static int __init init_tmpfs(void) + + /* The internal instance should not do size checking */ + shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX); ++ prepare_shmmnt(); + return 0; + + out1: +@@ -2011,6 +2067,32 @@ out3: + } + module_init(init_tmpfs) + ++static inline int shm_charge_ahead(struct inode *inode) ++{ ++ struct shmem_inode_info *info = SHMEM_I(inode); ++ unsigned long idx; ++ swp_entry_t *entry; ++ ++ if (!inode->i_size) ++ return 0; ++ idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; ++ /* ++ * Just touch info to allocate space for entry and ++ * make all UBC checks ++ */ ++ spin_lock(&info->lock); ++ entry = shmem_swp_alloc(info, idx, SGP_CACHE); ++ if (IS_ERR(entry)) ++ goto err; ++ shmem_swp_unmap(entry); ++ spin_unlock(&info->lock); ++ return 0; ++ ++err: ++ spin_unlock(&info->lock); ++ return PTR_ERR(entry); ++} ++ + /* + * shmem_file_setup - get an unlinked file living in tmpfs + * +@@ -2026,8 +2108,8 @@ struct file *shmem_file_setup(char *name + struct dentry *dentry, *root; + struct qstr this; + +- if (IS_ERR(shm_mnt)) +- return (void *)shm_mnt; ++ if (IS_ERR(visible_shm_mnt)) ++ return (void *)visible_shm_mnt; + + if (size > SHMEM_MAX_BYTES) + return ERR_PTR(-EINVAL); +@@ -2039,7 +2121,7 @@ struct file *shmem_file_setup(char *name + this.name = name; + this.len = strlen(name); + this.hash = 0; /* will go */ +- root = shm_mnt->mnt_root; ++ root = visible_shm_mnt->mnt_root; + dentry = d_alloc(root, &this); + if (!dentry) + goto put_memory; +@@ -2058,7 +2140,10 @@ struct file *shmem_file_setup(char *name + d_instantiate(dentry, inode); + inode->i_size = size; + inode->i_nlink = 0; /* It is unlinked */ +- file->f_vfsmnt = mntget(shm_mnt); ++ error = shm_charge_ahead(inode); ++ if (error) ++ 
goto close_file; ++ file->f_vfsmnt = mntget(visible_shm_mnt); + file->f_dentry = dentry; + file->f_mapping = inode->i_mapping; + file->f_op = &shmem_file_operations; +@@ -2090,6 +2175,8 @@ int shmem_zero_setup(struct vm_area_stru + + if (vma->vm_file) + fput(vma->vm_file); ++ else if (vma->vm_flags & VM_WRITE) /* should match VM_UB_PRIVATE */ ++ __ub_unused_privvm_dec(mm_ub(vma->vm_mm), size >> PAGE_SHIFT); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; + return 0; +diff -uprN linux-2.6.8.1.orig/mm/slab.c linux-2.6.8.1-ve022stab078/mm/slab.c +--- linux-2.6.8.1.orig/mm/slab.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/slab.c 2006-05-11 13:05:41.000000000 +0400 +@@ -91,32 +91,21 @@ + #include <linux/cpu.h> + #include <linux/sysctl.h> + #include <linux/module.h> ++#include <linux/kmem_slab.h> ++#include <linux/kmem_cache.h> ++#include <linux/kernel_stat.h> ++#include <linux/ve_owner.h> + + #include <asm/uaccess.h> + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> + +-/* +- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, +- * SLAB_RED_ZONE & SLAB_POISON. +- * 0 for faster, smaller code (especially in the critical paths). +- * +- * STATS - 1 to collect stats for /proc/slabinfo. +- * 0 for faster, smaller code (especially in the critical paths). +- * +- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) +- */ +- +-#ifdef CONFIG_DEBUG_SLAB +-#define DEBUG 1 +-#define STATS 1 +-#define FORCED_DEBUG 1 +-#else +-#define DEBUG 0 +-#define STATS 0 +-#define FORCED_DEBUG 0 +-#endif ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> + ++#define DEBUG SLAB_DEBUG ++#define STATS SLAB_STATS ++#define FORCED_DEBUG SLAB_FORCED_DEBUG + + /* Shouldn't this be in a header file somewhere? */ + #define BYTES_PER_WORD sizeof(void *) +@@ -139,182 +128,20 @@ + SLAB_POISON | SLAB_HWCACHE_ALIGN | \ + SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ +- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC) ++ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ ++ SLAB_UBC | SLAB_NO_CHARGE) + #else + # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ +- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC) ++ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ ++ SLAB_UBC | SLAB_NO_CHARGE) + #endif + +-/* +- * kmem_bufctl_t: +- * +- * Bufctl's are used for linking objs within a slab +- * linked offsets. +- * +- * This implementation relies on "struct page" for locating the cache & +- * slab an object belongs to. +- * This allows the bufctl structure to be small (one int), but limits +- * the number of objects a slab (not a cache) can contain when off-slab +- * bufctls are used. The limit is the size of the largest general cache +- * that does not use off-slab slabs. +- * For 32bit archs with 4 kB pages, is this 56. +- * This is not serious, as it is only for large objects, when it is unwise +- * to have too many per slab. +- * Note: This limit can be raised by introducing a general cache whose size +- * is less than 512 (PAGE_SIZE<<3), but greater than 256. +- */ +- +-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) +-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) +-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) +- + /* Max number of objs-per-slab for caches which use off-slab slabs. + * Needed to avoid a possible looping condition in cache_grow(). + */ + static unsigned long offslab_limit; + +-/* +- * struct slab +- * +- * Manages the objs in a slab. 
Placed either at the beginning of mem allocated +- * for a slab, or allocated from an general cache. +- * Slabs are chained into three list: fully used, partial, fully free slabs. +- */ +-struct slab { +- struct list_head list; +- unsigned long colouroff; +- void *s_mem; /* including colour offset */ +- unsigned int inuse; /* num of objs active in slab */ +- kmem_bufctl_t free; +-}; +- +-/* +- * struct array_cache +- * +- * Per cpu structures +- * Purpose: +- * - LIFO ordering, to hand out cache-warm objects from _alloc +- * - reduce the number of linked list operations +- * - reduce spinlock operations +- * +- * The limit is stored in the per-cpu structure to reduce the data cache +- * footprint. +- * +- */ +-struct array_cache { +- unsigned int avail; +- unsigned int limit; +- unsigned int batchcount; +- unsigned int touched; +-}; +- +-/* bootstrap: The caches do not work without cpuarrays anymore, +- * but the cpuarrays are allocated from the generic caches... +- */ +-#define BOOT_CPUCACHE_ENTRIES 1 +-struct arraycache_init { +- struct array_cache cache; +- void * entries[BOOT_CPUCACHE_ENTRIES]; +-}; +- +-/* +- * The slab lists of all objects. +- * Hopefully reduce the internal fragmentation +- * NUMA: The spinlock could be moved from the kmem_cache_t +- * into this structure, too. Figure out what causes +- * fewer cross-node spinlock operations. +- */ +-struct kmem_list3 { +- struct list_head slabs_partial; /* partial list first, better asm code */ +- struct list_head slabs_full; +- struct list_head slabs_free; +- unsigned long free_objects; +- int free_touched; +- unsigned long next_reap; +- struct array_cache *shared; +-}; +- +-#define LIST3_INIT(parent) \ +- { \ +- .slabs_full = LIST_HEAD_INIT(parent.slabs_full), \ +- .slabs_partial = LIST_HEAD_INIT(parent.slabs_partial), \ +- .slabs_free = LIST_HEAD_INIT(parent.slabs_free) \ +- } +-#define list3_data(cachep) \ +- (&(cachep)->lists) +- +-/* NUMA: per-node */ +-#define list3_data_ptr(cachep, ptr) \ +- list3_data(cachep) +- +-/* +- * kmem_cache_t +- * +- * manages a cache. +- */ +- +-struct kmem_cache_s { +-/* 1) per-cpu data, touched during every alloc/free */ +- struct array_cache *array[NR_CPUS]; +- unsigned int batchcount; +- unsigned int limit; +-/* 2) touched by every alloc & free from the backend */ +- struct kmem_list3 lists; +- /* NUMA: kmem_3list_t *nodelists[MAX_NUMNODES] */ +- unsigned int objsize; +- unsigned int flags; /* constant flags */ +- unsigned int num; /* # of objs per slab */ +- unsigned int free_limit; /* upper limit of objects in the lists */ +- spinlock_t spinlock; +- +-/* 3) cache_grow/shrink */ +- /* order of pgs per slab (2^n) */ +- unsigned int gfporder; +- +- /* force GFP flags, e.g. 
GFP_DMA */ +- unsigned int gfpflags; +- +- size_t colour; /* cache colouring range */ +- unsigned int colour_off; /* colour offset */ +- unsigned int colour_next; /* cache colouring */ +- kmem_cache_t *slabp_cache; +- unsigned int slab_size; +- unsigned int dflags; /* dynamic flags */ +- +- /* constructor func */ +- void (*ctor)(void *, kmem_cache_t *, unsigned long); +- +- /* de-constructor func */ +- void (*dtor)(void *, kmem_cache_t *, unsigned long); +- +-/* 4) cache creation/removal */ +- const char *name; +- struct list_head next; +- +-/* 5) statistics */ +-#if STATS +- unsigned long num_active; +- unsigned long num_allocations; +- unsigned long high_mark; +- unsigned long grown; +- unsigned long reaped; +- unsigned long errors; +- unsigned long max_freeable; +- atomic_t allochit; +- atomic_t allocmiss; +- atomic_t freehit; +- atomic_t freemiss; +-#endif +-#if DEBUG +- int dbghead; +- int reallen; +-#endif +-}; +- +-#define CFLGS_OFF_SLAB (0x80000000UL) +-#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) +- + #define BATCHREFILL_LIMIT 16 + /* Optimization question: fewer reaps means less + * probability for unnessary cpucache drain/refill cycles. +@@ -446,15 +273,6 @@ static void **dbg_userword(kmem_cache_t + #define BREAK_GFP_ORDER_LO 0 + static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; + +-/* Macros for storing/retrieving the cachep and or slab from the +- * global 'mem_map'. These are used to find the slab an obj belongs to. +- * With kfree(), these are used to find the cache which an obj belongs to. +- */ +-#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x)) +-#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next) +-#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x)) +-#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev) +- + /* These are the default caches for kmalloc. Custom caches can have other sizes. */ + struct cache_sizes malloc_sizes[] = { + #define CACHE(x) { .cs_size = (x) }, +@@ -543,13 +361,24 @@ static void cache_estimate (unsigned lon + size_t wastage = PAGE_SIZE<<gfporder; + size_t extra = 0; + size_t base = 0; ++ size_t ub_align, ub_extra; ++ ++ ub_align = 1; ++ ub_extra = 0; + + if (!(flags & CFLGS_OFF_SLAB)) { + base = sizeof(struct slab); + extra = sizeof(kmem_bufctl_t); ++#ifdef CONFIG_USER_RESOURCE ++ if (flags & SLAB_UBC) { ++ ub_extra = sizeof(void *); ++ ub_align = sizeof(void *); ++ } ++#endif + } + i = 0; +- while (i*size + ALIGN(base+i*extra, align) <= wastage) ++ while (i * size + ALIGN(ALIGN(base + i * extra, ub_align) + ++ i * ub_extra, align) <= wastage) + i++; + if (i > 0) + i--; +@@ -558,8 +387,8 @@ static void cache_estimate (unsigned lon + i = SLAB_LIMIT; + + *num = i; +- wastage -= i*size; +- wastage -= ALIGN(base+i*extra, align); ++ wastage -= i * size + ALIGN(ALIGN(base + i * extra, ub_align) + ++ i * ub_extra, align); + *left_over = wastage; + } + +@@ -747,17 +576,18 @@ void __init kmem_cache_init(void) + * allow tighter packing of the smaller caches. */ + sizes->cs_cachep = kmem_cache_create(names->name, + sizes->cs_size, ARCH_KMALLOC_MINALIGN, +- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL); ++ (ARCH_KMALLOC_FLAGS | SLAB_PANIC | ++ SLAB_UBC | SLAB_NO_CHARGE), ++ NULL, NULL); + + /* Inc off-slab bufctl limit until the ceiling is hit. 
*/ +- if (!(OFF_SLAB(sizes->cs_cachep))) { +- offslab_limit = sizes->cs_size-sizeof(struct slab); +- offslab_limit /= sizeof(kmem_bufctl_t); +- } ++ if (!(OFF_SLAB(sizes->cs_cachep))) ++ offslab_limit = sizes->cs_size; + + sizes->cs_dmacachep = kmem_cache_create(names->name_dma, + sizes->cs_size, ARCH_KMALLOC_MINALIGN, +- (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC), ++ (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC | ++ SLAB_UBC | SLAB_NO_CHARGE), + NULL, NULL); + + sizes++; +@@ -1115,7 +945,7 @@ kmem_cache_create (const char *name, siz + unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), + void (*dtor)(void*, kmem_cache_t *, unsigned long)) + { +- size_t left_over, slab_size; ++ size_t left_over, slab_size, ub_size, ub_align; + kmem_cache_t *cachep = NULL; + + /* +@@ -1249,6 +1079,7 @@ kmem_cache_create (const char *name, siz + */ + do { + unsigned int break_flag = 0; ++ unsigned long off_slab_size; + cal_wastage: + cache_estimate(cachep->gfporder, size, align, flags, + &left_over, &cachep->num); +@@ -1258,12 +1089,22 @@ cal_wastage: + break; + if (!cachep->num) + goto next; +- if (flags & CFLGS_OFF_SLAB && +- cachep->num > offslab_limit) { ++ if (flags & CFLGS_OFF_SLAB) { ++ off_slab_size = sizeof(struct slab) + ++ cachep->num * sizeof(kmem_bufctl_t); ++#ifdef CONFIG_USER_RESOURCE ++ if (flags & SLAB_UBC) ++ off_slab_size = ALIGN(off_slab_size, ++ sizeof(void *)) + ++ cachep->num * sizeof(void *); ++#endif ++ + /* This num of objs will cause problems. */ +- cachep->gfporder--; +- break_flag++; +- goto cal_wastage; ++ if (off_slab_size > offslab_limit) { ++ cachep->gfporder--; ++ break_flag++; ++ goto cal_wastage; ++ } + } + + /* +@@ -1286,8 +1127,19 @@ next: + cachep = NULL; + goto opps; + } +- slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) +- + sizeof(struct slab), align); ++ ++ ub_size = 0; ++ ub_align = 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (flags & SLAB_UBC) { ++ ub_size = sizeof(void *); ++ ub_align = sizeof(void *); ++ } ++#endif ++ ++ slab_size = ALIGN(ALIGN(cachep->num * sizeof(kmem_bufctl_t) + ++ sizeof(struct slab), ub_align) + ++ cachep->num * ub_size, align); + + /* + * If the slab has been placed off-slab, and we have enough space then +@@ -1300,7 +1152,9 @@ next: + + if (flags & CFLGS_OFF_SLAB) { + /* really off slab. No need for manual alignment */ +- slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab); ++ slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) + ++ sizeof(struct slab), ub_align) + ++ cachep->num * ub_size; + } + + cachep->colour_off = cache_line_size(); +@@ -1337,10 +1191,13 @@ next: + * the cache that's used by kmalloc(24), otherwise + * the creation of further caches will BUG(). + */ +- cachep->array[smp_processor_id()] = &initarray_generic.cache; ++ cachep->array[smp_processor_id()] = ++ &initarray_generic.cache; + g_cpucache_up = PARTIAL; + } else { +- cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL); ++ cachep->array[smp_processor_id()] = ++ kmalloc(sizeof(struct arraycache_init), ++ GFP_KERNEL); + } + BUG_ON(!ac_data(cachep)); + ac_data(cachep)->avail = 0; +@@ -1354,7 +1211,7 @@ next: + } + + cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 + +- ((unsigned long)cachep)%REAPTIMEOUT_LIST3; ++ ((unsigned long)cachep)%REAPTIMEOUT_LIST3; + + /* Need the semaphore to access the chain. 
*/ + down(&cache_chain_sem); +@@ -1367,16 +1224,24 @@ next: + list_for_each(p, &cache_chain) { + kmem_cache_t *pc = list_entry(p, kmem_cache_t, next); + char tmp; +- /* This happens when the module gets unloaded and doesn't +- destroy its slab cache and noone else reuses the vmalloc +- area of the module. Print a warning. */ +- if (__get_user(tmp,pc->name)) { +- printk("SLAB: cache with size %d has lost its name\n", +- pc->objsize); ++ ++ /* ++ * This happens when the module gets unloaded and ++ * doesn't destroy its slab cache and noone else reuses ++ * the vmalloc area of the module. Print a warning. ++ */ ++#ifdef CONFIG_X86_UACCESS_INDIRECT ++ if (__direct_get_user(tmp,pc->name)) { ++#else ++ if (__get_user(tmp,pc->name)) { ++#endif ++ printk("SLAB: cache with size %d has lost its " ++ "name\n", pc->objsize); + continue; + } + if (!strcmp(pc->name,name)) { +- printk("kmem_cache_create: duplicate cache %s\n",name); ++ printk("kmem_cache_create: duplicate " ++ "cache %s\n",name); + up(&cache_chain_sem); + unlock_cpu_hotplug(); + BUG(); +@@ -1389,6 +1254,16 @@ next: + list_add(&cachep->next, &cache_chain); + up(&cache_chain_sem); + unlock_cpu_hotplug(); ++ ++#ifdef CONFIG_USER_RESOURCE ++ cachep->objuse = ((PAGE_SIZE << cachep->gfporder) + cachep->num - 1) / ++ cachep->num; ++ if (OFF_SLAB(cachep)) ++ cachep->objuse += ++ (cachep->slabp_cache->objuse + cachep->num - 1) ++ / cachep->num; ++#endif ++ + opps: + if (!cachep && (flags & SLAB_PANIC)) + panic("kmem_cache_create(): failed to create slab `%s'\n", +@@ -1572,6 +1447,7 @@ int kmem_cache_destroy (kmem_cache_t * c + /* NUMA: free the list3 structures */ + kfree(cachep->lists.shared); + cachep->lists.shared = NULL; ++ ub_kmemcache_free(cachep); + kmem_cache_free(&cache_cache, cachep); + + unlock_cpu_hotplug(); +@@ -1586,28 +1462,30 @@ static struct slab* alloc_slabmgmt (kmem + void *objp, int colour_off, int local_flags) + { + struct slab *slabp; +- ++ + if (OFF_SLAB(cachep)) { + /* Slab management obj is off-slab. */ +- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); ++ slabp = kmem_cache_alloc(cachep->slabp_cache, ++ local_flags & (~__GFP_UBC)); + if (!slabp) + return NULL; + } else { + slabp = objp+colour_off; + colour_off += cachep->slab_size; + } ++ + slabp->inuse = 0; + slabp->colouroff = colour_off; + slabp->s_mem = objp+colour_off; + ++#ifdef CONFIG_USER_RESOURCE ++ if (cachep->flags & SLAB_UBC) ++ memset(slab_ubcs(cachep, slabp), 0, cachep->num * ++ sizeof(struct user_beancounter *)); ++#endif + return slabp; + } + +-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) +-{ +- return (kmem_bufctl_t *)(slabp+1); +-} +- + static void cache_init_objs (kmem_cache_t * cachep, + struct slab * slabp, unsigned long ctor_flags) + { +@@ -1735,7 +1613,7 @@ static int cache_grow (kmem_cache_t * ca + + + /* Get mem for the objs. */ +- if (!(objp = kmem_getpages(cachep, flags, -1))) ++ if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), -1))) + goto failed; + + /* Get slab management. 
*/ +@@ -2038,6 +1916,16 @@ cache_alloc_debugcheck_after(kmem_cache_ + #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) + #endif + ++static inline int should_charge(kmem_cache_t *cachep, int flags, void *objp) ++{ ++ if (objp == NULL) ++ return 0; ++ if (!(cachep->flags & SLAB_UBC)) ++ return 0; ++ if ((cachep->flags & SLAB_NO_CHARGE) && !(flags & __GFP_UBC)) ++ return 0; ++ return 1; ++} + + static inline void * __cache_alloc (kmem_cache_t *cachep, int flags) + { +@@ -2058,8 +1946,18 @@ static inline void * __cache_alloc (kmem + objp = cache_alloc_refill(cachep, flags); + } + local_irq_restore(save_flags); ++ ++ if (should_charge(cachep, flags, objp) && ++ ub_slab_charge(objp, flags) < 0) ++ goto out_err; ++ + objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0)); + return objp; ++ ++out_err: ++ objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0)); ++ kmem_cache_free(cachep, objp); ++ return NULL; + } + + /* +@@ -2182,6 +2080,9 @@ static inline void __cache_free (kmem_ca + check_irq_off(); + objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); + ++ if (cachep->flags & SLAB_UBC) ++ ub_slab_uncharge(objp); ++ + if (likely(ac->avail < ac->limit)) { + STATS_INC_FREEHIT(cachep); + ac_entry(ac)[ac->avail++] = objp; +@@ -2434,6 +2335,20 @@ void kmem_cache_free (kmem_cache_t *cach + EXPORT_SYMBOL(kmem_cache_free); + + /** ++ * kzalloc - allocate memory. The memory is set to zero. ++ * @size: how many bytes of memory are required. ++ * @flags: the type of memory to allocate. ++ */ ++void *kzalloc(size_t size, gfp_t flags) ++{ ++ void *ret = kmalloc(size, flags); ++ if (ret) ++ memset(ret, 0, size); ++ return ret; ++} ++EXPORT_SYMBOL(kzalloc); ++ ++/** + * kfree - free previously allocated memory + * @objp: pointer returned by kmalloc. 
+ * +@@ -2475,6 +2390,7 @@ free_percpu(const void *objp) + continue; + kfree(p->ptrs[i]); + } ++ kfree(p); + } + + EXPORT_SYMBOL(free_percpu); +@@ -2693,6 +2609,7 @@ static void cache_reap (void) + if (down_trylock(&cache_chain_sem)) + return; + ++ {KSTAT_PERF_ENTER(cache_reap) + list_for_each(walk, &cache_chain) { + kmem_cache_t *searchp; + struct list_head* p; +@@ -2755,6 +2672,7 @@ next: + } + check_irq_on(); + up(&cache_chain_sem); ++ KSTAT_PERF_LEAVE(cache_reap)} + } + + /* +diff -uprN linux-2.6.8.1.orig/mm/swap.c linux-2.6.8.1-ve022stab078/mm/swap.c +--- linux-2.6.8.1.orig/mm/swap.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/swap.c 2006-05-11 13:05:32.000000000 +0400 +@@ -351,7 +351,9 @@ void pagevec_strip(struct pagevec *pvec) + struct page *page = pvec->pages[i]; + + if (PagePrivate(page) && !TestSetPageLocked(page)) { +- try_to_release_page(page, 0); ++ /* need to recheck after lock */ ++ if (page_has_buffers(page)) ++ try_to_release_page(page, 0); + unlock_page(page); + } + } +diff -uprN linux-2.6.8.1.orig/mm/swap_state.c linux-2.6.8.1-ve022stab078/mm/swap_state.c +--- linux-2.6.8.1.orig/mm/swap_state.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/swap_state.c 2006-05-11 13:05:42.000000000 +0400 +@@ -14,9 +14,15 @@ + #include <linux/pagemap.h> + #include <linux/buffer_head.h> + #include <linux/backing-dev.h> ++#include <linux/kernel_stat.h> + + #include <asm/pgtable.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_mem.h> ++#include <ub/ub_page.h> ++#include <ub/ub_vmpages.h> ++ + /* + * swapper_space is a fiction, retained to simplify the path through + * vmscan's shrink_list, to make sync_page look nicer, and to allow +@@ -42,23 +48,20 @@ struct address_space swapper_space = { + }; + EXPORT_SYMBOL(swapper_space); + ++/* can't remove variable swap_cache_info due to dynamic kernel */ + #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) + +-static struct { +- unsigned long add_total; +- unsigned long del_total; +- unsigned long find_success; +- unsigned long find_total; +- unsigned long noent_race; +- unsigned long exist_race; +-} swap_cache_info; ++struct swap_cache_info_struct swap_cache_info; ++EXPORT_SYMBOL(swap_cache_info); + + void show_swap_cache_info(void) + { +- printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", ++ printk("Swap cache: add %lu, delete %lu, find %lu/%lu, " ++ "race %lu+%lu+%lu\n", + swap_cache_info.add_total, swap_cache_info.del_total, + swap_cache_info.find_success, swap_cache_info.find_total, +- swap_cache_info.noent_race, swap_cache_info.exist_race); ++ swap_cache_info.noent_race, swap_cache_info.exist_race, ++ swap_cache_info.remove_race); + } + + /* +@@ -148,7 +151,14 @@ int add_to_swap(struct page * page) + BUG(); + + for (;;) { +- entry = get_swap_page(); ++ struct user_beancounter *ub; ++ ++ ub = pb_grab_page_ub(page); ++ if (IS_ERR(ub)) ++ return 0; ++ ++ entry = get_swap_page(ub); ++ put_beancounter(ub); + if (!entry.val) + return 0; + +@@ -264,10 +274,13 @@ int move_from_swap_cache(struct page *pa + */ + static inline void free_swap_cache(struct page *page) + { +- if (PageSwapCache(page) && !TestSetPageLocked(page)) { ++ if (!PageSwapCache(page)) ++ return; ++ if (!TestSetPageLocked(page)) { + remove_exclusive_swap_page(page); + unlock_page(page); +- } ++ } else ++ INC_CACHE_INFO(remove_race); + } + + /* +diff -uprN linux-2.6.8.1.orig/mm/swapfile.c linux-2.6.8.1-ve022stab078/mm/swapfile.c +--- linux-2.6.8.1.orig/mm/swapfile.c 2004-08-14 14:54:51.000000000 
+0400 ++++ linux-2.6.8.1-ve022stab078/mm/swapfile.c 2006-05-11 13:05:45.000000000 +0400 +@@ -30,6 +30,8 @@ + #include <asm/tlbflush.h> + #include <linux/swapops.h> + ++#include <ub/ub_vmpages.h> ++ + spinlock_t swaplock = SPIN_LOCK_UNLOCKED; + unsigned int nr_swapfiles; + long total_swap_pages; +@@ -147,7 +149,7 @@ static inline int scan_swap_map(struct s + return 0; + } + +-swp_entry_t get_swap_page(void) ++swp_entry_t get_swap_page(struct user_beancounter *ub) + { + struct swap_info_struct * p; + unsigned long offset; +@@ -164,7 +166,7 @@ swp_entry_t get_swap_page(void) + + while (1) { + p = &swap_info[type]; +- if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { ++ if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) { + swap_device_lock(p); + offset = scan_swap_map(p); + swap_device_unlock(p); +@@ -177,6 +179,12 @@ swp_entry_t get_swap_page(void) + } else { + swap_list.next = type; + } ++#if CONFIG_USER_SWAP_ACCOUNTING ++ if (p->owner_map[offset] != NULL) ++ BUG(); ++ ub_swapentry_inc(ub); ++ p->owner_map[offset] = get_beancounter(ub); ++#endif + goto out; + } + } +@@ -248,6 +256,11 @@ static int swap_entry_free(struct swap_i + count--; + p->swap_map[offset] = count; + if (!count) { ++#if CONFIG_USER_SWAP_ACCOUNTING ++ ub_swapentry_dec(p->owner_map[offset]); ++ put_beancounter(p->owner_map[offset]); ++ p->owner_map[offset] = NULL; ++#endif + if (offset < p->lowest_bit) + p->lowest_bit = offset; + if (offset > p->highest_bit) +@@ -288,7 +301,8 @@ static int exclusive_swap_page(struct pa + p = swap_info_get(entry); + if (p) { + /* Is the only swap cache user the cache itself? */ +- if (p->swap_map[swp_offset(entry)] == 1) { ++ if ((p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE && ++ p->swap_map[swp_offset(entry)] == 1) { + /* Recheck the page count with the swapcache lock held.. */ + spin_lock_irq(&swapper_space.tree_lock); + if (page_count(page) == 2) +@@ -379,6 +393,54 @@ int remove_exclusive_swap_page(struct pa + return retval; + } + ++int try_to_remove_exclusive_swap_page(struct page *page) ++{ ++ int retval; ++ struct swap_info_struct * p; ++ swp_entry_t entry; ++ ++ BUG_ON(PagePrivate(page)); ++ BUG_ON(!PageLocked(page)); ++ ++ if (!PageSwapCache(page)) ++ return 0; ++ if (PageWriteback(page)) ++ return 0; ++ if (page_count(page) != 2) /* 2: us + cache */ ++ return 0; ++ ++ entry.val = page->private; ++ p = swap_info_get(entry); ++ if (!p) ++ return 0; ++ if (!vm_swap_full() && ++ (p->flags & (SWP_ACTIVE|SWP_READONLY)) == SWP_ACTIVE) { ++ swap_info_put(p); ++ return 0; ++ } ++ ++ /* Is the only swap cache user the cache itself? */ ++ retval = 0; ++ if (p->swap_map[swp_offset(entry)] == 1) { ++ /* Recheck the page count with the swapcache lock held.. */ ++ spin_lock_irq(&swapper_space.tree_lock); ++ if ((page_count(page) == 2) && !PageWriteback(page)) { ++ __delete_from_swap_cache(page); ++ SetPageDirty(page); ++ retval = 1; ++ } ++ spin_unlock_irq(&swapper_space.tree_lock); ++ } ++ swap_info_put(p); ++ ++ if (retval) { ++ swap_free(entry); ++ page_cache_release(page); ++ } ++ ++ return retval; ++} ++ + /* + * Free the swap entry like above, but also try to + * free the page cache entry if it is the last user. 
+@@ -428,9 +490,12 @@ void free_swap_and_cache(swp_entry_t ent + /* vma->vm_mm->page_table_lock is held */ + static void + unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs) + { + vma->vm_mm->rss++; ++ vma->vm_rss++; ++ ub_unused_privvm_dec(mm_ub(vma->vm_mm), 1, vma); ++ pb_add_list_ref(page, mm_ub(vma->vm_mm), ppbs); + get_page(page); + set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); + page_add_anon_rmap(page, vma, address); +@@ -440,7 +505,7 @@ unuse_pte(struct vm_area_struct *vma, un + /* vma->vm_mm->page_table_lock is held */ + static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, + unsigned long address, unsigned long size, unsigned long offset, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs) + { + pte_t * pte; + unsigned long end; +@@ -465,7 +530,8 @@ static unsigned long unuse_pmd(struct vm + * Test inline before going to call unuse_pte. + */ + if (unlikely(pte_same(*pte, swp_pte))) { +- unuse_pte(vma, offset + address, pte, entry, page); ++ unuse_pte(vma, offset + address, pte, entry, page, ++ ppbs); + pte_unmap(pte); + + /* +@@ -486,8 +552,8 @@ static unsigned long unuse_pmd(struct vm + + /* vma->vm_mm->page_table_lock is held */ + static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, +- unsigned long address, unsigned long size, +- swp_entry_t entry, struct page *page) ++ unsigned long address, unsigned long size, swp_entry_t entry, ++ struct page *page, struct page_beancounter **ppbs) + { + pmd_t * pmd; + unsigned long offset, end; +@@ -510,7 +576,7 @@ static unsigned long unuse_pgd(struct vm + BUG(); + do { + foundaddr = unuse_pmd(vma, pmd, address, end - address, +- offset, entry, page); ++ offset, entry, page, ppbs); + if (foundaddr) + return foundaddr; + address = (address + PMD_SIZE) & PMD_MASK; +@@ -521,7 +587,7 @@ static unsigned long unuse_pgd(struct vm + + /* vma->vm_mm->page_table_lock is held */ + static unsigned long unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, +- swp_entry_t entry, struct page *page) ++ swp_entry_t entry, struct page *page, struct page_beancounter **ppbs) + { + unsigned long start = vma->vm_start, end = vma->vm_end; + unsigned long foundaddr; +@@ -530,7 +596,7 @@ static unsigned long unuse_vma(struct vm + BUG(); + do { + foundaddr = unuse_pgd(vma, pgdir, start, end - start, +- entry, page); ++ entry, page, ppbs); + if (foundaddr) + return foundaddr; + start = (start + PGDIR_SIZE) & PGDIR_MASK; +@@ -540,7 +606,8 @@ static unsigned long unuse_vma(struct vm + } + + static int unuse_process(struct mm_struct * mm, +- swp_entry_t entry, struct page* page) ++ swp_entry_t entry, struct page* page, ++ struct page_beancounter **ppbs) + { + struct vm_area_struct* vma; + unsigned long foundaddr = 0; +@@ -561,7 +628,7 @@ static int unuse_process(struct mm_struc + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!is_vm_hugetlb_page(vma)) { + pgd_t * pgd = pgd_offset(mm, vma->vm_start); +- foundaddr = unuse_vma(vma, pgd, entry, page); ++ foundaddr = unuse_vma(vma, pgd, entry, page, ppbs); + if (foundaddr) + break; + } +@@ -629,6 +696,7 @@ static int try_to_unuse(unsigned int typ + int retval = 0; + int reset_overflow = 0; + int shmem; ++ struct page_beancounter *pb_list; + + /* + * When searching mms for an entry, a good strategy is to +@@ -687,6 +755,13 @@ static int try_to_unuse(unsigned int typ + break; + 
} + ++ pb_list = NULL; ++ if (pb_reserve_all(&pb_list)) { ++ page_cache_release(page); ++ retval = -ENOMEM; ++ break; ++ } ++ + /* + * Don't hold on to start_mm if it looks like exiting. + */ +@@ -709,6 +784,20 @@ static int try_to_unuse(unsigned int typ + lock_page(page); + wait_on_page_writeback(page); + ++ /* If read failed we cannot map not-uptodate page to ++ * user space. Actually, we are in serious troubles, ++ * we do not even know what process to kill. So, the only ++ * variant remains: to stop swapoff() and allow someone ++ * to kill processes to zap invalid pages. ++ */ ++ if (unlikely(!PageUptodate(page))) { ++ pb_free_list(&pb_list); ++ unlock_page(page); ++ page_cache_release(page); ++ retval = -EIO; ++ break; ++ } ++ + /* + * Remove all references to entry, without blocking. + * Whenever we reach init_mm, there's no address space +@@ -720,8 +809,10 @@ static int try_to_unuse(unsigned int typ + if (start_mm == &init_mm) + shmem = shmem_unuse(entry, page); + else +- retval = unuse_process(start_mm, entry, page); ++ retval = unuse_process(start_mm, entry, page, ++ &pb_list); + } ++ + if (*swap_map > 1) { + int set_start_mm = (*swap_map >= swcount); + struct list_head *p = &start_mm->mmlist; +@@ -749,7 +840,8 @@ static int try_to_unuse(unsigned int typ + set_start_mm = 1; + shmem = shmem_unuse(entry, page); + } else +- retval = unuse_process(mm, entry, page); ++ retval = unuse_process(mm, entry, page, ++ &pb_list); + if (set_start_mm && *swap_map < swcount) { + mmput(new_start_mm); + atomic_inc(&mm->mm_users); +@@ -763,6 +855,8 @@ static int try_to_unuse(unsigned int typ + mmput(start_mm); + start_mm = new_start_mm; + } ++ ++ pb_free_list(&pb_list); + if (retval) { + unlock_page(page); + page_cache_release(page); +@@ -1078,6 +1172,7 @@ asmlinkage long sys_swapoff(const char _ + { + struct swap_info_struct * p = NULL; + unsigned short *swap_map; ++ struct user_beancounter **owner_map; + struct file *swap_file, *victim; + struct address_space *mapping; + struct inode *inode; +@@ -1085,6 +1180,10 @@ asmlinkage long sys_swapoff(const char _ + int i, type, prev; + int err; + ++ /* VE admin check is just to be on the safe side, the admin may affect ++ * swaps only if he has access to special, i.e. if he has been granted ++ * access to the block device or if the swap file is in the area ++ * visible to him. 
*/ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + +@@ -1168,12 +1267,15 @@ asmlinkage long sys_swapoff(const char _ + p->max = 0; + swap_map = p->swap_map; + p->swap_map = NULL; ++ owner_map = p->owner_map; ++ p->owner_map = NULL; + p->flags = 0; + destroy_swap_extents(p); + swap_device_unlock(p); + swap_list_unlock(); + up(&swapon_sem); + vfree(swap_map); ++ vfree(owner_map); + inode = mapping->host; + if (S_ISBLK(inode->i_mode)) { + struct block_device *bdev = I_BDEV(inode); +@@ -1310,6 +1412,7 @@ asmlinkage long sys_swapon(const char __ + struct page *page = NULL; + struct inode *inode = NULL; + int did_down = 0; ++ struct user_beancounter **owner_map; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; +@@ -1347,6 +1450,7 @@ asmlinkage long sys_swapon(const char __ + p->highest_bit = 0; + p->cluster_nr = 0; + p->inuse_pages = 0; ++ p->owner_map = NULL; + p->sdev_lock = SPIN_LOCK_UNLOCKED; + p->next = -1; + if (swap_flags & SWAP_FLAG_PREFER) { +@@ -1513,6 +1617,15 @@ asmlinkage long sys_swapon(const char __ + error = -EINVAL; + goto bad_swap; + } ++#if CONFIG_USER_SWAP_ACCOUNTING ++ p->owner_map = vmalloc(maxpages * sizeof(struct user_beancounter *)); ++ if (!p->owner_map) { ++ error = -ENOMEM; ++ goto bad_swap; ++ } ++ memset(p->owner_map, 0, ++ maxpages * sizeof(struct user_beancounter *)); ++#endif + p->swap_map[0] = SWAP_MAP_BAD; + p->max = maxpages; + p->pages = nr_good_pages; +@@ -1525,6 +1638,8 @@ asmlinkage long sys_swapon(const char __ + swap_list_lock(); + swap_device_lock(p); + p->flags = SWP_ACTIVE; ++ if (swap_flags & SWAP_FLAG_READONLY) ++ p->flags |= SWP_READONLY; + nr_swap_pages += nr_good_pages; + total_swap_pages += nr_good_pages; + printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n", +@@ -1558,6 +1673,7 @@ bad_swap: + bad_swap_2: + swap_list_lock(); + swap_map = p->swap_map; ++ owner_map = p->owner_map; + p->swap_file = NULL; + p->swap_map = NULL; + p->flags = 0; +@@ -1567,6 +1683,8 @@ bad_swap_2: + destroy_swap_extents(p); + if (swap_map) + vfree(swap_map); ++ if (owner_map) ++ vfree(owner_map); + if (swap_file) + filp_close(swap_file, NULL); + out: +diff -uprN linux-2.6.8.1.orig/mm/truncate.c linux-2.6.8.1-ve022stab078/mm/truncate.c +--- linux-2.6.8.1.orig/mm/truncate.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/truncate.c 2006-05-11 13:05:28.000000000 +0400 +@@ -79,6 +79,12 @@ invalidate_complete_page(struct address_ + spin_unlock_irq(&mapping->tree_lock); + return 0; + } ++ ++ BUG_ON(PagePrivate(page)); ++ if (page_count(page) != 2) { ++ spin_unlock_irq(&mapping->tree_lock); ++ return 0; ++ } + __remove_from_page_cache(page); + spin_unlock_irq(&mapping->tree_lock); + ClearPageUptodate(page); +@@ -268,7 +274,11 @@ void invalidate_inode_pages2(struct addr + clear_page_dirty(page); + ClearPageUptodate(page); + } else { +- invalidate_complete_page(mapping, page); ++ if (!invalidate_complete_page(mapping, ++ page)) { ++ clear_page_dirty(page); ++ ClearPageUptodate(page); ++ } + } + } + unlock_page(page); +diff -uprN linux-2.6.8.1.orig/mm/usercopy.c linux-2.6.8.1-ve022stab078/mm/usercopy.c +--- linux-2.6.8.1.orig/mm/usercopy.c 1970-01-01 03:00:00.000000000 +0300 ++++ linux-2.6.8.1-ve022stab078/mm/usercopy.c 2006-05-11 13:05:38.000000000 +0400 +@@ -0,0 +1,310 @@ ++/* ++ * linux/mm/usercopy.c ++ * ++ * (C) Copyright 2003 Ingo Molnar ++ * ++ * Generic implementation of all the user-VM access functions, without ++ * relying on being able to access the VM directly. 
++ */ ++ ++#include <linux/module.h> ++#include <linux/sched.h> ++#include <linux/errno.h> ++#include <linux/mm.h> ++#include <linux/highmem.h> ++#include <linux/pagemap.h> ++#include <linux/smp_lock.h> ++#include <linux/ptrace.h> ++#include <linux/interrupt.h> ++ ++#include <asm/pgtable.h> ++#include <asm/uaccess.h> ++#include <asm/atomic_kmap.h> ++ ++/* ++ * Get kernel address of the user page and pin it. ++ */ ++static inline struct page *pin_page(unsigned long addr, int write, ++ pte_t *pte) ++{ ++ struct mm_struct *mm = current->mm ? : &init_mm; ++ struct page *page = NULL; ++ int ret; ++ ++ if (addr >= current_thread_info()->addr_limit.seg) ++ return (struct page *)-1UL; ++ /* ++ * Do a quick atomic lookup first - this is the fastpath. ++ */ ++retry: ++ page = follow_page_pte(mm, addr, write, pte); ++ if (likely(page != NULL)) { ++ if (!PageReserved(page)) ++ get_page(page); ++ return page; ++ } ++ if (pte_present(*pte)) ++ return NULL; ++ /* ++ * No luck - bad address or need to fault in the page: ++ */ ++ ++ /* Release the lock so get_user_pages can sleep */ ++ spin_unlock(&mm->page_table_lock); ++ ++ /* ++ * In the context of filemap_copy_from_user(), we are not allowed ++ * to sleep. We must fail this usercopy attempt and allow ++ * filemap_copy_from_user() to recover: drop its atomic kmap and use ++ * a sleeping kmap instead. ++ */ ++ if (in_atomic()) { ++ spin_lock(&mm->page_table_lock); ++ return NULL; ++ } ++ ++ down_read(&mm->mmap_sem); ++ ret = get_user_pages(current, mm, addr, 1, write, 0, NULL, NULL); ++ up_read(&mm->mmap_sem); ++ spin_lock(&mm->page_table_lock); ++ ++ if (ret <= 0) ++ return NULL; ++ ++ /* ++ * Go try the follow_page again. ++ */ ++ goto retry; ++} ++ ++static inline void unpin_page(struct page *page) ++{ ++ put_page(page); ++} ++ ++/* ++ * Access another process' address space. ++ * Source/target buffer must be kernel space, ++ * Do not walk the page table directly, use get_user_pages ++ */ ++static int rw_vm(unsigned long addr, void *buf, int len, int write) ++{ ++ struct mm_struct *mm = current->mm ? 
: &init_mm; ++ ++ if (!len) ++ return 0; ++ ++ spin_lock(&mm->page_table_lock); ++ ++ /* ignore errors, just check how much was sucessfully transfered */ ++ while (len) { ++ struct page *page = NULL; ++ pte_t pte; ++ int bytes, offset; ++ void *maddr; ++ ++ page = pin_page(addr, write, &pte); ++ if ((page == (struct page *)-1UL) || ++ (!page && !pte_present(pte))) ++ break; ++ ++ bytes = len; ++ offset = addr & (PAGE_SIZE-1); ++ if (bytes > PAGE_SIZE-offset) ++ bytes = PAGE_SIZE-offset; ++ ++ if (page) ++ maddr = kmap_atomic(page, KM_USER_COPY); ++ else ++ /* we will map with user pte ++ */ ++ maddr = kmap_atomic_pte(&pte, KM_USER_COPY); ++ ++#define HANDLE_TYPE(type) \ ++ case sizeof(type): *(type *)(maddr+offset) = *(type *)(buf); break; ++ ++ if (write) { ++ switch (bytes) { ++ HANDLE_TYPE(char); ++ HANDLE_TYPE(int); ++ HANDLE_TYPE(long long); ++ default: ++ memcpy(maddr + offset, buf, bytes); ++ } ++ } else { ++#undef HANDLE_TYPE ++#define HANDLE_TYPE(type) \ ++ case sizeof(type): *(type *)(buf) = *(type *)(maddr+offset); break; ++ switch (bytes) { ++ HANDLE_TYPE(char); ++ HANDLE_TYPE(int); ++ HANDLE_TYPE(long long); ++ default: ++ memcpy(buf, maddr + offset, bytes); ++ } ++#undef HANDLE_TYPE ++ } ++ kunmap_atomic(maddr, KM_USER_COPY); ++ if (page) ++ unpin_page(page); ++ len -= bytes; ++ buf += bytes; ++ addr += bytes; ++ } ++ spin_unlock(&mm->page_table_lock); ++ ++ return len; ++} ++ ++static int str_vm(unsigned long addr, void *buf0, int len, int copy) ++{ ++ struct mm_struct *mm = current->mm ? : &init_mm; ++ struct page *page; ++ void *buf = buf0; ++ ++ if (!len) ++ return len; ++ ++ spin_lock(&mm->page_table_lock); ++ ++ /* ignore errors, just check how much was sucessfully transfered */ ++ while (len) { ++ int bytes, offset, left, copied; ++ pte_t pte; ++ char *maddr; ++ ++ page = pin_page(addr, copy == 2, &pte); ++ if ((page == (struct page *)-1UL) || ++ (!page && !pte_present(pte))) { ++ spin_unlock(&mm->page_table_lock); ++ return -EFAULT; ++ } ++ bytes = len; ++ offset = addr & (PAGE_SIZE-1); ++ if (bytes > PAGE_SIZE-offset) ++ bytes = PAGE_SIZE-offset; ++ ++ if (page) ++ maddr = kmap_atomic(page, KM_USER_COPY); ++ else ++ /* we will map with user pte ++ */ ++ maddr = kmap_atomic_pte(&pte, KM_USER_COPY); ++ if (copy == 2) { ++ memset(maddr + offset, 0, bytes); ++ copied = bytes; ++ left = 0; ++ } else if (copy == 1) { ++ left = strncpy_count(buf, maddr + offset, bytes); ++ copied = bytes - left; ++ } else { ++ copied = strnlen(maddr + offset, bytes); ++ left = bytes - copied; ++ } ++ BUG_ON(bytes < 0 || copied < 0); ++ kunmap_atomic(maddr, KM_USER_COPY); ++ if (page) ++ unpin_page(page); ++ len -= copied; ++ buf += copied; ++ addr += copied; ++ if (left) ++ break; ++ } ++ spin_unlock(&mm->page_table_lock); ++ ++ return len; ++} ++ ++/* ++ * Copies memory from userspace (ptr) into kernelspace (val). ++ * ++ * returns # of bytes not copied. ++ */ ++int get_user_size(unsigned int size, void *val, const void *ptr) ++{ ++ int ret; ++ ++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) ++ ret = __direct_copy_from_user(val, ptr, size); ++ else ++ ret = rw_vm((unsigned long)ptr, val, size, 0); ++ if (ret) ++ /* ++ * Zero the rest: ++ */ ++ memset(val + size - ret, 0, ret); ++ return ret; ++} ++ ++/* ++ * Copies memory from kernelspace (val) into userspace (ptr). ++ * ++ * returns # of bytes not copied. 
++ */ ++int put_user_size(unsigned int size, const void *val, void *ptr) ++{ ++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) ++ return __direct_copy_to_user(ptr, val, size); ++ else ++ return rw_vm((unsigned long)ptr, (void *)val, size, 1); ++} ++ ++int copy_str_fromuser_size(unsigned int size, void *val, const void *ptr) ++{ ++ int copied, left; ++ ++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { ++ left = strncpy_count(val, ptr, size); ++ copied = size - left; ++ BUG_ON(copied < 0); ++ ++ return copied; ++ } ++ left = str_vm((unsigned long)ptr, val, size, 1); ++ if (left < 0) ++ return left; ++ copied = size - left; ++ BUG_ON(copied < 0); ++ ++ return copied; ++} ++ ++int strlen_fromuser_size(unsigned int size, const void *ptr) ++{ ++ int copied, left; ++ ++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { ++ copied = strnlen(ptr, size) + 1; ++ BUG_ON(copied < 0); ++ ++ return copied; ++ } ++ left = str_vm((unsigned long)ptr, NULL, size, 0); ++ if (left < 0) ++ return 0; ++ copied = size - left + 1; ++ BUG_ON(copied < 0); ++ ++ return copied; ++} ++ ++int zero_user_size(unsigned int size, void *ptr) ++{ ++ int left; ++ ++ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { ++ memset(ptr, 0, size); ++ return 0; ++ } ++ left = str_vm((unsigned long)ptr, NULL, size, 2); ++ if (left < 0) ++ return size; ++ return left; ++} ++ ++EXPORT_SYMBOL(get_user_size); ++EXPORT_SYMBOL(put_user_size); ++EXPORT_SYMBOL(zero_user_size); ++EXPORT_SYMBOL(copy_str_fromuser_size); ++EXPORT_SYMBOL(strlen_fromuser_size); +diff -uprN linux-2.6.8.1.orig/mm/vmalloc.c linux-2.6.8.1-ve022stab078/mm/vmalloc.c +--- linux-2.6.8.1.orig/mm/vmalloc.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/vmalloc.c 2006-05-11 13:05:41.000000000 +0400 +@@ -19,6 +19,7 @@ + #include <asm/uaccess.h> + #include <asm/tlbflush.h> + ++#include <ub/ub_debug.h> + + rwlock_t vmlist_lock = RW_LOCK_UNLOCKED; + struct vm_struct *vmlist; +@@ -246,6 +247,66 @@ struct vm_struct *get_vm_area(unsigned l + return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END); + } + ++struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags) ++{ ++ unsigned long addr, best_addr, delta, best_delta; ++ struct vm_struct **p, **best_p, *tmp, *area; ++ ++ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); ++ if (!area) ++ return NULL; ++ ++ size += PAGE_SIZE; /* one-page gap at the end */ ++ addr = VMALLOC_START; ++ best_addr = 0UL; ++ best_p = NULL; ++ best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START; ++ ++ write_lock(&vmlist_lock); ++ for (p = &vmlist; (tmp = *p) ; p = &tmp->next) { ++ if ((size + addr) < addr) ++ break; ++ delta = (unsigned long) tmp->addr - (size + addr); ++ if (delta < best_delta) { ++ best_delta = delta; ++ best_addr = addr; ++ best_p = p; ++ } ++ addr = tmp->size + (unsigned long) tmp->addr; ++ if (addr > VMALLOC_END-size) ++ break; ++ } ++ ++ if (!tmp) { ++ /* check free area after list end */ ++ delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr); ++ if (delta < best_delta) { ++ best_delta = delta; ++ best_addr = addr; ++ best_p = p; ++ } ++ } ++ if (best_addr) { ++ area->flags = flags; ++ /* allocate at the end of this area */ ++ area->addr = (void *)(best_addr + best_delta); ++ area->size = size; ++ area->next = *best_p; ++ area->pages = NULL; ++ area->nr_pages = 0; ++ area->phys_addr = 0; ++ *best_p = area; ++ /* check like in __vunmap */ ++ WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr); ++ } else { ++ kfree(area); ++ area = NULL; ++ } ++ 
write_unlock(&vmlist_lock); ++ ++ return area; ++} ++ + /** + * remove_vm_area - find and remove a contingous kernel virtual area + * +@@ -298,6 +359,7 @@ void __vunmap(void *addr, int deallocate + if (deallocate_pages) { + int i; + ++ dec_vmalloc_charged(area); + for (i = 0; i < area->nr_pages; i++) { + if (unlikely(!area->pages[i])) + BUG(); +@@ -390,17 +452,20 @@ EXPORT_SYMBOL(vmap); + * allocator with @gfp_mask flags. Map them into contiguous + * kernel virtual space, using a pagetable protection of @prot. + */ +-void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot) ++void *____vmalloc(unsigned long size, int gfp_mask, pgprot_t prot, int best) + { + struct vm_struct *area; + struct page **pages; +- unsigned int nr_pages, array_size, i; ++ unsigned int nr_pages, array_size, i, j; + + size = PAGE_ALIGN(size); + if (!size || (size >> PAGE_SHIFT) > num_physpages) + return NULL; + +- area = get_vm_area(size, VM_ALLOC); ++ if (best) ++ area = get_vm_area_best(size, VM_ALLOC); ++ else ++ area = get_vm_area(size, VM_ALLOC); + if (!area) + return NULL; + +@@ -409,31 +474,38 @@ void *__vmalloc(unsigned long size, int + + area->nr_pages = nr_pages; + area->pages = pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM)); +- if (!area->pages) { +- remove_vm_area(area->addr); +- kfree(area); +- return NULL; +- } ++ if (!area->pages) ++ goto fail_area; + memset(area->pages, 0, array_size); + + for (i = 0; i < area->nr_pages; i++) { + area->pages[i] = alloc_page(gfp_mask); +- if (unlikely(!area->pages[i])) { +- /* Successfully allocated i pages, free them in __vunmap() */ +- area->nr_pages = i; ++ if (unlikely(!area->pages[i])) + goto fail; +- } + } + + if (map_vm_area(area, prot, &pages)) + goto fail; ++ ++ inc_vmalloc_charged(area, gfp_mask); + return area->addr; + + fail: +- vfree(area->addr); ++ for (j = 0; j < i; j++) ++ __free_page(area->pages[j]); ++ kfree(area->pages); ++fail_area: ++ remove_vm_area(area->addr); ++ kfree(area); ++ + return NULL; + } + ++void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot) ++{ ++ return ____vmalloc(size, gfp_mask, prot, 0); ++} ++ + EXPORT_SYMBOL(__vmalloc); + + /** +@@ -454,6 +526,20 @@ void *vmalloc(unsigned long size) + + EXPORT_SYMBOL(vmalloc); + ++void *vmalloc_best(unsigned long size) ++{ ++ return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, 1); ++} ++ ++EXPORT_SYMBOL(vmalloc_best); ++ ++void *ub_vmalloc_best(unsigned long size) ++{ ++ return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL, 1); ++} ++ ++EXPORT_SYMBOL(ub_vmalloc_best); ++ + /** + * vmalloc_exec - allocate virtually contiguous, executable memory + * +@@ -565,3 +651,37 @@ finished: + read_unlock(&vmlist_lock); + return buf - buf_start; + } ++ ++void vprintstat(void) ++{ ++ struct vm_struct *p, *last_p = NULL; ++ unsigned long addr, size, free_size, max_free_size; ++ int num; ++ ++ addr = VMALLOC_START; ++ size = max_free_size = 0; ++ num = 0; ++ ++ read_lock(&vmlist_lock); ++ for (p = vmlist; p; p = p->next) { ++ free_size = (unsigned long)p->addr - addr; ++ if (free_size > max_free_size) ++ max_free_size = free_size; ++ addr = (unsigned long)p->addr + p->size; ++ size += p->size; ++ ++num; ++ last_p = p; ++ } ++ if (last_p) { ++ free_size = VMALLOC_END - ++ ((unsigned long)last_p->addr + last_p->size); ++ if (free_size > max_free_size) ++ max_free_size = free_size; ++ } ++ read_unlock(&vmlist_lock); ++ ++ printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n" ++ " Max_Free: %luKB Start: %lx End: %lx\n", ++ size/1024, (VMALLOC_END - 
VMALLOC_START)/1024, num, ++ max_free_size/1024, VMALLOC_START, VMALLOC_END); ++} +diff -uprN linux-2.6.8.1.orig/mm/vmscan.c linux-2.6.8.1-ve022stab078/mm/vmscan.c +--- linux-2.6.8.1.orig/mm/vmscan.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/mm/vmscan.c 2006-05-11 13:05:41.000000000 +0400 +@@ -38,6 +38,8 @@ + + #include <linux/swapops.h> + ++#include <ub/ub_mem.h> ++ + /* possible outcome of pageout() */ + typedef enum { + /* failed to write page out, page is locked */ +@@ -72,6 +74,8 @@ struct scan_control { + unsigned int gfp_mask; + + int may_writepage; ++ ++ struct oom_freeing_stat oom_stat; + }; + + /* +@@ -174,14 +178,16 @@ EXPORT_SYMBOL(remove_shrinker); + * are eligible for the caller's allocation attempt. It is used for balancing + * slab reclaim versus page reclaim. + */ +-static int shrink_slab(unsigned long scanned, unsigned int gfp_mask, ++static int shrink_slab(struct scan_control *sc, unsigned int gfp_mask, + unsigned long lru_pages) + { + struct shrinker *shrinker; ++ unsigned long scanned; + + if (down_trylock(&shrinker_sem)) + return 0; + ++ scanned = sc->nr_scanned; + list_for_each_entry(shrinker, &shrinker_list, list) { + unsigned long long delta; + +@@ -205,6 +211,7 @@ static int shrink_slab(unsigned long sca + shrinker->nr -= this_scan; + if (shrink_ret == -1) + break; ++ sc->oom_stat.slabs += shrink_ret; + cond_resched(); + } + } +@@ -389,6 +396,7 @@ static int shrink_list(struct list_head + page_map_unlock(page); + if (!add_to_swap(page)) + goto activate_locked; ++ sc->oom_stat.swapped++; + page_map_lock(page); + } + #endif /* CONFIG_SWAP */ +@@ -430,6 +438,7 @@ static int shrink_list(struct list_head + case PAGE_ACTIVATE: + goto activate_locked; + case PAGE_SUCCESS: ++ sc->oom_stat.written++; + if (PageWriteback(page) || PageDirty(page)) + goto keep; + /* +@@ -589,6 +598,7 @@ static void shrink_cache(struct zone *zo + else + mod_page_state_zone(zone, pgscan_direct, nr_scan); + nr_freed = shrink_list(&page_list, sc); ++ sc->oom_stat.freed += nr_freed; + if (current_is_kswapd()) + mod_page_state(kswapd_steal, nr_freed); + mod_page_state_zone(zone, pgsteal, nr_freed); +@@ -653,6 +663,7 @@ refill_inactive_zone(struct zone *zone, + long distress; + long swap_tendency; + ++ KSTAT_PERF_ENTER(refill_inact) + lru_add_drain(); + pgmoved = 0; + spin_lock_irq(&zone->lru_lock); +@@ -793,6 +804,8 @@ refill_inactive_zone(struct zone *zone, + + mod_page_state_zone(zone, pgrefill, pgscanned); + mod_page_state(pgdeactivate, pgdeactivate); ++ ++ KSTAT_PERF_LEAVE(refill_inact); + } + + /* +@@ -902,6 +915,10 @@ int try_to_free_pages(struct zone **zone + unsigned long lru_pages = 0; + int i; + ++ KSTAT_PERF_ENTER(ttfp); ++ ++ memset(&sc.oom_stat, 0, sizeof(struct oom_freeing_stat)); ++ sc.oom_stat.oom_generation = oom_generation; + sc.gfp_mask = gfp_mask; + sc.may_writepage = 0; + +@@ -920,7 +937,7 @@ int try_to_free_pages(struct zone **zone + sc.nr_reclaimed = 0; + sc.priority = priority; + shrink_caches(zones, &sc); +- shrink_slab(sc.nr_scanned, gfp_mask, lru_pages); ++ shrink_slab(&sc, gfp_mask, lru_pages); + if (reclaim_state) { + sc.nr_reclaimed += reclaim_state->reclaimed_slab; + reclaim_state->reclaimed_slab = 0; +@@ -949,10 +966,11 @@ int try_to_free_pages(struct zone **zone + blk_congestion_wait(WRITE, HZ/10); + } + if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) +- out_of_memory(gfp_mask); ++ out_of_memory(&sc.oom_stat, gfp_mask); + out: + for (i = 0; zones[i] != 0; i++) + zones[i]->prev_priority = zones[i]->temp_priority; ++ 
KSTAT_PERF_LEAVE(ttfp); + return ret; + } + +@@ -1062,7 +1080,7 @@ scan: + sc.priority = priority; + shrink_zone(zone, &sc); + reclaim_state->reclaimed_slab = 0; +- shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages); ++ shrink_slab(&sc, GFP_KERNEL, lru_pages); + sc.nr_reclaimed += reclaim_state->reclaimed_slab; + total_reclaimed += sc.nr_reclaimed; + if (zone->all_unreclaimable) +@@ -1142,8 +1160,8 @@ static int kswapd(void *p) + tsk->flags |= PF_MEMALLOC|PF_KSWAPD; + + for ( ; ; ) { +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&pgdat->kswapd_wait, &wait); +@@ -1223,7 +1241,7 @@ static int __init kswapd_init(void) + swap_setup(); + for_each_pgdat(pgdat) + pgdat->kswapd +- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); ++ = find_task_by_pid_all(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); + total_memory = nr_free_pagecache_pages(); + hotcpu_notifier(cpu_callback, 0); + return 0; +diff -uprN linux-2.6.8.1.orig/net/bluetooth/af_bluetooth.c linux-2.6.8.1-ve022stab078/net/bluetooth/af_bluetooth.c +--- linux-2.6.8.1.orig/net/bluetooth/af_bluetooth.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/bluetooth/af_bluetooth.c 2006-05-11 13:05:34.000000000 +0400 +@@ -64,7 +64,7 @@ static kmem_cache_t *bt_sock_cache; + + int bt_sock_register(int proto, struct net_proto_family *ops) + { +- if (proto >= BT_MAX_PROTO) ++ if (proto < 0 || proto >= BT_MAX_PROTO) + return -EINVAL; + + if (bt_proto[proto]) +@@ -77,7 +77,7 @@ EXPORT_SYMBOL(bt_sock_register); + + int bt_sock_unregister(int proto) + { +- if (proto >= BT_MAX_PROTO) ++ if (proto < 0 || proto >= BT_MAX_PROTO) + return -EINVAL; + + if (!bt_proto[proto]) +@@ -92,7 +92,7 @@ static int bt_sock_create(struct socket + { + int err = 0; + +- if (proto >= BT_MAX_PROTO) ++ if (proto < 0 || proto >= BT_MAX_PROTO) + return -EINVAL; + + #if defined(CONFIG_KMOD) +diff -uprN linux-2.6.8.1.orig/net/compat.c linux-2.6.8.1-ve022stab078/net/compat.c +--- linux-2.6.8.1.orig/net/compat.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/compat.c 2006-05-11 13:05:49.000000000 +0400 +@@ -90,20 +90,11 @@ int verify_compat_iovec(struct msghdr *k + } else + kern_msg->msg_name = NULL; + +- if(kern_msg->msg_iovlen > UIO_FASTIOV) { +- kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec), +- GFP_KERNEL); +- if(!kern_iov) +- return -ENOMEM; +- } +- + tot_len = iov_from_user_compat_to_kern(kern_iov, + (struct compat_iovec __user *)kern_msg->msg_iov, + kern_msg->msg_iovlen); + if(tot_len >= 0) + kern_msg->msg_iov = kern_iov; +- else if(kern_msg->msg_iovlen > UIO_FASTIOV) +- kfree(kern_iov); + + return tot_len; + } +@@ -123,6 +114,12 @@ int verify_compat_iovec(struct msghdr *k + (struct compat_cmsghdr __user *)((msg)->msg_control) : \ + (struct compat_cmsghdr __user *)NULL) + ++#define CMSG_COMPAT_OK(ucmlen, ucmsg, mhdr) \ ++ ((ucmlen) >= sizeof(struct compat_cmsghdr) && \ ++ (ucmlen) <= (unsigned long) \ ++ ((mhdr)->msg_controllen - \ ++ ((char *)(ucmsg) - (char *)(mhdr)->msg_control))) ++ + static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg, + struct compat_cmsghdr __user *cmsg, int cmsg_len) + { +@@ -137,13 +134,14 @@ static inline struct compat_cmsghdr __us + * thus placement) of cmsg headers and length are different for + * 32-bit apps. 
-DaveM + */ +-int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, ++int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, + unsigned char *stackbuf, int stackbuf_size) + { + struct compat_cmsghdr __user *ucmsg; + struct cmsghdr *kcmsg, *kcmsg_base; + compat_size_t ucmlen; + __kernel_size_t kcmlen, tmp; ++ int err = -EFAULT; + + kcmlen = 0; + kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; +@@ -153,15 +151,12 @@ int cmsghdr_from_user_compat_to_kern(str + return -EFAULT; + + /* Catch bogons. */ +- if(CMSG_COMPAT_ALIGN(ucmlen) < +- CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))) +- return -EINVAL; +- if((unsigned long)(((char __user *)ucmsg - (char __user *)kmsg->msg_control) +- + ucmlen) > kmsg->msg_controllen) ++ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) + return -EINVAL; + + tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); ++ tmp = CMSG_ALIGN(tmp); + kcmlen += tmp; + ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); + } +@@ -173,30 +168,34 @@ int cmsghdr_from_user_compat_to_kern(str + * until we have successfully copied over all of the data + * from the user. + */ +- if(kcmlen > stackbuf_size) +- kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); +- if(kcmsg == NULL) ++ if (kcmlen > stackbuf_size) ++ kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL); ++ if (kcmsg == NULL) + return -ENOBUFS; + + /* Now copy them over neatly. */ + memset(kcmsg, 0, kcmlen); + ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg); + while(ucmsg != NULL) { +- __get_user(ucmlen, &ucmsg->cmsg_len); ++ if (__get_user(ucmlen, &ucmsg->cmsg_len)) ++ goto Efault; ++ if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg)) ++ goto Einval; + tmp = ((ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); ++ if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) ++ goto Einval; + kcmsg->cmsg_len = tmp; +- __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); +- __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); +- +- /* Copy over the data. */ +- if(copy_from_user(CMSG_DATA(kcmsg), +- CMSG_COMPAT_DATA(ucmsg), +- (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) +- goto out_free_efault; ++ tmp = CMSG_ALIGN(tmp); ++ if (__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level) || ++ __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type) || ++ copy_from_user(CMSG_DATA(kcmsg), ++ CMSG_COMPAT_DATA(ucmsg), ++ (ucmlen - CMSG_COMPAT_ALIGN(sizeof(*ucmsg))))) ++ goto Efault; + + /* Advance. */ +- kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); ++ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); + ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); + } + +@@ -205,10 +204,12 @@ int cmsghdr_from_user_compat_to_kern(str + kmsg->msg_controllen = kcmlen; + return 0; + +-out_free_efault: +- if(kcmsg_base != (struct cmsghdr *)stackbuf) +- kfree(kcmsg_base); +- return -EFAULT; ++Einval: ++ err = -EINVAL; ++Efault: ++ if (kcmsg_base != (struct cmsghdr *)stackbuf) ++ sock_kfree_s(sk, kcmsg_base, kcmlen); ++ return err; + } + + int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) +@@ -303,107 +304,6 @@ void scm_detach_fds_compat(struct msghdr + } + + /* +- * For now, we assume that the compatibility and native version +- * of struct ipt_entry are the same - sfr. 
FIXME +- */ +-struct compat_ipt_replace { +- char name[IPT_TABLE_MAXNAMELEN]; +- u32 valid_hooks; +- u32 num_entries; +- u32 size; +- u32 hook_entry[NF_IP_NUMHOOKS]; +- u32 underflow[NF_IP_NUMHOOKS]; +- u32 num_counters; +- compat_uptr_t counters; /* struct ipt_counters * */ +- struct ipt_entry entries[0]; +-}; +- +-static int do_netfilter_replace(int fd, int level, int optname, +- char __user *optval, int optlen) +-{ +- struct compat_ipt_replace __user *urepl; +- struct ipt_replace __user *repl_nat; +- char name[IPT_TABLE_MAXNAMELEN]; +- u32 origsize, tmp32, num_counters; +- unsigned int repl_nat_size; +- int ret; +- int i; +- compat_uptr_t ucntrs; +- +- urepl = (struct compat_ipt_replace __user *)optval; +- if (get_user(origsize, &urepl->size)) +- return -EFAULT; +- +- /* Hack: Causes ipchains to give correct error msg --RR */ +- if (optlen != sizeof(*urepl) + origsize) +- return -ENOPROTOOPT; +- +- /* XXX Assumes that size of ipt_entry is the same both in +- * native and compat environments. +- */ +- repl_nat_size = sizeof(*repl_nat) + origsize; +- repl_nat = compat_alloc_user_space(repl_nat_size); +- +- ret = -EFAULT; +- if (put_user(origsize, &repl_nat->size)) +- goto out; +- +- if (!access_ok(VERIFY_READ, urepl, optlen) || +- !access_ok(VERIFY_WRITE, repl_nat, optlen)) +- goto out; +- +- if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) || +- __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name))) +- goto out; +- +- if (__get_user(tmp32, &urepl->valid_hooks) || +- __put_user(tmp32, &repl_nat->valid_hooks)) +- goto out; +- +- if (__get_user(tmp32, &urepl->num_entries) || +- __put_user(tmp32, &repl_nat->num_entries)) +- goto out; +- +- if (__get_user(num_counters, &urepl->num_counters) || +- __put_user(num_counters, &repl_nat->num_counters)) +- goto out; +- +- if (__get_user(ucntrs, &urepl->counters) || +- __put_user(compat_ptr(ucntrs), &repl_nat->counters)) +- goto out; +- +- if (__copy_in_user(&repl_nat->entries[0], +- &urepl->entries[0], +- origsize)) +- goto out; +- +- for (i = 0; i < NF_IP_NUMHOOKS; i++) { +- if (__get_user(tmp32, &urepl->hook_entry[i]) || +- __put_user(tmp32, &repl_nat->hook_entry[i]) || +- __get_user(tmp32, &urepl->underflow[i]) || +- __put_user(tmp32, &repl_nat->underflow[i])) +- goto out; +- } +- +- /* +- * Since struct ipt_counters just contains two u_int64_t members +- * we can just do the access_ok check here and pass the (converted) +- * pointer into the standard syscall. We hope that the pointer is +- * not misaligned ... +- */ +- if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs), +- num_counters * sizeof(struct ipt_counters))) +- goto out; +- +- +- ret = sys_setsockopt(fd, level, optname, +- (char __user *)repl_nat, repl_nat_size); +- +-out: +- return ret; +-} +- +-/* + * A struct sock_filter is architecture independent. 
+ */ + struct compat_sock_fprog { +@@ -455,15 +355,11 @@ static int do_set_sock_timeout(int fd, i + asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) + { +- if (optname == IPT_SO_SET_REPLACE) +- return do_netfilter_replace(fd, level, optname, +- optval, optlen); + if (optname == SO_ATTACH_FILTER) + return do_set_attach_filter(fd, level, optname, + optval, optlen); + if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + return do_set_sock_timeout(fd, level, optname, optval, optlen); +- + return sys_setsockopt(fd, level, optname, optval, optlen); + } + +@@ -499,7 +395,8 @@ static int do_get_sock_timeout(int fd, i + asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen) + { +- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) ++ if (level == SOL_SOCKET && ++ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) + return do_get_sock_timeout(fd, level, optname, optval, optlen); + return sys_getsockopt(fd, level, optname, optval, optlen); + } +diff -uprN linux-2.6.8.1.orig/net/core/datagram.c linux-2.6.8.1-ve022stab078/net/core/datagram.c +--- linux-2.6.8.1.orig/net/core/datagram.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/datagram.c 2006-05-11 13:05:39.000000000 +0400 +@@ -54,6 +54,8 @@ + #include <net/sock.h> + #include <net/checksum.h> + ++#include <ub/ub_net.h> ++ + + /* + * Is a socket 'connection oriented' ? +@@ -454,6 +456,7 @@ unsigned int datagram_poll(struct file * + { + struct sock *sk = sock->sk; + unsigned int mask; ++ int no_ubc_space; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; +@@ -461,8 +464,14 @@ unsigned int datagram_poll(struct file * + /* exceptional events? */ + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (sk->sk_shutdown == SHUTDOWN_MASK) { ++ no_ubc_space = 0; + mask |= POLLHUP; ++ } else { ++ no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH); ++ if (no_ubc_space) ++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH); ++ } + + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || +@@ -479,7 +488,7 @@ unsigned int datagram_poll(struct file * + } + + /* writable? */ +- if (sock_writeable(sk)) ++ if (!no_ubc_space && sock_writeable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); +diff -uprN linux-2.6.8.1.orig/net/core/dev.c linux-2.6.8.1-ve022stab078/net/core/dev.c +--- linux-2.6.8.1.orig/net/core/dev.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/dev.c 2006-05-11 13:05:42.000000000 +0400 +@@ -113,6 +113,7 @@ + #include <net/iw_handler.h> + #endif /* CONFIG_NET_RADIO */ + #include <asm/current.h> ++#include <ub/beancounter.h> + + /* This define, if set, will randomly drop a packet when congestion + * is more than moderate. It helps fairness in the multi-interface +@@ -182,25 +183,40 @@ static struct timer_list samp_timer = TI + * unregister_netdevice(), which must be called with the rtnl + * semaphore held. 
+ */ ++#if defined(CONFIG_VE) ++#define dev_tail (get_exec_env()->_net_dev_tail) ++#else + struct net_device *dev_base; + struct net_device **dev_tail = &dev_base; +-rwlock_t dev_base_lock = RW_LOCK_UNLOCKED; +- + EXPORT_SYMBOL(dev_base); ++#endif ++ ++rwlock_t dev_base_lock = RW_LOCK_UNLOCKED; + EXPORT_SYMBOL(dev_base_lock); + ++#ifdef CONFIG_VE ++#define MAX_UNMOVABLE_NETDEVICES (8*4096) ++static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8]; ++static LIST_HEAD(dev_global_list); ++#endif ++ + #define NETDEV_HASHBITS 8 + static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; + static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; + +-static inline struct hlist_head *dev_name_hash(const char *name) ++struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env) + { +- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); ++ unsigned hash; ++ if (!ve_is_super(env)) ++ return visible_dev_head(env); ++ hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); + return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + } + +-static inline struct hlist_head *dev_index_hash(int ifindex) ++struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env) + { ++ if (!ve_is_super(env)) ++ return visible_dev_index_head(env); + return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + } + +@@ -488,7 +504,7 @@ struct net_device *__dev_get_by_name(con + { + struct hlist_node *p; + +- hlist_for_each(p, dev_name_hash(name)) { ++ hlist_for_each(p, dev_name_hash(name, get_exec_env())) { + struct net_device *dev + = hlist_entry(p, struct net_device, name_hlist); + if (!strncmp(dev->name, name, IFNAMSIZ)) +@@ -520,6 +536,28 @@ struct net_device *dev_get_by_name(const + return dev; + } + ++/** ++ * __dev_global_get_by_name - find a device by its name in dev_global_list ++ * @name: name to find ++ * ++ * Find an interface by name. Must be called under RTNL semaphore ++ * If the name is found a pointer to the device ++ * is returned. If the name is not found then %NULL is returned. The ++ * reference counters are not incremented so the caller must be ++ * careful with locks. ++ */ ++ ++struct net_device *__dev_global_get_by_name(const char *name) ++{ ++ struct net_device *dev; ++ /* It's called relatively rarely */ ++ list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) { ++ if (strncmp(dev->name, name, IFNAMSIZ) == 0) ++ return dev; ++ } ++ return NULL; ++} ++ + /* + Return value is changed to int to prevent illegal usage in future. + It is still legal to use to check for device existence. +@@ -564,7 +602,7 @@ struct net_device *__dev_get_by_index(in + { + struct hlist_node *p; + +- hlist_for_each(p, dev_index_hash(ifindex)) { ++ hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) { + struct net_device *dev + = hlist_entry(p, struct net_device, index_hlist); + if (dev->ifindex == ifindex) +@@ -720,6 +758,23 @@ int dev_valid_name(const char *name) + * of the unit assigned or a negative errno code. 
+ */ + ++static inline void __dev_check_name(const char *dev_name, const char *name, ++ long *inuse, const int max_netdevices) ++{ ++ int i = 0; ++ char buf[IFNAMSIZ]; ++ ++ if (!sscanf(dev_name, name, &i)) ++ return; ++ if (i < 0 || i >= max_netdevices) ++ return; ++ ++ /* avoid cases where sscanf is not exact inverse of printf */ ++ snprintf(buf, sizeof(buf), name, i); ++ if (!strncmp(buf, dev_name, IFNAMSIZ)) ++ set_bit(i, inuse); ++} ++ + int dev_alloc_name(struct net_device *dev, const char *name) + { + int i = 0; +@@ -744,16 +799,18 @@ int dev_alloc_name(struct net_device *de + if (!inuse) + return -ENOMEM; + +- for (d = dev_base; d; d = d->next) { +- if (!sscanf(d->name, name, &i)) +- continue; +- if (i < 0 || i >= max_netdevices) +- continue; +- +- /* avoid cases where sscanf is not exact inverse of printf */ +- snprintf(buf, sizeof(buf), name, i); +- if (!strncmp(buf, d->name, IFNAMSIZ)) +- set_bit(i, inuse); ++ if (ve_is_super(get_exec_env())) { ++ list_for_each_entry(d, &dev_global_list, ++ dev_global_list_entry) { ++ __dev_check_name(d->name, name, inuse, ++ max_netdevices); ++ } ++ } ++ else { ++ for (d = dev_base; d; d = d->next) { ++ __dev_check_name(d->name, name, inuse, ++ max_netdevices); ++ } + } + + i = find_first_zero_bit(inuse, max_netdevices); +@@ -761,7 +818,11 @@ int dev_alloc_name(struct net_device *de + } + + snprintf(buf, sizeof(buf), name, i); +- if (!__dev_get_by_name(buf)) { ++ if (ve_is_super(get_exec_env())) ++ d = __dev_global_get_by_name(buf); ++ else ++ d = __dev_get_by_name(buf); ++ if (d == NULL) { + strlcpy(dev->name, buf, IFNAMSIZ); + return i; + } +@@ -794,13 +855,15 @@ int dev_change_name(struct net_device *d + if (!dev_valid_name(newname)) + return -EINVAL; + ++ /* Rename of devices in VE is prohibited by CAP_NET_ADMIN */ ++ + if (strchr(newname, '%')) { + err = dev_alloc_name(dev, newname); + if (err < 0) + return err; + strcpy(newname, dev->name); + } +- else if (__dev_get_by_name(newname)) ++ else if (__dev_global_get_by_name(newname)) + return -EEXIST; + else + strlcpy(dev->name, newname, IFNAMSIZ); +@@ -808,7 +871,8 @@ int dev_change_name(struct net_device *d + err = class_device_rename(&dev->class_dev, dev->name); + if (!err) { + hlist_del(&dev->name_hlist); +- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); ++ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ++ get_exec_env())); + notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + } + +@@ -1338,6 +1402,25 @@ int dev_queue_xmit(struct sk_buff *skb) + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); + #endif + if (q->enqueue) { ++ struct user_beancounter *ub; ++ ++ ub = netdev_bc(dev)->exec_ub; ++ /* the skb CAN be already charged if it transmitted via ++ * something like bonding device */ ++ if (ub && (skb_bc(skb)->resource == 0)) { ++ unsigned long chargesize; ++ chargesize = skb_charge_fullsize(skb); ++ if (charge_beancounter(ub, UB_OTHERSOCKBUF, ++ chargesize, UB_SOFT)) { ++ rcu_read_unlock(); ++ rc = -ENOMEM; ++ goto out_kfree_skb; ++ } ++ skb_bc(skb)->ub = ub; ++ skb_bc(skb)->charged = chargesize; ++ skb_bc(skb)->resource = UB_OTHERSOCKBUF; ++ } ++ + /* Grab device queue */ + spin_lock_bh(&dev->queue_lock); + +@@ -1761,6 +1844,7 @@ int netif_receive_skb(struct sk_buff *sk + struct packet_type *ptype, *pt_prev; + int ret = NET_RX_DROP; + unsigned short type; ++ struct ve_struct *old_env; + + #ifdef CONFIG_NETPOLL_RX + if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) { +@@ -1779,6 +1863,15 @@ int netif_receive_skb(struct sk_buff *sk + 
skb->h.raw = skb->nh.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->mac.raw; + ++ /* ++ * Skb might be alloced in another VE context, than its device works. ++ * So, set the correct owner_env. ++ */ ++ skb->owner_env = skb->dev->owner_env; ++ BUG_ON(skb->owner_env == NULL); ++ ++ old_env = set_exec_env(VE_OWNER_SKB(skb)); ++ + pt_prev = NULL; + #ifdef CONFIG_NET_CLS_ACT + if (skb->tc_verd & TC_NCLS) { +@@ -1844,6 +1937,7 @@ ncls: + + out: + rcu_read_unlock(); ++ (void)set_exec_env(old_env); + return ret; + } + +@@ -2240,7 +2334,8 @@ static int __init dev_proc_init(void) + + if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + goto out; +- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) ++ if (!__proc_net_fops_create("net/softnet_stat", S_IRUGO, ++ &softnet_seq_fops, NULL)) + goto out_dev; + if (wireless_proc_init()) + goto out_softnet; +@@ -2248,7 +2343,7 @@ static int __init dev_proc_init(void) + out: + return rc; + out_softnet: +- proc_net_remove("softnet_stat"); ++ __proc_net_remove("net/softnet_stat"); + out_dev: + proc_net_remove("dev"); + goto out; +@@ -2314,6 +2409,9 @@ void dev_set_promiscuity(struct net_devi + dev->flags |= IFF_PROMISC; + if ((dev->promiscuity += inc) == 0) + dev->flags &= ~IFF_PROMISC; ++ /* Promiscous mode on these devices does not mean anything */ ++ if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) ++ return; + if (dev->flags ^ old_flags) { + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", +@@ -2485,6 +2583,8 @@ static int dev_ifsioc(struct ifreq *ifr, + return dev_set_mtu(dev, ifr->ifr_mtu); + + case SIOCGIFHWADDR: ++ memset(ifr->ifr_hwaddr.sa_data, 0, ++ sizeof(ifr->ifr_hwaddr.sa_data)); + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); + ifr->ifr_hwaddr.sa_family = dev->type; +@@ -2720,9 +2820,28 @@ int dev_ioctl(unsigned int cmd, void __u + * - require strict serialization. + * - do not return a value + */ ++ case SIOCSIFMTU: ++ if (!capable(CAP_NET_ADMIN) && ++ !capable(CAP_VE_NET_ADMIN)) ++ return -EPERM; ++ dev_load(ifr.ifr_name); ++ rtnl_lock(); ++ if (!ve_is_super(get_exec_env())) { ++ struct net_device *dev; ++ ret = -ENODEV; ++ if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) ++ goto out_set_mtu_unlock; ++ ret = -EPERM; ++ if (ifr.ifr_mtu > dev->orig_mtu) ++ goto out_set_mtu_unlock; ++ } ++ ret = dev_ifsioc(&ifr, cmd); ++out_set_mtu_unlock: ++ rtnl_unlock(); ++ return ret; ++ + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: +- case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: +@@ -2798,25 +2917,75 @@ int dev_ioctl(unsigned int cmd, void __u + } + } + +- + /** + * dev_new_index - allocate an ifindex + * + * Returns a suitable unique value for a new device interface +- * number. The caller must hold the rtnl semaphore or the ++ * number. The caller must hold the rtnl semaphore or the + * dev_base_lock to be sure it remains unique. 
++ * ++ * Note: dev->name must be valid on entrance + */ +-int dev_new_index(void) ++static int dev_ve_new_index(void) + { +- static int ifindex; ++#ifdef CONFIG_VE ++ int *ifindex = &get_exec_env()->ifindex; ++ int delta = 2; ++#else ++ static int s_ifindex; ++ int *ifindex = &s_ifindex; ++ int delta = 1; ++#endif + for (;;) { +- if (++ifindex <= 0) +- ifindex = 1; +- if (!__dev_get_by_index(ifindex)) +- return ifindex; ++ *ifindex += delta; ++ if (*ifindex <= 0) ++ *ifindex = 1; ++ if (!__dev_get_by_index(*ifindex)) ++ return *ifindex; + } + } + ++static int dev_glb_new_index(void) ++{ ++#ifdef CONFIG_VE ++ int i; ++ ++ i = find_first_zero_bit((long*)unmovable_ifindex_list, ++ MAX_UNMOVABLE_NETDEVICES); ++ ++ if (i == MAX_UNMOVABLE_NETDEVICES) ++ return -EMFILE; ++ ++ __set_bit(i, (long*)unmovable_ifindex_list); ++ return (i + 1) * 2; ++#endif ++} ++ ++static void dev_glb_free_index(struct net_device *dev) ++{ ++#ifdef CONFIG_VE ++ int bit; ++ ++ bit = dev->ifindex / 2 - 1; ++ BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES); ++ __clear_bit(bit, (long*)unmovable_ifindex_list); ++#endif ++} ++ ++int dev_new_index(struct net_device *dev) ++{ ++ if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev)) ++ return dev_glb_new_index(); ++ ++ return dev_ve_new_index(); ++} ++ ++void dev_free_index(struct net_device *dev) ++{ ++ if ((dev->ifindex % 2) == 0) ++ dev_glb_free_index(dev); ++} ++ + static int dev_boot_phase = 1; + + /* Delayed registration/unregisteration */ +@@ -2860,6 +3029,10 @@ int register_netdevice(struct net_device + /* When net_device's are persistent, this will be fatal. */ + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + ++ ret = -EPERM; ++ if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev)) ++ goto out; ++ + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->xmit_lock); + dev->xmit_lock_owner = -1; +@@ -2879,27 +3052,32 @@ int register_netdevice(struct net_device + if (ret) { + if (ret > 0) + ret = -EIO; +- goto out_err; ++ goto out_free_div; + } + } + + if (!dev_valid_name(dev->name)) { + ret = -EINVAL; +- goto out_err; ++ goto out_free_div; ++ } ++ ++ dev->ifindex = dev_new_index(dev); ++ if (dev->ifindex < 0) { ++ ret = dev->ifindex; ++ goto out_free_div; + } + +- dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + + /* Check for existence of name */ +- head = dev_name_hash(dev->name); ++ head = dev_name_hash(dev->name, get_exec_env()); + hlist_for_each(p, head) { + struct net_device *d + = hlist_entry(p, struct net_device, name_hlist); + if (!strncmp(d->name, dev->name, IFNAMSIZ)) { + ret = -EEXIST; +- goto out_err; ++ goto out_free_ind; + } + } + +@@ -2929,12 +3107,19 @@ int register_netdevice(struct net_device + set_bit(__LINK_STATE_PRESENT, &dev->state); + + dev->next = NULL; ++ dev->owner_env = get_exec_env(); ++ dev->orig_mtu = dev->mtu; ++ netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub()); ++ netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub()); + dev_init_scheduler(dev); ++ if (ve_is_super(get_exec_env())) ++ list_add_tail(&dev->dev_global_list_entry, &dev_global_list); + write_lock_bh(&dev_base_lock); + *dev_tail = dev; + dev_tail = &dev->next; + hlist_add_head(&dev->name_hlist, head); +- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); ++ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ++ get_exec_env())); + dev_hold(dev); + dev->reg_state = NETREG_REGISTERING; + write_unlock_bh(&dev_base_lock); +@@ -2948,7 +3133,9 @@ int register_netdevice(struct net_device + + out: + 
return ret; +-out_err: ++out_free_ind: ++ dev_free_index(dev); ++out_free_div: + free_divert_blk(dev); + goto out; + } +@@ -3032,6 +3219,7 @@ void netdev_run_todo(void) + { + struct list_head list = LIST_HEAD_INIT(list); + int err; ++ struct ve_struct *current_env; + + + /* Need to guard against multiple cpu's getting out of order. */ +@@ -3050,22 +3238,30 @@ void netdev_run_todo(void) + list_splice_init(&net_todo_list, &list); + spin_unlock(&net_todo_list_lock); + ++ current_env = get_exec_env(); + while (!list_empty(&list)) { + struct net_device *dev + = list_entry(list.next, struct net_device, todo_list); + list_del(&dev->todo_list); + ++ (void)set_exec_env(dev->owner_env); + switch(dev->reg_state) { + case NETREG_REGISTERING: + err = netdev_register_sysfs(dev); +- if (err) ++ if (err) { + printk(KERN_ERR "%s: failed sysfs registration (%d)\n", + dev->name, err); ++ dev->reg_state = NETREG_REGISTER_ERR; ++ break; ++ } + dev->reg_state = NETREG_REGISTERED; + break; + + case NETREG_UNREGISTERING: + netdev_unregister_sysfs(dev); ++ /* fall through */ ++ ++ case NETREG_REGISTER_ERR: + dev->reg_state = NETREG_UNREGISTERED; + + netdev_wait_allrefs(dev); +@@ -3076,6 +3272,10 @@ void netdev_run_todo(void) + BUG_TRAP(!dev->ip6_ptr); + BUG_TRAP(!dev->dn_ptr); + ++ put_beancounter(netdev_bc(dev)->exec_ub); ++ put_beancounter(netdev_bc(dev)->owner_ub); ++ netdev_bc(dev)->exec_ub = NULL; ++ netdev_bc(dev)->owner_ub = NULL; + + /* It must be the very last action, + * after this 'dev' may point to freed up memory. +@@ -3090,6 +3290,7 @@ void netdev_run_todo(void) + break; + } + } ++ (void)set_exec_env(current_env); + + out: + up(&net_todo_run_mutex); +@@ -3156,7 +3357,8 @@ int unregister_netdevice(struct net_devi + return -ENODEV; + } + +- BUG_ON(dev->reg_state != NETREG_REGISTERED); ++ BUG_ON(dev->reg_state != NETREG_REGISTERED && ++ dev->reg_state != NETREG_REGISTER_ERR); + + /* If device is running, close it first. */ + if (dev->flags & IFF_UP) +@@ -3172,6 +3374,8 @@ int unregister_netdevice(struct net_devi + dev_tail = dp; + *dp = d->next; + write_unlock_bh(&dev_base_lock); ++ if (ve_is_super(get_exec_env())) ++ list_del(&dev->dev_global_list_entry); + break; + } + } +@@ -3181,7 +3385,8 @@ int unregister_netdevice(struct net_devi + return -ENODEV; + } + +- dev->reg_state = NETREG_UNREGISTERING; ++ if (dev->reg_state != NETREG_REGISTER_ERR) ++ dev->reg_state = NETREG_UNREGISTERING; + + synchronize_net(); + +@@ -3205,6 +3410,8 @@ int unregister_netdevice(struct net_devi + /* Notifier chain MUST detach us from master device. 
*/ + BUG_TRAP(!dev->master); + ++ dev_free_index(dev); ++ + free_divert_blk(dev); + + /* Finish processing unregister after unlock */ +@@ -3352,6 +3559,8 @@ EXPORT_SYMBOL(dev_get_by_name); + EXPORT_SYMBOL(dev_getbyhwaddr); + EXPORT_SYMBOL(dev_ioctl); + EXPORT_SYMBOL(dev_new_index); ++EXPORT_SYMBOL(dev_name_hash); ++EXPORT_SYMBOL(dev_index_hash); + EXPORT_SYMBOL(dev_open); + EXPORT_SYMBOL(dev_queue_xmit); + EXPORT_SYMBOL(dev_queue_xmit_nit); +diff -uprN linux-2.6.8.1.orig/net/core/dev_mcast.c linux-2.6.8.1-ve022stab078/net/core/dev_mcast.c +--- linux-2.6.8.1.orig/net/core/dev_mcast.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/dev_mcast.c 2006-05-11 13:05:42.000000000 +0400 +@@ -297,3 +297,4 @@ void __init dev_mcast_init(void) + EXPORT_SYMBOL(dev_mc_add); + EXPORT_SYMBOL(dev_mc_delete); + EXPORT_SYMBOL(dev_mc_upload); ++EXPORT_SYMBOL(dev_mc_discard); +diff -uprN linux-2.6.8.1.orig/net/core/dst.c linux-2.6.8.1-ve022stab078/net/core/dst.c +--- linux-2.6.8.1.orig/net/core/dst.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/dst.c 2006-05-11 13:05:41.000000000 +0400 +@@ -47,6 +47,7 @@ static struct timer_list dst_gc_timer = + static void dst_run_gc(unsigned long dummy) + { + int delayed = 0; ++ int work_performed; + struct dst_entry * dst, **dstp; + + if (!spin_trylock(&dst_lock)) { +@@ -54,9 +55,9 @@ static void dst_run_gc(unsigned long dum + return; + } + +- + del_timer(&dst_gc_timer); + dstp = &dst_garbage_list; ++ work_performed = 0; + while ((dst = *dstp) != NULL) { + if (atomic_read(&dst->__refcnt)) { + dstp = &dst->next; +@@ -64,6 +65,7 @@ static void dst_run_gc(unsigned long dum + continue; + } + *dstp = dst->next; ++ work_performed = 1; + + dst = dst_destroy(dst); + if (dst) { +@@ -88,9 +90,14 @@ static void dst_run_gc(unsigned long dum + dst_gc_timer_inc = DST_GC_MAX; + goto out; + } +- if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) +- dst_gc_timer_expires = DST_GC_MAX; +- dst_gc_timer_inc += DST_GC_INC; ++ if (!work_performed) { ++ if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) ++ dst_gc_timer_expires = DST_GC_MAX; ++ dst_gc_timer_inc += DST_GC_INC; ++ } else { ++ dst_gc_timer_inc = DST_GC_INC; ++ dst_gc_timer_expires = DST_GC_MIN; ++ } + dst_gc_timer.expires = jiffies + dst_gc_timer_expires; + #if RT_CACHE_DEBUG >= 2 + printk("dst_total: %d/%d %ld\n", +@@ -231,13 +238,13 @@ static void dst_ifdown(struct dst_entry + + do { + if (unregister) { +- dst->dev = &loopback_dev; +- dev_hold(&loopback_dev); ++ dst->dev = &visible_loopback_dev; ++ dev_hold(&visible_loopback_dev); + dev_put(dev); + if (dst->neighbour && dst->neighbour->dev == dev) { +- dst->neighbour->dev = &loopback_dev; ++ dst->neighbour->dev = &visible_loopback_dev; + dev_put(dev); +- dev_hold(&loopback_dev); ++ dev_hold(&visible_loopback_dev); + } + } + +@@ -255,12 +262,15 @@ static int dst_dev_event(struct notifier + switch (event) { + case NETDEV_UNREGISTER: + case NETDEV_DOWN: +- spin_lock_bh(&dst_lock); ++ local_bh_disable(); ++ dst_run_gc(0); ++ spin_lock(&dst_lock); + for (dst = dst_garbage_list; dst; dst = dst->next) { + if (dst->dev == dev) + dst_ifdown(dst, event != NETDEV_DOWN); + } +- spin_unlock_bh(&dst_lock); ++ spin_unlock(&dst_lock); ++ local_bh_enable(); + break; + } + return NOTIFY_DONE; +diff -uprN linux-2.6.8.1.orig/net/core/filter.c linux-2.6.8.1-ve022stab078/net/core/filter.c +--- linux-2.6.8.1.orig/net/core/filter.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/filter.c 2006-05-11 
13:05:39.000000000 +0400 +@@ -33,6 +33,7 @@ + #include <linux/timer.h> + #include <asm/system.h> + #include <asm/uaccess.h> ++#include <asm/unaligned.h> + #include <linux/filter.h> + + /* No hurry in this branch */ +@@ -169,7 +170,7 @@ int sk_run_filter(struct sk_buff *skb, s + k = fentry->k; + load_w: + if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) { +- A = ntohl(*(u32*)&data[k]); ++ A = ntohl(get_unaligned((u32*)&data[k])); + continue; + } + if (k < 0) { +@@ -179,7 +180,7 @@ int sk_run_filter(struct sk_buff *skb, s + break; + ptr = load_pointer(skb, k); + if (ptr) { +- A = ntohl(*(u32*)ptr); ++ A = ntohl(get_unaligned((u32*)ptr)); + continue; + } + } else { +@@ -194,7 +195,7 @@ int sk_run_filter(struct sk_buff *skb, s + k = fentry->k; + load_h: + if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) { +- A = ntohs(*(u16*)&data[k]); ++ A = ntohs(get_unaligned((u16*)&data[k])); + continue; + } + if (k < 0) { +@@ -204,7 +205,7 @@ int sk_run_filter(struct sk_buff *skb, s + break; + ptr = load_pointer(skb, k); + if (ptr) { +- A = ntohs(*(u16*)ptr); ++ A = ntohs(get_unaligned((u16*)ptr)); + continue; + } + } else { +@@ -398,7 +399,7 @@ int sk_attach_filter(struct sock_fprog * + if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS) + return -EINVAL; + +- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); ++ fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC); + if (!fp) + return -ENOMEM; + if (copy_from_user(fp->insns, fprog->filter, fsize)) { +diff -uprN linux-2.6.8.1.orig/net/core/neighbour.c linux-2.6.8.1-ve022stab078/net/core/neighbour.c +--- linux-2.6.8.1.orig/net/core/neighbour.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/neighbour.c 2006-05-11 13:05:41.000000000 +0400 +@@ -652,6 +652,11 @@ static void neigh_timer_handler(unsigned + struct neighbour *neigh = (struct neighbour *)arg; + unsigned state; + int notify = 0; ++ struct ve_struct *env; ++ struct user_beancounter *ub; ++ ++ env = set_exec_env(neigh->dev->owner_env); ++ ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub); + + write_lock(&neigh->lock); + +@@ -706,6 +711,8 @@ static void neigh_timer_handler(unsigned + + neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); + atomic_inc(&neigh->probes); ++ (void)set_exec_ub(ub); ++ set_exec_env(env); + return; + + out: +@@ -715,6 +722,8 @@ out: + neigh_app_notify(neigh); + #endif + neigh_release(neigh); ++ (void)set_exec_ub(ub); ++ set_exec_env(env); + } + + int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +@@ -1068,6 +1077,12 @@ static void neigh_proxy_process(unsigned + skb = skb->next; + if (tdif <= 0) { + struct net_device *dev = back->dev; ++ struct ve_struct *env; ++ struct user_beancounter *ub; ++ ++ env = set_exec_env(dev->owner_env); ++ ub = set_exec_ub(netdev_bc(dev)->exec_ub); ++ + __skb_unlink(back, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) + tbl->proxy_redo(back); +@@ -1075,6 +1090,9 @@ static void neigh_proxy_process(unsigned + kfree_skb(back); + + dev_put(dev); ++ ++ (void)set_exec_ub(ub); ++ set_exec_env(env); + } else if (!sched_next || tdif < sched_next) + sched_next = tdif; + } +@@ -1222,6 +1240,9 @@ int neigh_delete(struct sk_buff *skb, st + struct net_device *dev = NULL; + int err = -ENODEV; + ++ if (!ve_is_super(get_exec_env())) ++ return -EACCES; ++ + if (ndm->ndm_ifindex && + (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + goto out; +@@ -1272,6 +1293,9 @@ int neigh_add(struct sk_buff *skb, struc + struct net_device *dev = NULL; + int err = -ENODEV; + ++ if 
(!ve_is_super(get_exec_env())) ++ return -EACCES; ++ + if (ndm->ndm_ifindex && + (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + goto out; +@@ -1418,6 +1442,9 @@ int neigh_dump_info(struct sk_buff *skb, + struct neigh_table *tbl; + int t, family, s_t; + ++ if (!ve_is_super(get_exec_env())) ++ return -EACCES; ++ + read_lock(&neigh_tbl_lock); + family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + s_t = cb->args[0]; +@@ -1636,11 +1663,17 @@ int neigh_sysctl_register(struct net_dev + int p_id, int pdev_id, char *p_name, + proc_handler *handler) + { +- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); ++ struct neigh_sysctl_table *t; + const char *dev_name_source = NULL; + char *dev_name = NULL; + int err = 0; + ++ /* This function is called from VExx only from devinet_init, ++ and it is does not matter what is returned */ ++ if (!ve_is_super(get_exec_env())) ++ return 0; ++ ++ t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOBUFS; + memcpy(t, &neigh_sysctl_template, sizeof(*t)); +@@ -1710,6 +1743,8 @@ int neigh_sysctl_register(struct net_dev + + void neigh_sysctl_unregister(struct neigh_parms *p) + { ++ if (!ve_is_super(get_exec_env())) ++ return; + if (p->sysctl_table) { + struct neigh_sysctl_table *t = p->sysctl_table; + p->sysctl_table = NULL; +diff -uprN linux-2.6.8.1.orig/net/core/net-sysfs.c linux-2.6.8.1-ve022stab078/net/core/net-sysfs.c +--- linux-2.6.8.1.orig/net/core/net-sysfs.c 2004-08-14 14:56:14.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/net-sysfs.c 2006-05-11 13:05:42.000000000 +0400 +@@ -370,18 +370,26 @@ static void netdev_release(struct class_ + struct net_device *dev + = container_of(cd, struct net_device, class_dev); + +- BUG_ON(dev->reg_state != NETREG_RELEASED); ++ BUG_ON(dev->reg_state != NETREG_RELEASED && ++ dev->reg_state != NETREG_REGISTERING); + + kfree((char *)dev - dev->padded); + } + +-static struct class net_class = { ++struct class net_class = { + .name = "net", + .release = netdev_release, + #ifdef CONFIG_HOTPLUG + .hotplug = netdev_hotplug, + #endif + }; ++EXPORT_SYMBOL(net_class); ++ ++#ifndef CONFIG_VE ++#define visible_net_class net_class ++#else ++#define visible_net_class (*get_exec_env()->net_class) ++#endif + + void netdev_unregister_sysfs(struct net_device * net) + { +@@ -406,7 +414,7 @@ int netdev_register_sysfs(struct net_dev + struct class_device_attribute *attr; + int ret; + +- class_dev->class = &net_class; ++ class_dev->class = &visible_net_class; + class_dev->class_data = net; + net->last_stats = net->get_stats; + +@@ -440,12 +448,21 @@ out_cleanup: + out_unreg: + printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n", + net->name, ret); +- class_device_unregister(class_dev); ++ /* put is called in free_netdev() */ ++ class_device_del(class_dev); + out: + return ret; + } + ++void prepare_sysfs_netdev(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->net_class = &net_class; ++#endif ++} ++ + int netdev_sysfs_init(void) + { ++ prepare_sysfs_netdev(); + return class_register(&net_class); + } +diff -uprN linux-2.6.8.1.orig/net/core/netfilter.c linux-2.6.8.1-ve022stab078/net/core/netfilter.c +--- linux-2.6.8.1.orig/net/core/netfilter.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/netfilter.c 2006-05-11 13:05:41.000000000 +0400 +@@ -49,6 +49,13 @@ struct list_head nf_hooks[NPROTO][NF_MAX + static LIST_HEAD(nf_sockopts); + static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED; + ++#ifdef CONFIG_VE_IPTABLES ++#define ve_nf_hooks \ ++ ((struct list_head 
(*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks)) ++#else ++#define ve_nf_hooks nf_hooks ++#endif ++ + /* + * A queue handler may be registered for each protocol. Each is protected by + * long term mutex. The handler must provide an an outfn() to accept packets +@@ -65,7 +72,7 @@ int nf_register_hook(struct nf_hook_ops + struct list_head *i; + + spin_lock_bh(&nf_hook_lock); +- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { ++ list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) { + if (reg->priority < ((struct nf_hook_ops *)i)->priority) + break; + } +@@ -76,6 +83,32 @@ int nf_register_hook(struct nf_hook_ops + return 0; + } + ++int visible_nf_register_hook(struct nf_hook_ops *reg) ++{ ++ int ret = 0; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct nf_hook_ops *tmp; ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, reg, sizeof(struct nf_hook_ops)); ++ reg = tmp; ++ } ++ ++ ret = nf_register_hook(reg); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) ++ kfree(reg); ++nomem: ++ return ret; ++} ++ + void nf_unregister_hook(struct nf_hook_ops *reg) + { + spin_lock_bh(&nf_hook_lock); +@@ -85,6 +118,28 @@ void nf_unregister_hook(struct nf_hook_o + synchronize_net(); + } + ++int visible_nf_unregister_hook(struct nf_hook_ops *reg) ++{ ++ struct nf_hook_ops *i; ++ ++ spin_lock_bh(&nf_hook_lock); ++ list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) { ++ if (reg->hook == i->hook) { ++ reg = i; ++ break; ++ } ++ } ++ spin_unlock_bh(&nf_hook_lock); ++ if (reg != i) ++ return -ENOENT; ++ ++ nf_unregister_hook(reg); ++ ++ if (!ve_is_super(get_exec_env())) ++ kfree(reg); ++ return 0; ++} ++ + /* Do exclusive ranges overlap? */ + static inline int overlap(int min1, int max1, int min2, int max2) + { +@@ -292,6 +347,12 @@ static int nf_sockopt(struct sock *sk, i + struct nf_sockopt_ops *ops; + int ret; + ++#ifdef CONFIG_VE_IPTABLES ++ if (!get_exec_env()->_nf_hooks || ++ !get_exec_env()->_ipt_standard_target) ++ return -ENOPROTOOPT; ++#endif ++ + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + +@@ -515,9 +576,9 @@ int nf_hook_slow(int pf, unsigned int ho + skb->nf_debug |= (1 << hook); + #endif + +- elem = &nf_hooks[pf][hook]; ++ elem = &ve_nf_hooks[pf][hook]; + next_hook: +- verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, ++ verdict = nf_iterate(&ve_nf_hooks[pf][hook], &skb, hook, indev, + outdev, &elem, okfn, hook_thresh); + if (verdict == NF_QUEUE) { + NFDEBUG("nf_hook: Verdict = QUEUE.\n"); +@@ -563,12 +624,12 @@ void nf_reinject(struct sk_buff *skb, st + /* Drop reference to owner of hook which queued us. */ + module_put(info->elem->owner); + +- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { ++ list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) { + if (i == elem) + break; + } + +- if (elem == &nf_hooks[info->pf][info->hook]) { ++ if (elem == &ve_nf_hooks[info->pf][info->hook]) { + /* The module which sent it to userspace is gone. */ + NFDEBUG("%s: module disappeared, dropping packet.\n", + __FUNCTION__); +@@ -583,7 +644,7 @@ void nf_reinject(struct sk_buff *skb, st + + if (verdict == NF_ACCEPT) { + next_hook: +- verdict = nf_iterate(&nf_hooks[info->pf][info->hook], ++ verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook], + &skb, info->hook, + info->indev, info->outdev, &elem, + info->okfn, INT_MIN); +@@ -808,26 +869,69 @@ EXPORT_SYMBOL(nf_log_packet); + with it. 
*/ + void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *); + +-void __init netfilter_init(void) ++void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS]) + { + int i, h; + + for (i = 0; i < NPROTO; i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) +- INIT_LIST_HEAD(&nf_hooks[i][h]); ++ INIT_LIST_HEAD(&nh[i][h]); + } + } + ++int init_netfilter(void) ++{ ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; ++ ++ envid = get_exec_env(); ++ envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL); ++ if (envid->_nf_hooks == NULL) ++ return -ENOMEM; ++ ++ /* FIXME: charge ubc */ ++ ++ init_nf_hooks(envid->_nf_hooks); ++ return 0; ++#else ++ init_nf_hooks(nf_hooks); ++ return 0; ++#endif ++} ++ ++#ifdef CONFIG_VE_IPTABLES ++void fini_netfilter(void) ++{ ++ struct ve_struct *envid; ++ ++ envid = get_exec_env(); ++ if (envid->_nf_hooks != NULL) ++ kfree(envid->_nf_hooks); ++ envid->_nf_hooks = NULL; ++ ++ /* FIXME: uncharge ubc */ ++} ++#endif ++ ++void __init netfilter_init(void) ++{ ++ init_netfilter(); ++} ++ + EXPORT_SYMBOL(ip_ct_attach); + EXPORT_SYMBOL(ip_route_me_harder); + EXPORT_SYMBOL(nf_getsockopt); + EXPORT_SYMBOL(nf_hook_slow); + EXPORT_SYMBOL(nf_hooks); + EXPORT_SYMBOL(nf_register_hook); ++EXPORT_SYMBOL(visible_nf_register_hook); + EXPORT_SYMBOL(nf_register_queue_handler); + EXPORT_SYMBOL(nf_register_sockopt); + EXPORT_SYMBOL(nf_reinject); + EXPORT_SYMBOL(nf_setsockopt); + EXPORT_SYMBOL(nf_unregister_hook); ++EXPORT_SYMBOL(visible_nf_unregister_hook); + EXPORT_SYMBOL(nf_unregister_queue_handler); + EXPORT_SYMBOL(nf_unregister_sockopt); ++EXPORT_SYMBOL(init_netfilter); ++EXPORT_SYMBOL(fini_netfilter); +diff -uprN linux-2.6.8.1.orig/net/core/rtnetlink.c linux-2.6.8.1-ve022stab078/net/core/rtnetlink.c +--- linux-2.6.8.1.orig/net/core/rtnetlink.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/rtnetlink.c 2006-05-11 13:05:42.000000000 +0400 +@@ -294,6 +294,8 @@ static int rtnetlink_dump_all(struct sk_ + if (rtnetlink_links[idx] == NULL || + rtnetlink_links[idx][type].dumpit == NULL) + continue; ++ if (vz_security_proto_check(idx, 0, 0)) ++ continue; + if (idx > s_idx) + memset(&cb->args[0], 0, sizeof(cb->args)); + if (rtnetlink_links[idx][type].dumpit(skb, cb)) +@@ -362,7 +364,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s + return 0; + + family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; +- if (family >= NPROTO) { ++ if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) { + *errp = -EAFNOSUPPORT; + return -1; + } +@@ -488,7 +490,13 @@ static void rtnetlink_rcv(struct sock *s + return; + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { +- if (rtnetlink_rcv_skb(skb)) { ++ int ret; ++ struct ve_struct *old_env; ++ ++ old_env = set_exec_env(VE_OWNER_SKB(skb)); ++ ret = rtnetlink_rcv_skb(skb); ++ (void)set_exec_env(old_env); ++ if (ret) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, + skb); +diff -uprN linux-2.6.8.1.orig/net/core/scm.c linux-2.6.8.1-ve022stab078/net/core/scm.c +--- linux-2.6.8.1.orig/net/core/scm.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/scm.c 2006-05-11 13:05:41.000000000 +0400 +@@ -34,6 +34,7 @@ + #include <net/compat.h> + #include <net/scm.h> + ++#include <ub/ub_mem.h> + + /* + * Only allow a user to send credentials, that they could set with +@@ -42,7 +43,9 @@ + + static __inline__ int scm_check_creds(struct ucred *creds) + { +- if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) && ++ if ((creds->pid == virt_tgid(current) || ++ creds->pid == 
current->tgid || ++ capable(CAP_VE_SYS_ADMIN)) && + ((creds->uid == current->uid || creds->uid == current->euid || + creds->uid == current->suid) || capable(CAP_SETUID)) && + ((creds->gid == current->gid || creds->gid == current->egid || +@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c + + if (!fpl) + { +- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); ++ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + *fplp = fpl; +@@ -127,9 +130,7 @@ int __scm_send(struct socket *sock, stru + for too short ancillary data object at all! Oops. + OK, let's add it... + */ +- if (cmsg->cmsg_len < sizeof(struct cmsghdr) || +- (unsigned long)(((char*)cmsg - (char*)msg->msg_control) +- + cmsg->cmsg_len) > msg->msg_controllen) ++ if (!CMSG_OK(msg, cmsg)) + goto error; + + if (cmsg->cmsg_level != SOL_SOCKET) +@@ -277,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc + if (!fpl) + return NULL; + +- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); ++ new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL); + if (new_fpl) { + for (i=fpl->count-1; i>=0; i--) + get_file(fpl->fp[i]); +diff -uprN linux-2.6.8.1.orig/net/core/skbuff.c linux-2.6.8.1-ve022stab078/net/core/skbuff.c +--- linux-2.6.8.1.orig/net/core/skbuff.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/skbuff.c 2006-05-11 13:05:41.000000000 +0400 +@@ -48,6 +48,7 @@ + #include <linux/in.h> + #include <linux/inet.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/netdevice.h> + #ifdef CONFIG_NET_CLS_ACT + #include <net/pkt_sched.h> +@@ -68,6 +69,8 @@ + #include <asm/uaccess.h> + #include <asm/system.h> + ++#include <ub/ub_net.h> ++ + static kmem_cache_t *skbuff_head_cache; + + /* +@@ -136,6 +139,9 @@ struct sk_buff *alloc_skb(unsigned int s + if (!skb) + goto out; + ++ if (ub_skb_alloc_bc(skb, gfp_mask)) ++ goto nobc; ++ + /* Get the DATA. Size must match skb_add_mtu(). 
*/ + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); +@@ -149,6 +155,7 @@ struct sk_buff *alloc_skb(unsigned int s + skb->data = data; + skb->tail = data; + skb->end = data + size; ++ SET_VE_OWNER_SKB(skb, get_exec_env()); + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; +@@ -158,6 +165,8 @@ struct sk_buff *alloc_skb(unsigned int s + out: + return skb; + nodata: ++ ub_skb_free_bc(skb); ++nobc: + kmem_cache_free(skbuff_head_cache, skb); + skb = NULL; + goto out; +@@ -208,6 +217,7 @@ void skb_release_data(struct sk_buff *sk + void kfree_skbmem(struct sk_buff *skb) + { + skb_release_data(skb); ++ ub_skb_free_bc(skb); + kmem_cache_free(skbuff_head_cache, skb); + } + +@@ -232,6 +242,7 @@ void __kfree_skb(struct sk_buff *skb) + #ifdef CONFIG_XFRM + secpath_put(skb->sp); + #endif ++ ub_skb_uncharge(skb); + if(skb->destructor) { + if (in_irq()) + printk(KERN_WARNING "Warning: kfree_skb on " +@@ -277,6 +288,11 @@ struct sk_buff *skb_clone(struct sk_buff + if (!n) + return NULL; + ++ if (ub_skb_alloc_bc(n, gfp_mask)) { ++ kmem_cache_free(skbuff_head_cache, n); ++ return NULL; ++ } ++ + #define C(x) n->x = skb->x + + n->next = n->prev = NULL; +@@ -305,6 +321,7 @@ struct sk_buff *skb_clone(struct sk_buff + C(priority); + C(protocol); + C(security); ++ SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb)); + n->destructor = NULL; + #ifdef CONFIG_NETFILTER + C(nfmark); +@@ -372,6 +389,7 @@ static void copy_skb_header(struct sk_bu + new->stamp = old->stamp; + new->destructor = NULL; + new->security = old->security; ++ SET_VE_OWNER_SKB(new, VE_OWNER_SKB((struct sk_buff *)old)); + #ifdef CONFIG_NETFILTER + new->nfmark = old->nfmark; + new->nfcache = old->nfcache; +@@ -1434,6 +1452,7 @@ void __init skb_init(void) + NULL, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); ++ skbuff_head_cache->flags |= CFLGS_ENVIDS; + } + + EXPORT_SYMBOL(___pskb_trim); +diff -uprN linux-2.6.8.1.orig/net/core/sock.c linux-2.6.8.1-ve022stab078/net/core/sock.c +--- linux-2.6.8.1.orig/net/core/sock.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/sock.c 2006-05-11 13:05:41.000000000 +0400 +@@ -106,6 +106,7 @@ + #include <linux/net.h> + #include <linux/mm.h> + #include <linux/slab.h> ++#include <linux/kmem_cache.h> + #include <linux/interrupt.h> + #include <linux/poll.h> + #include <linux/tcp.h> +@@ -121,6 +122,9 @@ + #include <net/xfrm.h> + #include <linux/ipsec.h> + ++#include <ub/ub_net.h> ++#include <ub/beancounter.h> ++ + #include <linux/filter.h> + + #ifdef CONFIG_INET +@@ -169,7 +173,7 @@ static void sock_warn_obsolete_bsdism(co + static char warncomm[16]; + if (strcmp(warncomm, current->comm) && warned < 5) { + strcpy(warncomm, current->comm); +- printk(KERN_WARNING "process `%s' is using obsolete " ++ ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete " + "%s SO_BSDCOMPAT\n", warncomm, name); + warned++; + } +@@ -621,6 +625,7 @@ struct sock *sk_alloc(int family, int pr + zero_it == 1 ? 
sizeof(struct sock) : zero_it); + sk->sk_family = family; + sock_lock_init(sk); ++ SET_VE_OWNER_SK(sk, get_exec_env()); + } + sk->sk_slab = slab; + +@@ -653,6 +658,7 @@ void sk_free(struct sock *sk) + __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); + + security_sk_free(sk); ++ ub_sock_uncharge(sk); + kmem_cache_free(sk->sk_slab, sk); + module_put(owner); + } +@@ -663,6 +669,7 @@ void __init sk_init(void) + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!sk_cachep) + printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!"); ++ sk_cachep->flags |= CFLGS_ENVIDS; + + if (num_physpages <= 4096) { + sysctl_wmem_max = 32767; +@@ -819,6 +826,7 @@ static long sock_wait_for_wmem(struct so + struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + unsigned long data_len, int noblock, int *errcode) + { ++#if 0 + struct sk_buff *skb; + unsigned int gfp_mask; + long timeo; +@@ -895,13 +903,87 @@ interrupted: + err = sock_intr_errno(timeo); + failure: + *errcode = err; ++#endif ++ return NULL; ++} ++ ++struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size, ++ unsigned long size2, int noblock, ++ int *errcode) ++{ ++ struct sk_buff *skb; ++ unsigned int gfp_mask; ++ long timeo; ++ int err; ++ ++ gfp_mask = sk->sk_allocation; ++ if (gfp_mask & __GFP_WAIT) ++ gfp_mask |= __GFP_REPEAT; ++ ++ timeo = sock_sndtimeo(sk, noblock); ++ while (1) { ++ err = sock_error(sk); ++ if (err != 0) ++ goto failure; ++ ++ err = -EPIPE; ++ if (sk->sk_shutdown & SEND_SHUTDOWN) ++ goto failure; ++ ++ if (ub_sock_getwres_other(sk, skb_charge_size(size))) { ++ if (size2 < size) { ++ size = size2; ++ continue; ++ } ++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); ++ err = -EAGAIN; ++ if (!timeo) ++ goto failure; ++ if (signal_pending(current)) ++ goto interrupted; ++ timeo = ub_sock_wait_for_space(sk, timeo, ++ skb_charge_size(size)); ++ continue; ++ } ++ ++ if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { ++ skb = alloc_skb(size, sk->sk_allocation); ++ if (skb) ++ /* Full success... */ ++ break; ++ ub_sock_retwres_other(sk, skb_charge_size(size), ++ SOCK_MIN_UBCSPACE_CH); ++ err = -ENOBUFS; ++ goto failure; ++ } ++ ub_sock_retwres_other(sk, ++ skb_charge_size(size), ++ SOCK_MIN_UBCSPACE_CH); ++ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); ++ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); ++ err = -EAGAIN; ++ if (!timeo) ++ goto failure; ++ if (signal_pending(current)) ++ goto interrupted; ++ timeo = sock_wait_for_wmem(sk, timeo); ++ } ++ ++ ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF); ++ skb_set_owner_w(skb, sk); ++ return skb; ++ ++interrupted: ++ err = sock_intr_errno(timeo); ++failure: ++ *errcode = err; + return NULL; + } + + struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, + int noblock, int *errcode) + { +- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); ++ return sock_alloc_send_skb2(sk, size, size, noblock, errcode); + } + + void __lock_sock(struct sock *sk) +diff -uprN linux-2.6.8.1.orig/net/core/stream.c linux-2.6.8.1-ve022stab078/net/core/stream.c +--- linux-2.6.8.1.orig/net/core/stream.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/core/stream.c 2006-05-11 13:05:39.000000000 +0400 +@@ -109,8 +109,9 @@ EXPORT_SYMBOL(sk_stream_wait_close); + * sk_stream_wait_memory - Wait for more memory for a socket + * @sk - socket to wait for memory + * @timeo_p - for how long ++ * @amount - amount of memory to wait for (in UB space!) 
+ */ +-int sk_stream_wait_memory(struct sock *sk, long *timeo_p) ++int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount) + { + int err = 0; + long vm_wait = 0; +@@ -132,14 +133,19 @@ int sk_stream_wait_memory(struct sock *s + if (signal_pending(current)) + goto do_interrupted; + clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); +- if (sk_stream_memory_free(sk) && !vm_wait) +- break; ++ if (amount == 0) { ++ if (sk_stream_memory_free(sk) && !vm_wait) ++ break; ++ } else ++ ub_sock_sndqueueadd_tcp(sk, amount); + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; + sk_wait_event(sk, ¤t_timeo, sk_stream_memory_free(sk) && + vm_wait); + sk->sk_write_pending--; ++ if (amount > 0) ++ ub_sock_sndqueuedel(sk); + + if (vm_wait) { + vm_wait -= current_timeo; +diff -uprN linux-2.6.8.1.orig/net/ipv4/af_inet.c linux-2.6.8.1-ve022stab078/net/ipv4/af_inet.c +--- linux-2.6.8.1.orig/net/ipv4/af_inet.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/af_inet.c 2006-05-11 13:05:41.000000000 +0400 +@@ -113,6 +113,8 @@ + #include <linux/mroute.h> + #endif + ++#include <ub/ub_net.h> ++ + DEFINE_SNMP_STAT(struct linux_mib, net_statistics); + + #ifdef INET_REFCNT_DEBUG +@@ -299,6 +301,13 @@ static int inet_create(struct socket *so + err = -EPROTONOSUPPORT; + if (!protocol) + goto out_sk_free; ++ err = -ENOBUFS; ++ if (ub_sock_charge(sk, PF_INET, sock->type)) ++ goto out_sk_free; ++ /* if charge was successful, sock_init_data() MUST be called to ++ * set sk->sk_type. otherwise sk will be uncharged to wrong resource ++ */ ++ + err = 0; + sock->ops = answer->ops; + sk->sk_prot = answer->prot; +@@ -377,6 +386,9 @@ int inet_release(struct socket *sock) + + if (sk) { + long timeout; ++ struct ve_struct *saved_env; ++ ++ saved_env = set_exec_env(VE_OWNER_SK(sk)); + + /* Applications forget to leave groups before exiting */ + ip_mc_drop_socket(sk); +@@ -394,6 +406,8 @@ int inet_release(struct socket *sock) + timeout = sk->sk_lingertime; + sock->sk = NULL; + sk->sk_prot->close(sk, timeout); ++ ++ set_exec_env(saved_env); + } + return 0; + } +@@ -981,20 +995,20 @@ static struct net_protocol icmp_protocol + + static int __init init_ipv4_mibs(void) + { +- net_statistics[0] = alloc_percpu(struct linux_mib); +- net_statistics[1] = alloc_percpu(struct linux_mib); +- ip_statistics[0] = alloc_percpu(struct ipstats_mib); +- ip_statistics[1] = alloc_percpu(struct ipstats_mib); +- icmp_statistics[0] = alloc_percpu(struct icmp_mib); +- icmp_statistics[1] = alloc_percpu(struct icmp_mib); +- tcp_statistics[0] = alloc_percpu(struct tcp_mib); +- tcp_statistics[1] = alloc_percpu(struct tcp_mib); +- udp_statistics[0] = alloc_percpu(struct udp_mib); +- udp_statistics[1] = alloc_percpu(struct udp_mib); ++ ve_net_statistics[0] = alloc_percpu(struct linux_mib); ++ ve_net_statistics[1] = alloc_percpu(struct linux_mib); ++ ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib); ++ ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib); ++ ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib); ++ ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib); ++ ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib); ++ ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib); ++ ve_udp_statistics[0] = alloc_percpu(struct udp_mib); ++ ve_udp_statistics[1] = alloc_percpu(struct udp_mib); + if (! 
+- (net_statistics[0] && net_statistics[1] && ip_statistics[0] +- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] +- && udp_statistics[0] && udp_statistics[1])) ++ (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0] ++ && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1] ++ && ve_udp_statistics[0] && ve_udp_statistics[1])) + return -ENOMEM; + + (void) tcp_mib_init(); +diff -uprN linux-2.6.8.1.orig/net/ipv4/arp.c linux-2.6.8.1-ve022stab078/net/ipv4/arp.c +--- linux-2.6.8.1.orig/net/ipv4/arp.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/arp.c 2006-05-11 13:05:41.000000000 +0400 +@@ -695,6 +695,9 @@ void arp_send(int type, int ptype, u32 d + + static void parp_redo(struct sk_buff *skb) + { ++#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETFILTER_DEBUG) ++ skb->nf_debug = 0; ++#endif + arp_rcv(skb, skb->dev, NULL); + } + +@@ -980,7 +983,7 @@ int arp_req_set(struct arpreq *r, struct + return 0; + } + if (dev == NULL) { +- ipv4_devconf.proxy_arp = 1; ++ ve_ipv4_devconf.proxy_arp = 1; + return 0; + } + if (__in_dev_get(dev)) { +@@ -1066,7 +1069,7 @@ int arp_req_delete(struct arpreq *r, str + return pneigh_delete(&arp_tbl, &ip, dev); + if (mask == 0) { + if (dev == NULL) { +- ipv4_devconf.proxy_arp = 0; ++ ve_ipv4_devconf.proxy_arp = 0; + return 0; + } + if (__in_dev_get(dev)) { +@@ -1115,6 +1118,8 @@ int arp_ioctl(unsigned int cmd, void __u + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + case SIOCGARP: ++ if (!ve_is_super(get_exec_env())) ++ return -EACCES; + err = copy_from_user(&r, arg, sizeof(struct arpreq)); + if (err) + return -EFAULT; +@@ -1486,8 +1491,12 @@ static int arp_seq_open(struct inode *in + { + struct seq_file *seq; + int rc = -ENOMEM; +- struct arp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); +- ++ struct arp_iter_state *s; ++ ++ if (!ve_is_super(get_exec_env())) ++ return -EPERM; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + goto out; + +diff -uprN linux-2.6.8.1.orig/net/ipv4/devinet.c linux-2.6.8.1-ve022stab078/net/ipv4/devinet.c +--- linux-2.6.8.1.orig/net/ipv4/devinet.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/devinet.c 2006-05-11 13:05:42.000000000 +0400 +@@ -77,10 +77,21 @@ static struct ipv4_devconf ipv4_devconf_ + .accept_source_route = 1, + }; + ++struct ipv4_devconf *get_ipv4_devconf_dflt_addr(void) ++{ ++ return &ipv4_devconf_dflt; ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_ipv4_devconf_dflt (*(get_exec_env()->_ipv4_devconf_dflt)) ++#else ++#define ve_ipv4_devconf_dflt ipv4_devconf_dflt ++#endif ++ + static void rtmsg_ifa(int event, struct in_ifaddr *); + + static struct notifier_block *inetaddr_chain; +-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy); + #ifdef CONFIG_SYSCTL + static void devinet_sysctl_register(struct in_device *in_dev, +@@ -221,7 +232,7 @@ int inet_addr_onlink(struct in_device *i + return 0; + } + +-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, ++void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) + { + struct in_ifaddr *ifa1 = *ifap; +@@ -537,7 +548,7 @@ int devinet_ioctl(unsigned int cmd, void + + case SIOCSIFFLAGS: + ret = -EACCES; +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + goto out; + break; + case SIOCSIFADDR: /* Set interface address (and family) */ +@@ -545,7 +556,7 
@@ int devinet_ioctl(unsigned int cmd, void + case SIOCSIFDSTADDR: /* Set the destination address */ + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + ret = -EACCES; +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + goto out; + ret = -EINVAL; + if (sin->sin_family != AF_INET) +@@ -965,7 +976,7 @@ static int inetdev_event(struct notifier + case NETDEV_UP: + if (dev->mtu < 68) + break; +- if (dev == &loopback_dev) { ++ if (dev == &visible_loopback_dev) { + struct in_ifaddr *ifa; + if ((ifa = inet_alloc_ifa()) != NULL) { + ifa->ifa_local = +@@ -1130,10 +1141,10 @@ static struct rtnetlink_link inet_rtnetl + void inet_forward_change(void) + { + struct net_device *dev; +- int on = ipv4_devconf.forwarding; ++ int on = ve_ipv4_devconf.forwarding; + +- ipv4_devconf.accept_redirects = !on; +- ipv4_devconf_dflt.forwarding = on; ++ ve_ipv4_devconf.accept_redirects = !on; ++ ve_ipv4_devconf_dflt.forwarding = on; + + read_lock(&dev_base_lock); + for (dev = dev_base; dev; dev = dev->next) { +@@ -1158,9 +1169,9 @@ static int devinet_sysctl_forward(ctl_ta + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write && *valp != val) { +- if (valp == &ipv4_devconf.forwarding) ++ if (valp == &ve_ipv4_devconf.forwarding) + inet_forward_change(); +- else if (valp != &ipv4_devconf_dflt.forwarding) ++ else if (valp != &ve_ipv4_devconf_dflt.forwarding) + rt_cache_flush(0); + } + +@@ -1422,30 +1433,22 @@ static struct devinet_sysctl_table { + }, + }; + +-static void devinet_sysctl_register(struct in_device *in_dev, +- struct ipv4_devconf *p) ++static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name, ++ int ifindex, struct ipv4_devconf *p) + { + int i; +- struct net_device *dev = in_dev ? in_dev->dev : NULL; +- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); +- char *dev_name = NULL; ++ struct devinet_sysctl_table *t; + ++ t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) +- return; ++ goto out; ++ + memcpy(t, &devinet_sysctl, sizeof(*t)); + for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { + t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].de = NULL; + } + +- if (dev) { +- dev_name = dev->name; +- t->devinet_dev[0].ctl_name = dev->ifindex; +- } else { +- dev_name = "default"; +- t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; +- } +- + /* + * Make a copy of dev_name, because '.procname' is regarded as const + * by sysctl and we wouldn't want anyone to change it under our feet +@@ -1453,8 +1456,9 @@ static void devinet_sysctl_register(stru + */ + dev_name = net_sysctl_strdup(dev_name); + if (!dev_name) +- goto free; ++ goto out_free_table; + ++ t->devinet_dev[0].ctl_name = ifindex; + t->devinet_dev[0].procname = dev_name; + t->devinet_dev[0].child = t->devinet_vars; + t->devinet_dev[0].de = NULL; +@@ -1467,17 +1471,38 @@ static void devinet_sysctl_register(stru + + t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0); + if (!t->sysctl_header) +- goto free_procname; ++ goto out_free_procname; + +- p->sysctl = t; +- return; ++ return t; + + /* error path */ +- free_procname: ++out_free_procname: + kfree(dev_name); +- free: ++out_free_table: + kfree(t); +- return; ++out: ++ printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n"); ++ return NULL; ++} ++ ++static void devinet_sysctl_register(struct in_device *in_dev, ++ struct ipv4_devconf *p) ++{ ++ struct net_device *dev; ++ char *dev_name; ++ int ifindex; ++ ++ dev = in_dev ? 
in_dev->dev : NULL; ++ ++ if (dev) { ++ dev_name = dev->name; ++ ifindex = dev->ifindex; ++ } else { ++ dev_name = "default"; ++ ifindex = NET_PROTO_CONF_DEFAULT; ++ } ++ ++ p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p); + } + + static void devinet_sysctl_unregister(struct ipv4_devconf *p) +@@ -1490,7 +1515,189 @@ static void devinet_sysctl_unregister(st + kfree(t); + } + } ++ ++extern int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++extern int visible_ipv4_sysctl_forward_strategy(ctl_table *table, int *name, int nlen, ++ void *oldval, size_t *oldlenp, ++ void *newval, size_t newlen, ++ void **context); ++ ++extern void *get_flush_delay_addr(void); ++extern int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++extern int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, ++ int __user *name, ++ int nlen, ++ void __user *oldval, ++ size_t __user *oldlenp, ++ void __user *newval, ++ size_t newlen, ++ void **context); ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static ctl_table net_sysctl_tables[] = { ++ /* 0: net */ ++ { ++ .ctl_name = CTL_NET, ++ .procname = "net", ++ .mode = 0555, ++ .child = &net_sysctl_tables[2], ++ }, ++ { .ctl_name = 0, }, ++ /* 2: net/ipv4 */ ++ { ++ .ctl_name = NET_IPV4, ++ .procname = "ipv4", ++ .mode = 0555, ++ .child = &net_sysctl_tables[4], ++ }, ++ { .ctl_name = 0, }, ++ /* 4, 5: net/ipv4/[vars] */ ++ { ++ .ctl_name = NET_IPV4_FORWARD, ++ .procname = "ip_forward", ++ .data = &ipv4_devconf.forwarding, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &visible_ipv4_sysctl_forward, ++ .strategy = &visible_ipv4_sysctl_forward_strategy, ++ }, ++ { ++ .ctl_name = NET_IPV4_ROUTE, ++ .procname = "route", ++ .maxlen = 0, ++ .mode = 0555, ++ .child = &net_sysctl_tables[7], ++ }, ++ { .ctl_name = 0 }, ++ /* 7: net/ipv4/route/flush */ ++ { ++ .ctl_name = NET_IPV4_ROUTE_FLUSH, ++ .procname = "flush", ++ .data = NULL, /* setuped below */ ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &visible_ipv4_sysctl_rtcache_flush, ++ .strategy = &visible_ipv4_sysctl_rtcache_flush_strategy, ++ }, ++ { .ctl_name = 0 }, ++}; ++ ++static int ip_forward_sysctl_register(struct ve_struct *ve, ++ struct ipv4_devconf *p) ++{ ++ struct ctl_table_header *hdr; ++ ctl_table *root; ++ ++ root = clone_sysctl_template(net_sysctl_tables, ++ sizeof(net_sysctl_tables) / sizeof(ctl_table)); ++ if (root == NULL) ++ goto out; ++ ++ root[4].data = &p->forwarding; ++ root[7].data = get_flush_delay_addr(); ++ ++ hdr = register_sysctl_table(root, 1); ++ if (hdr == NULL) ++ goto out_free; ++ ++ ve->forward_header = hdr; ++ ve->forward_table = root; ++ return 0; ++ ++out_free: ++ free_sysctl_clone(root); ++out: ++ return -ENOMEM; ++} ++ ++static inline void ip_forward_sysctl_unregister(struct ve_struct *ve) ++{ ++ unregister_sysctl_table(ve->forward_header); ++ ve->forward_header = NULL; ++} ++ ++static inline void ip_forward_sysctl_free(struct ve_struct *ve) ++{ ++ free_sysctl_clone(ve->forward_table); ++ ve->forward_table = NULL; ++} + #endif ++#endif ++ ++int devinet_sysctl_init(struct ve_struct *ve) ++{ ++ int err = 0; ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ struct ipv4_devconf *conf, *conf_def; ++ ++ err = -ENOMEM; ++ ++ conf = kmalloc(sizeof(*conf), GFP_KERNEL); ++ if (!conf) ++ goto err1; ++ ++ memcpy(conf, 
&ipv4_devconf, sizeof(*conf)); ++ conf->sysctl = __devinet_sysctl_register("all", ++ NET_PROTO_CONF_ALL, conf); ++ if (!conf->sysctl) ++ goto err2; ++ ++ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL); ++ if (!conf_def) ++ goto err3; ++ ++ memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def)); ++ conf_def->sysctl = __devinet_sysctl_register("default", ++ NET_PROTO_CONF_DEFAULT, conf_def); ++ if (!conf_def->sysctl) ++ goto err4; ++ ++ err = ip_forward_sysctl_register(ve, conf); ++ if (err) ++ goto err5; ++ ++ ve->_ipv4_devconf = conf; ++ ve->_ipv4_devconf_dflt = conf_def; ++ return 0; ++ ++err5: ++ devinet_sysctl_unregister(conf_def); ++err4: ++ kfree(conf_def); ++err3: ++ devinet_sysctl_unregister(conf); ++err2: ++ kfree(conf); ++err1: ++#endif ++#endif ++ return err; ++} ++ ++void devinet_sysctl_fini(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ip_forward_sysctl_unregister(ve); ++ devinet_sysctl_unregister(ve->_ipv4_devconf); ++ devinet_sysctl_unregister(ve->_ipv4_devconf_dflt); ++#endif ++#endif ++} ++ ++void devinet_sysctl_free(struct ve_struct *ve) ++{ ++#ifdef CONFIG_SYSCTL ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++ ip_forward_sysctl_free(ve); ++ kfree(ve->_ipv4_devconf); ++ kfree(ve->_ipv4_devconf_dflt); ++#endif ++#endif ++} + + void __init devinet_init(void) + { +@@ -1500,14 +1707,19 @@ void __init devinet_init(void) + #ifdef CONFIG_SYSCTL + devinet_sysctl.sysctl_header = + register_sysctl_table(devinet_sysctl.devinet_root_dir, 0); +- devinet_sysctl_register(NULL, &ipv4_devconf_dflt); ++ __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT, ++ &ipv4_devconf_dflt); + #endif + } + + EXPORT_SYMBOL(devinet_ioctl); + EXPORT_SYMBOL(in_dev_finish_destroy); + EXPORT_SYMBOL(inet_select_addr); ++EXPORT_SYMBOL(inet_del_ifa); + EXPORT_SYMBOL(inetdev_by_index); + EXPORT_SYMBOL(inetdev_lock); ++EXPORT_SYMBOL(devinet_sysctl_init); ++EXPORT_SYMBOL(devinet_sysctl_fini); ++EXPORT_SYMBOL(devinet_sysctl_free); + EXPORT_SYMBOL(register_inetaddr_notifier); + EXPORT_SYMBOL(unregister_inetaddr_notifier); +diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_frontend.c linux-2.6.8.1-ve022stab078/net/ipv4/fib_frontend.c +--- linux-2.6.8.1.orig/net/ipv4/fib_frontend.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/fib_frontend.c 2006-05-11 13:05:41.000000000 +0400 +@@ -51,14 +51,46 @@ + + #define RT_TABLE_MIN RT_TABLE_MAIN + ++#undef ip_fib_local_table ++#undef ip_fib_main_table + struct fib_table *ip_fib_local_table; + struct fib_table *ip_fib_main_table; ++void prepare_fib_tables(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_local_table = ip_fib_local_table; ++ ip_fib_local_table = (struct fib_table *)0x12345678; ++ get_ve0()->_main_table = ip_fib_main_table; ++ ip_fib_main_table = (struct fib_table *)0x12345678; ++#endif ++} ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ip_fib_local_table get_exec_env()->_local_table ++#define ip_fib_main_table get_exec_env()->_main_table ++#endif + + #else + + #define RT_TABLE_MIN 1 + ++#undef fib_tables + struct fib_table *fib_tables[RT_TABLE_MAX+1]; ++void prepare_fib_tables(void) ++{ ++#ifdef CONFIG_VE ++ int i; ++ ++ BUG_ON(sizeof(fib_tables) != ++ sizeof(((struct ve_struct *)0)->_fib_tables)); ++ memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables)); ++ for (i = 0; i <= RT_TABLE_MAX; i++) ++ fib_tables[i] = (void *)0x12366678; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || 
defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_tables get_exec_env()->_fib_tables ++#endif + + struct fib_table *__fib_new_table(int id) + { +@@ -248,7 +280,7 @@ int ip_rt_ioctl(unsigned int cmd, void _ + switch (cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&r, arg, sizeof(struct rtentry))) + return -EFAULT; +@@ -595,6 +627,7 @@ struct notifier_block fib_netdev_notifie + + void __init ip_fib_init(void) + { ++ prepare_fib_tables(); + #ifndef CONFIG_IP_MULTIPLE_TABLES + ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); +diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_hash.c linux-2.6.8.1-ve022stab078/net/ipv4/fib_hash.c +--- linux-2.6.8.1.orig/net/ipv4/fib_hash.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/fib_hash.c 2006-05-11 13:05:41.000000000 +0400 +@@ -35,6 +35,7 @@ + #include <linux/skbuff.h> + #include <linux/netlink.h> + #include <linux/init.h> ++#include <linux/ve.h> + + #include <net/ip.h> + #include <net/protocol.h> +@@ -101,12 +102,6 @@ struct fn_zone + can be cheaper than memory lookup, so that FZ_* macros are used. + */ + +-struct fn_hash +-{ +- struct fn_zone *fn_zones[33]; +- struct fn_zone *fn_zone_list; +-}; +- + static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz) + { + u32 h = ntohl(key.datum)>>(32 - fz->fz_order); +@@ -701,7 +696,14 @@ FTprint("tb(%d)_delete: %d %08x/%d %d\n" + f = *del_fp; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + +- if (matched != 1) { ++ if (matched != 1 || ++ /* ++ * Don't try to be excessively smart if it's not one of ++ * the host system tables, it would be a waste of ++ * memory. ++ */ ++ !ve_is_super(get_exec_env())) ++ { + write_lock_bh(&fib_hash_lock); + *del_fp = f->fn_next; + write_unlock_bh(&fib_hash_lock); +@@ -766,6 +768,92 @@ static int fn_hash_flush(struct fib_tabl + return found; + } + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static __inline__ void ++fib_destroy_list(struct fib_node ** fp, int z, struct fn_hash *table) ++{ ++ struct fib_node *f; ++ ++ while ((f = *fp) != NULL) { ++ write_lock_bh(&fib_hash_lock); ++ *fp = f->fn_next; ++ write_unlock_bh(&fib_hash_lock); ++ ++ fn_free_node(f); ++ } ++} ++ ++void fib_hash_destroy(struct fib_table *tb) ++{ ++ struct fn_hash *table = (struct fn_hash*)tb->tb_data; ++ struct fn_zone *fz; ++ ++ for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { ++ int i; ++ for (i=fz->fz_divisor-1; i>=0; i--) ++ fib_destroy_list(&fz->fz_hash[i], fz->fz_order, table); ++ fz->fz_nent = 0; ++ } ++} ++ ++/* ++ * Initialization of virtualized networking subsystem. 
++ */ ++int init_ve_route(struct ve_struct *ve) ++{ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ if (fib_rules_create()) ++ return -ENOMEM; ++ ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL); ++ if (!ve->_fib_tables[RT_TABLE_LOCAL]) ++ goto out_destroy; ++ ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN); ++ if (!ve->_fib_tables[RT_TABLE_MAIN]) ++ goto out_destroy_local; ++ ++ return 0; ++ ++out_destroy_local: ++ fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]); ++out_destroy: ++ fib_rules_destroy(); ++ ve->_local_rule = NULL; ++ return -ENOMEM; ++#else ++ ve->_local_table = fib_hash_init(RT_TABLE_LOCAL); ++ if (!ve->_local_table) ++ return -ENOMEM; ++ ve->_main_table = fib_hash_init(RT_TABLE_MAIN); ++ if (!ve->_main_table) { ++ fib_hash_destroy(ve->_local_table); ++ return -ENOMEM; ++ } ++ return 0; ++#endif ++} ++ ++void fini_ve_route(struct ve_struct *ve) ++{ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ int i; ++ for (i=0; i<RT_TABLE_MAX+1; i++) ++ { ++ if (!ve->_fib_tables[i]) ++ continue; ++ fib_hash_destroy(ve->_fib_tables[i]); ++ } ++ fib_rules_destroy(); ++ ve->_local_rule = NULL; ++#else ++ fib_hash_destroy(ve->_local_table); ++ fib_hash_destroy(ve->_main_table); ++#endif ++} ++ ++EXPORT_SYMBOL(init_ve_route); ++EXPORT_SYMBOL(fini_ve_route); ++#endif ++ + + static __inline__ int + fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, +@@ -863,7 +951,7 @@ static void rtmsg_fib(int event, struct + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + } + +-#ifdef CONFIG_IP_MULTIPLE_TABLES ++#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) + struct fib_table * fib_hash_init(int id) + #else + struct fib_table * __init fib_hash_init(int id) +@@ -973,13 +1061,23 @@ out: + return iter->node; + } + ++static struct fib_node *fib_get_idx(struct seq_file *seq, loff_t pos) ++{ ++ struct fib_node *fn = fib_get_first(seq); ++ ++ if (fn) ++ while (pos && (fn = fib_get_next(seq))) ++ --pos; ++ return pos ? NULL : fn; ++} ++ + static void *fib_seq_start(struct seq_file *seq, loff_t *pos) + { + void *v = NULL; + + read_lock(&fib_hash_lock); + if (ip_fib_main_table) +- v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN; ++ v = *pos ? 
fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + return v; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_rules.c linux-2.6.8.1-ve022stab078/net/ipv4/fib_rules.c +--- linux-2.6.8.1.orig/net/ipv4/fib_rules.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/fib_rules.c 2006-05-11 13:05:41.000000000 +0400 +@@ -38,6 +38,7 @@ + #include <linux/proc_fs.h> + #include <linux/skbuff.h> + #include <linux/netlink.h> ++#include <linux/rtnetlink.h> + #include <linux/init.h> + + #include <net/ip.h> +@@ -101,6 +102,87 @@ static struct fib_rule local_rule = { + static struct fib_rule *fib_rules = &local_rule; + static rwlock_t fib_rules_lock = RW_LOCK_UNLOCKED; + ++void prepare_fib_rules(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_local_rule = &local_rule; ++ get_ve0()->_fib_rules = fib_rules; ++ fib_rules = (void *)0x12345678; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define ve_local_rule (get_exec_env()->_local_rule) ++#define ve_fib_rules (get_exec_env()->_fib_rules) ++#else ++#define ve_local_rule (&local_rule) ++#define ve_fib_rules fib_rules ++#endif ++ ++#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE) ++int fib_rules_create() ++{ ++ struct fib_rule *default_rule, *main_rule, *loc_rule; ++ ++ default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (default_rule == NULL) ++ goto out_def; ++ memset(default_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&default_rule->r_clntref, 1); ++ default_rule->r_preference = 0x7FFF; ++ default_rule->r_table = RT_TABLE_DEFAULT; ++ default_rule->r_action = RTN_UNICAST; ++ ++ main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (main_rule == NULL) ++ goto out_main; ++ memset(main_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&main_rule->r_clntref, 1); ++ main_rule->r_preference = 0x7FFE; ++ main_rule->r_table = RT_TABLE_MAIN; ++ main_rule->r_action = RTN_UNICAST; ++ main_rule->r_next = default_rule; ++ ++ loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL); ++ if (loc_rule == NULL) ++ goto out_loc; ++ memset(loc_rule, 0, sizeof(struct fib_rule)); ++ atomic_set(&loc_rule->r_clntref, 1); ++ loc_rule->r_preference = 0; ++ loc_rule->r_table = RT_TABLE_LOCAL; ++ loc_rule->r_action = RTN_UNICAST; ++ loc_rule->r_next = main_rule; ++ ++ ve_local_rule = loc_rule; ++ ve_fib_rules = loc_rule; ++ ++ return 0; ++ ++out_loc: ++ kfree(main_rule); ++out_main: ++ kfree(default_rule); ++out_def: ++ return -1; ++} ++ ++void fib_rules_destroy() ++{ ++ struct fib_rule *r; ++ ++ rtnl_lock(); ++ write_lock_bh(&fib_rules_lock); ++ while(ve_fib_rules != NULL) { ++ r = ve_fib_rules; ++ ve_fib_rules = ve_fib_rules->r_next; ++ r->r_dead = 1; ++ fib_rule_put(r); ++ } ++ write_unlock_bh(&fib_rules_lock); ++ rtnl_unlock(); ++} ++#endif ++ + int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { + struct rtattr **rta = arg; +@@ -108,7 +190,7 @@ int inet_rtm_delrule(struct sk_buff *skb + struct fib_rule *r, **rp; + int err = -ESRCH; + +- for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { ++ for (rp=&ve_fib_rules; (r=*rp) != NULL; rp=&r->r_next) { + if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && +@@ -122,7 +204,7 @@ int inet_rtm_delrule(struct sk_buff *skb + (!rta[RTA_IIF-1] || strcmp(RTA_DATA(rta[RTA_IIF-1]), r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { + err = -EPERM; +- if (r == &local_rule) ++ if (r == 
ve_local_rule) + break; + + write_lock_bh(&fib_rules_lock); +@@ -186,6 +268,7 @@ int inet_rtm_newrule(struct sk_buff *skb + new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; ++ + memset(new_r, 0, sizeof(*new_r)); + if (rta[RTA_SRC-1]) + memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); +@@ -221,11 +304,11 @@ int inet_rtm_newrule(struct sk_buff *skb + memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); + #endif + +- rp = &fib_rules; ++ rp = &ve_fib_rules; + if (!new_r->r_preference) { +- r = fib_rules; ++ r = ve_fib_rules; + if (r && (r = r->r_next) != NULL) { +- rp = &fib_rules->r_next; ++ rp = &ve_fib_rules->r_next; + if (r->r_preference) + new_r->r_preference = r->r_preference - 1; + } +@@ -285,7 +368,7 @@ static void fib_rules_detach(struct net_ + { + struct fib_rule *r; + +- for (r=fib_rules; r; r=r->r_next) { ++ for (r=ve_fib_rules; r; r=r->r_next) { + if (r->r_ifindex == dev->ifindex) { + write_lock_bh(&fib_rules_lock); + r->r_ifindex = -1; +@@ -298,7 +381,7 @@ static void fib_rules_attach(struct net_ + { + struct fib_rule *r; + +- for (r=fib_rules; r; r=r->r_next) { ++ for (r=ve_fib_rules; r; r=r->r_next) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) { + write_lock_bh(&fib_rules_lock); + r->r_ifindex = dev->ifindex; +@@ -319,7 +402,7 @@ int fib_lookup(const struct flowi *flp, + FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", + NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); + read_lock(&fib_rules_lock); +- for (r = fib_rules; r; r=r->r_next) { ++ for (r = ve_fib_rules; r; r=r->r_next) { + if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || + #ifdef CONFIG_IP_ROUTE_TOS +@@ -449,7 +532,7 @@ int inet_dump_rules(struct sk_buff *skb, + struct fib_rule *r; + + read_lock(&fib_rules_lock); +- for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { ++ for (r=ve_fib_rules, idx=0; r; r = r->r_next, idx++) { + if (idx < s_idx) + continue; + if (inet_fill_rule(skb, r, cb) < 0) +@@ -463,5 +546,6 @@ int inet_dump_rules(struct sk_buff *skb, + + void __init fib_rules_init(void) + { ++ prepare_fib_rules(); + register_netdevice_notifier(&fib_rules_notifier); + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/fib_semantics.c linux-2.6.8.1-ve022stab078/net/ipv4/fib_semantics.c +--- linux-2.6.8.1.orig/net/ipv4/fib_semantics.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/fib_semantics.c 2006-05-11 13:05:41.000000000 +0400 +@@ -32,6 +32,7 @@ + #include <linux/netdevice.h> + #include <linux/if_arp.h> + #include <linux/proc_fs.h> ++#include <linux/ve.h> + #include <linux/skbuff.h> + #include <linux/netlink.h> + #include <linux/init.h> +@@ -49,6 +50,18 @@ static struct fib_info *fib_info_list; + static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED; + int fib_info_cnt; + ++void prepare_fib_info(void) ++{ ++#ifdef CONFIG_VE ++ get_ve0()->_fib_info_list = fib_info_list; ++ fib_info_list = (void *)0x12345678; ++#endif ++} ++ ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++#define fib_info_list (get_exec_env()->_fib_info_list) ++#endif ++ + #define for_fib_info() { struct fib_info *fi; \ + for (fi = fib_info_list; fi; fi = fi->fib_next) + +@@ -155,7 +168,6 @@ void free_fib_info(struct fib_info *fi) + dev_put(nh->nh_dev); + nh->nh_dev = NULL; + } endfor_nexthops(fi); +- fib_info_cnt--; + kfree(fi); + } + +@@ -483,11 +495,13 @@ fib_create_info(const struct rtmsg *r, s + } + #endif + +- fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); ++ + err = -ENOBUFS; ++ ++ fi = 
kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + if (fi == NULL) + goto failure; +- fib_info_cnt++; ++ + memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); + + fi->fib_protocol = r->rtm_protocol; +diff -uprN linux-2.6.8.1.orig/net/ipv4/icmp.c linux-2.6.8.1-ve022stab078/net/ipv4/icmp.c +--- linux-2.6.8.1.orig/net/ipv4/icmp.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/icmp.c 2006-05-11 13:05:27.000000000 +0400 +@@ -346,12 +346,12 @@ static void icmp_push_reply(struct icmp_ + { + struct sk_buff *skb; + +- ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, +- icmp_param->data_len+icmp_param->head_len, +- icmp_param->head_len, +- ipc, rt, MSG_DONTWAIT); +- +- if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { ++ if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, ++ icmp_param->data_len+icmp_param->head_len, ++ icmp_param->head_len, ++ ipc, rt, MSG_DONTWAIT) < 0) ++ ip_flush_pending_frames(icmp_socket->sk); ++ else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + struct icmphdr *icmph = skb->h.icmph; + unsigned int csum = 0; + struct sk_buff *skb1; +diff -uprN linux-2.6.8.1.orig/net/ipv4/igmp.c linux-2.6.8.1-ve022stab078/net/ipv4/igmp.c +--- linux-2.6.8.1.orig/net/ipv4/igmp.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/igmp.c 2006-05-11 13:05:42.000000000 +0400 +@@ -889,7 +889,10 @@ int igmp_rcv(struct sk_buff *skb) + /* Is it our report looped back? */ + if (((struct rtable*)skb->dst)->fl.iif == 0) + break; +- igmp_heard_report(in_dev, ih->group); ++ /* don't rely on MC router hearing unicast reports */ ++ if (skb->pkt_type == PACKET_MULTICAST || ++ skb->pkt_type == PACKET_BROADCAST) ++ igmp_heard_report(in_dev, ih->group); + break; + case IGMP_PIM: + #ifdef CONFIG_IP_PIMSM_V1 +@@ -1776,12 +1779,12 @@ int ip_mc_source(int add, int omode, str + goto done; + rv = !0; + for (i=0; i<psl->sl_count; i++) { +- rv = memcmp(&psl->sl_addr, &mreqs->imr_multiaddr, ++ rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, + sizeof(__u32)); +- if (rv >= 0) ++ if (rv == 0) + break; + } +- if (!rv) /* source not found */ ++ if (rv) /* source not found */ + goto done; + + /* update the interface filter */ +@@ -1823,9 +1826,9 @@ int ip_mc_source(int add, int omode, str + } + rv = 1; /* > 0 for insert logic below if sl_count is 0 */ + for (i=0; i<psl->sl_count; i++) { +- rv = memcmp(&psl->sl_addr, &mreqs->imr_multiaddr, ++ rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, + sizeof(__u32)); +- if (rv >= 0) ++ if (rv == 0) + break; + } + if (rv == 0) /* address already there is an error */ +@@ -2297,7 +2300,8 @@ static inline struct ip_sf_list *igmp_mc + struct ip_mc_list *im = NULL; + struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); + +- for (state->dev = dev_base, state->idev = NULL, state->im = NULL; ++ for (state->dev = dev_base, ++ state->idev = NULL, state->im = NULL; + state->dev; + state->dev = state->dev->next) { + struct in_device *idev; +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_forward.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_forward.c +--- linux-2.6.8.1.orig/net/ipv4/ip_forward.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_forward.c 2006-05-11 13:05:41.000000000 +0400 +@@ -91,6 +91,23 @@ int ip_forward(struct sk_buff *skb) + if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + goto sr_failed; + ++ /* ++ * We try to optimize forwarding of VE packets: ++ * do not decrement TTL (and so save skb_cow) ++ * during 
forwarding of outgoing pkts from VE. ++ * For incoming pkts we still do ttl decr, ++ * since such skb is not cloned and does not require ++ * actual cow. So, there is at least one place ++ * in pkts path with mandatory ttl decr, that is ++ * sufficient to prevent routing loops. ++ */ ++ if ( ++#ifdef CONFIG_IP_ROUTE_NAT ++ (rt->rt_flags & RTCF_NAT) == 0 && /* no NAT mangling expected */ ++#endif /* and */ ++ (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */ ++ goto no_ttl_decr; ++ + /* We are about to mangle packet. Copy it! */ + if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + goto drop; +@@ -99,6 +116,8 @@ int ip_forward(struct sk_buff *skb) + /* Decrease ttl after skb cow done */ + ip_decrease_ttl(iph); + ++no_ttl_decr: ++ + /* + * We now generate an ICMP HOST REDIRECT giving the route + * we calculated. +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_fragment.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_fragment.c +--- linux-2.6.8.1.orig/net/ipv4/ip_fragment.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_fragment.c 2006-05-11 13:05:41.000000000 +0400 +@@ -42,6 +42,7 @@ + #include <linux/udp.h> + #include <linux/inet.h> + #include <linux/netfilter_ipv4.h> ++#include <linux/ve_owner.h> + + /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 + * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c +@@ -73,6 +74,7 @@ struct ipfrag_skb_cb + struct ipq { + struct ipq *next; /* linked list pointers */ + struct list_head lru_list; /* lru list member */ ++ u32 user; + u32 saddr; + u32 daddr; + u16 id; +@@ -91,8 +93,12 @@ struct ipq { + struct ipq **pprev; + int iif; + struct timeval stamp; ++ struct ve_struct *owner_env; + }; + ++DCL_VE_OWNER_PROTO(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline)) ++DCL_VE_OWNER(IPQ, TAIL_SOFT, struct ipq, owner_env, inline, (always_inline)) ++ + /* Hash table. */ + + #define IPQ_HASHSZ 64 +@@ -104,6 +110,20 @@ static u32 ipfrag_hash_rnd; + static LIST_HEAD(ipq_lru_list); + int ip_frag_nqueues = 0; + ++void prepare_ipq(void) ++{ ++ struct ipq *qp; ++ unsigned int hash; ++ ++ write_lock(&ipfrag_lock); ++ for (hash = 0; hash < IPQ_HASHSZ; hash++) { ++ for(qp = ipq_hash[hash]; qp; qp = qp->next) { ++ SET_VE_OWNER_IPQ(qp, get_ve0()); ++ } ++ } ++ write_unlock(&ipfrag_lock); ++} ++ + static __inline__ void __ipq_unlink(struct ipq *qp) + { + if(qp->next) +@@ -183,7 +203,8 @@ static __inline__ void frag_free_queue(s + + static __inline__ struct ipq *frag_alloc_queue(void) + { +- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); ++ struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *), ++ GFP_ATOMIC); + + if(!qp) + return NULL; +@@ -273,6 +294,9 @@ static void ip_evictor(void) + static void ip_expire(unsigned long arg) + { + struct ipq *qp = (struct ipq *) arg; ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(VE_OWNER_IPQ(qp)); + + spin_lock(&qp->lock); + +@@ -295,6 +319,8 @@ static void ip_expire(unsigned long arg) + out: + spin_unlock(&qp->lock); + ipq_put(qp); ++ ++ (void)set_exec_env(envid); + } + + /* Creation primitives. 
*/ +@@ -313,7 +339,9 @@ static struct ipq *ip_frag_intern(unsign + if(qp->id == qp_in->id && + qp->saddr == qp_in->saddr && + qp->daddr == qp_in->daddr && +- qp->protocol == qp_in->protocol) { ++ qp->protocol == qp_in->protocol && ++ qp->user == qp_in->user && ++ qp->owner_env == get_exec_env()) { + atomic_inc(&qp->refcnt); + write_unlock(&ipfrag_lock); + qp_in->last_in |= COMPLETE; +@@ -340,7 +368,7 @@ static struct ipq *ip_frag_intern(unsign + } + + /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ +-static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) ++static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) + { + struct ipq *qp; + +@@ -352,6 +380,7 @@ static struct ipq *ip_frag_create(unsign + qp->id = iph->id; + qp->saddr = iph->saddr; + qp->daddr = iph->daddr; ++ qp->user = user; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; +@@ -364,6 +393,8 @@ static struct ipq *ip_frag_create(unsign + qp->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&qp->refcnt, 1); + ++ SET_VE_OWNER_IPQ(qp, get_exec_env()); ++ + return ip_frag_intern(hash, qp); + + out_nomem: +@@ -374,7 +405,7 @@ out_nomem: + /* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +-static inline struct ipq *ip_find(struct iphdr *iph) ++static inline struct ipq *ip_find(struct iphdr *iph, u32 user) + { + __u16 id = iph->id; + __u32 saddr = iph->saddr; +@@ -388,7 +419,9 @@ static inline struct ipq *ip_find(struct + if(qp->id == id && + qp->saddr == saddr && + qp->daddr == daddr && +- qp->protocol == protocol) { ++ qp->protocol == protocol && ++ qp->user == user && ++ qp->owner_env == get_exec_env()) { + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); + return qp; +@@ -396,7 +429,7 @@ static inline struct ipq *ip_find(struct + } + read_unlock(&ipfrag_lock); + +- return ip_frag_create(hash, iph); ++ return ip_frag_create(hash, iph, user); + } + + /* Add new segment to existing queue. */ +@@ -630,7 +663,7 @@ out_fail: + } + + /* Process an incoming IP datagram fragment. */ +-struct sk_buff *ip_defrag(struct sk_buff *skb) ++struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) + { + struct iphdr *iph = skb->nh.iph; + struct ipq *qp; +@@ -645,7 +678,7 @@ struct sk_buff *ip_defrag(struct sk_buff + dev = skb->dev; + + /* Lookup (or create) queue header */ +- if ((qp = ip_find(iph)) != NULL) { ++ if ((qp = ip_find(iph, user)) != NULL) { + struct sk_buff *ret = NULL; + + spin_lock(&qp->lock); +@@ -656,6 +689,9 @@ struct sk_buff *ip_defrag(struct sk_buff + qp->meat == qp->len) + ret = ip_frag_reasm(qp, dev); + ++ if (ret) ++ SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb)); ++ + spin_unlock(&qp->lock); + ipq_put(qp); + return ret; +@@ -666,6 +702,48 @@ struct sk_buff *ip_defrag(struct sk_buff + return NULL; + } + ++#ifdef CONFIG_VE ++/* XXX */ ++void ip_fragment_cleanup(struct ve_struct *envid) ++{ ++ int i, progress; ++ ++ /* All operations with fragment queues are performed from NET_RX/TX ++ * soft interrupts or from timer context. 
--Den */ ++ local_bh_disable(); ++ do { ++ progress = 0; ++ for (i = 0; i < IPQ_HASHSZ; i++) { ++ struct ipq *qp; ++ if (ipq_hash[i] == NULL) ++ continue; ++inner_restart: ++ read_lock(&ipfrag_lock); ++ for (qp = ipq_hash[i]; qp; qp = qp->next) { ++ if (!ve_accessible_strict( ++ VE_OWNER_IPQ(qp), ++ envid)) ++ continue; ++ atomic_inc(&qp->refcnt); ++ read_unlock(&ipfrag_lock); ++ ++ spin_lock(&qp->lock); ++ if (!(qp->last_in&COMPLETE)) ++ ipq_kill(qp); ++ spin_unlock(&qp->lock); ++ ++ ipq_put(qp); ++ progress = 1; ++ goto inner_restart; ++ } ++ read_unlock(&ipfrag_lock); ++ } ++ } while(progress); ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(ip_fragment_cleanup); ++#endif ++ + void ipfrag_init(void) + { + ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_input.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_input.c +--- linux-2.6.8.1.orig/net/ipv4/ip_input.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_input.c 2006-05-11 13:05:25.000000000 +0400 +@@ -172,7 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb + (!sk->sk_bound_dev_if || + sk->sk_bound_dev_if == skb->dev->ifindex)) { + if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { +- skb = ip_defrag(skb); ++ skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); + if (skb == NULL) { + read_unlock(&ip_ra_lock); + return 1; +@@ -274,7 +274,7 @@ int ip_local_deliver(struct sk_buff *skb + */ + + if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { +- skb = ip_defrag(skb); ++ skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); + if (!skb) + return 0; + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_options.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_options.c +--- linux-2.6.8.1.orig/net/ipv4/ip_options.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_options.c 2006-05-11 13:05:33.000000000 +0400 +@@ -515,6 +515,8 @@ int ip_options_get(struct ip_options **o + kfree(opt); + return -EINVAL; + } ++ if (*optp) ++ kfree(*optp); + *optp = opt; + return 0; + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_output.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_output.c +--- linux-2.6.8.1.orig/net/ipv4/ip_output.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_output.c 2006-05-11 13:05:44.000000000 +0400 +@@ -405,6 +405,7 @@ static void ip_copy_metadata(struct sk_b + to->priority = from->priority; + to->protocol = from->protocol; + to->security = from->security; ++ dst_release(to->dst); + to->dst = dst_clone(from->dst); + to->dev = from->dev; + +@@ -519,6 +520,7 @@ int ip_fragment(struct sk_buff *skb, int + /* Prepare header of the next frame, + * before previous one went down. 
*/ + if (frag) { ++ frag->ip_summed = CHECKSUM_NONE; + frag->h.raw = frag->data; + frag->nh.raw = __skb_push(frag, hlen); + memcpy(frag->nh.raw, iph, hlen); +@@ -1147,11 +1149,7 @@ int ip_push_pending_frames(struct sock * + iph->tos = inet->tos; + iph->tot_len = htons(skb->len); + iph->frag_off = df; +- if (!df) { +- __ip_select_ident(iph, &rt->u.dst, 0); +- } else { +- iph->id = htons(inet->id++); +- } ++ ip_select_ident(iph, &rt->u.dst, sk); + iph->ttl = ttl; + iph->protocol = sk->sk_protocol; + iph->saddr = rt->rt_src; +@@ -1242,13 +1240,14 @@ void ip_send_reply(struct sock *sk, stru + char data[40]; + } replyopts; + struct ipcm_cookie ipc; +- u32 daddr; ++ u32 saddr, daddr; + struct rtable *rt = (struct rtable*)skb->dst; + + if (ip_options_echo(&replyopts.opt, skb)) + return; + +- daddr = ipc.addr = rt->rt_src; ++ saddr = skb->nh.iph->daddr; ++ daddr = ipc.addr = skb->nh.iph->saddr; + ipc.opt = NULL; + + if (replyopts.opt.optlen) { +@@ -1261,7 +1260,7 @@ void ip_send_reply(struct sock *sk, stru + { + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = daddr, +- .saddr = rt->rt_spec_dst, ++ .saddr = saddr, + .tos = RT_TOS(skb->nh.iph->tos) } }, + /* Not quite clean, but right. */ + .uli_u = { .ports = +diff -uprN linux-2.6.8.1.orig/net/ipv4/ip_sockglue.c linux-2.6.8.1-ve022stab078/net/ipv4/ip_sockglue.c +--- linux-2.6.8.1.orig/net/ipv4/ip_sockglue.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ip_sockglue.c 2006-05-11 13:05:34.000000000 +0400 +@@ -146,11 +146,8 @@ int ip_cmsg_send(struct msghdr *msg, str + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { +- if (cmsg->cmsg_len < sizeof(struct cmsghdr) || +- (unsigned long)(((char*)cmsg - (char*)msg->msg_control) +- + cmsg->cmsg_len) > msg->msg_controllen) { ++ if (!CMSG_OK(msg, cmsg)) + return -EINVAL; +- } + if (cmsg->cmsg_level != SOL_IP) + continue; + switch (cmsg->cmsg_type) { +@@ -851,6 +848,9 @@ mc_msf_out: + + case IP_IPSEC_POLICY: + case IP_XFRM_POLICY: ++ err = -EPERM; ++ if (!capable(CAP_NET_ADMIN)) ++ break; + err = xfrm_user_policy(sk, optname, optval, optlen); + break; + +diff -uprN linux-2.6.8.1.orig/net/ipv4/ipmr.c linux-2.6.8.1-ve022stab078/net/ipv4/ipmr.c +--- linux-2.6.8.1.orig/net/ipv4/ipmr.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ipmr.c 2006-05-11 13:05:41.000000000 +0400 +@@ -828,7 +828,7 @@ static void mrtsock_destruct(struct sock + { + rtnl_lock(); + if (sk == mroute_socket) { +- ipv4_devconf.mc_forwarding--; ++ ve_ipv4_devconf.mc_forwarding--; + + write_lock_bh(&mrt_lock); + mroute_socket=NULL; +@@ -879,7 +879,7 @@ int ip_mroute_setsockopt(struct sock *sk + mroute_socket=sk; + write_unlock_bh(&mrt_lock); + +- ipv4_devconf.mc_forwarding++; ++ ve_ipv4_devconf.mc_forwarding++; + } + rtnl_unlock(); + return ret; +diff -uprN linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.8.1-ve022stab078/net/ipv4/ipvs/ip_vs_conn.c +--- linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_conn.c 2004-08-14 14:56:15.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ipvs/ip_vs_conn.c 2006-05-11 13:05:39.000000000 +0400 +@@ -876,7 +876,8 @@ int ip_vs_conn_init(void) + /* Allocate ip_vs_conn slab cache */ + ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", + sizeof(struct ip_vs_conn), 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, ++ NULL, NULL); + if (!ip_vs_conn_cachep) { + vfree(ip_vs_conn_tab); + return -ENOMEM; +diff -uprN linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_core.c 
linux-2.6.8.1-ve022stab078/net/ipv4/ipvs/ip_vs_core.c +--- linux-2.6.8.1.orig/net/ipv4/ipvs/ip_vs_core.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/ipvs/ip_vs_core.c 2006-05-11 13:05:41.000000000 +0400 +@@ -541,9 +541,9 @@ u16 ip_vs_checksum_complete(struct sk_bu + } + + static inline struct sk_buff * +-ip_vs_gather_frags(struct sk_buff *skb) ++ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) + { +- skb = ip_defrag(skb); ++ skb = ip_defrag(skb, user); + if (skb) + ip_send_check(skb->nh.iph); + return skb; +@@ -617,7 +617,7 @@ static int ip_vs_out_icmp(struct sk_buff + + /* reassemble IP fragments */ + if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { +- skb = ip_vs_gather_frags(skb); ++ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); + if (!skb) + return NF_STOLEN; + *pskb = skb; +@@ -759,7 +759,7 @@ ip_vs_out(unsigned int hooknum, struct s + /* reassemble IP fragments */ + if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && + !pp->dont_defrag)) { +- skb = ip_vs_gather_frags(skb); ++ skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); + if (!skb) + return NF_STOLEN; + iph = skb->nh.iph; +@@ -862,7 +862,8 @@ check_for_ip_vs_out(struct sk_buff **psk + * forward to the right destination host if relevant. + * Currently handles error types - unreachable, quench, ttl exceeded. + */ +-static int ip_vs_in_icmp(struct sk_buff **pskb, int *related) ++static int ++ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) + { + struct sk_buff *skb = *pskb; + struct iphdr *iph; +@@ -876,7 +877,9 @@ static int ip_vs_in_icmp(struct sk_buff + + /* reassemble IP fragments */ + if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { +- skb = ip_vs_gather_frags(skb); ++ skb = ip_vs_gather_frags(skb, ++ hooknum == NF_IP_LOCAL_IN ? ++ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); + if (!skb) + return NF_STOLEN; + *pskb = skb; +@@ -972,6 +975,10 @@ ip_vs_in(unsigned int hooknum, struct sk + * Big tappo: only PACKET_HOST (neither loopback nor mcasts) + * ... don't know why 1st test DOES NOT include 2nd (?) + */ ++ /* ++ * VZ: the question above is right. ++ * The second test is superfluous. 
++ */ + if (unlikely(skb->pkt_type != PACKET_HOST + || skb->dev == &loopback_dev || skb->sk)) { + IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", +@@ -990,7 +997,7 @@ ip_vs_in(unsigned int hooknum, struct sk + + iph = skb->nh.iph; + if (unlikely(iph->protocol == IPPROTO_ICMP)) { +- int related, verdict = ip_vs_in_icmp(pskb, &related); ++ int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); + + if (related) + return verdict; +@@ -1085,7 +1092,7 @@ ip_vs_forward_icmp(unsigned int hooknum, + if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) + return NF_ACCEPT; + +- return ip_vs_in_icmp(pskb, &r); ++ return ip_vs_in_icmp(pskb, &r, hooknum); + } + + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_core.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_core.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_core.c 2006-05-11 13:05:45.000000000 +0400 +@@ -47,6 +47,7 @@ + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/netfilter_ipv4/ip_conntrack_core.h> + #include <linux/netfilter_ipv4/listhelp.h> ++#include <ub/ub_mem.h> + + #define IP_CONNTRACK_VERSION "2.1" + +@@ -62,10 +63,10 @@ DECLARE_RWLOCK(ip_conntrack_expect_tuple + void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; + LIST_HEAD(ip_conntrack_expect_list); + LIST_HEAD(protocol_list); +-static LIST_HEAD(helpers); ++LIST_HEAD(helpers); + unsigned int ip_conntrack_htable_size = 0; + int ip_conntrack_max; +-static atomic_t ip_conntrack_count = ATOMIC_INIT(0); ++atomic_t ip_conntrack_count = ATOMIC_INIT(0); + struct list_head *ip_conntrack_hash; + static kmem_cache_t *ip_conntrack_cachep; + struct ip_conntrack ip_conntrack_untracked; +@@ -83,7 +84,7 @@ struct ip_conntrack_protocol *__ip_ct_fi + struct ip_conntrack_protocol *p; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); +- p = LIST_FIND(&protocol_list, proto_cmpfn, ++ p = LIST_FIND(&ve_ip_conntrack_protocol_list, proto_cmpfn, + struct ip_conntrack_protocol *, protocol); + if (!p) + p = &ip_conntrack_generic_protocol; +@@ -126,6 +127,28 @@ hash_conntrack(const struct ip_conntrack + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); + } + ++#ifdef CONFIG_VE_IPTABLES ++/* this function gives us an ability to safely restore ++ * connection in case of failure */ ++void ip_conntrack_hash_insert(struct ip_conntrack *ct) ++{ ++ u_int32_t hash, repl_hash; ++ ++ if (!ip_conntrack_hash_rnd_initted) { ++ get_random_bytes(&ip_conntrack_hash_rnd, 4); ++ ip_conntrack_hash_rnd_initted = 1; ++ } ++ ++ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); ++ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); ++ list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, ++ &ve_ip_conntrack_hash[hash]); ++ list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, ++ &ve_ip_conntrack_hash[repl_hash]); ++} ++EXPORT_SYMBOL(ip_conntrack_hash_insert); ++#endif ++ + int + get_tuple(const struct iphdr *iph, + const struct sk_buff *skb, +@@ -195,7 +218,7 @@ __ip_ct_expect_find(const struct ip_conn + { + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); +- return LIST_FIND(&ip_conntrack_expect_list, expect_cmp, ++ return LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + } + +@@ -278,7 +301,11 @@ static void remove_expectations(struct i + continue; + } + ++#ifdef CONFIG_VE_IPTABLES ++ 
IP_NF_ASSERT(list_inlist(&(ct->ct_env)->_ip_conntrack_expect_list, exp)); ++#else + IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp)); ++#endif + IP_NF_ASSERT(exp->expectant == ct); + + /* delete expectation from global and private lists */ +@@ -296,8 +323,15 @@ clean_from_lists(struct ip_conntrack *ct + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); ++#ifdef CONFIG_VE_IPTABLES ++ LIST_DELETE(&((ct->ct_env)->_ip_conntrack_hash)[ho], ++ &ct->tuplehash[IP_CT_DIR_ORIGINAL]); ++ LIST_DELETE(&((ct->ct_env)->_ip_conntrack_hash)[hr], ++ &ct->tuplehash[IP_CT_DIR_REPLY]); ++#else + LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); ++#endif + + /* Destroy all un-established, pending expectations */ + remove_expectations(ct, 1); +@@ -320,8 +354,13 @@ destroy_conntrack(struct nf_conntrack *n + if (proto && proto->destroy) + proto->destroy(ct); + ++#ifdef CONFIG_VE_IPTABLES ++ if (ct->ct_env->_ip_conntrack_destroyed) ++ ct->ct_env->_ip_conntrack_destroyed(ct); ++#else + if (ip_conntrack_destroyed) + ip_conntrack_destroyed(ct); ++#endif + + WRITE_LOCK(&ip_conntrack_lock); + /* Make sure don't leave any orphaned expectations lying around */ +@@ -343,9 +382,13 @@ destroy_conntrack(struct nf_conntrack *n + if (master) + ip_conntrack_put(master); + ++#ifdef CONFIG_VE_IPTABLES ++ atomic_dec(&(ct->ct_env->_ip_conntrack_count)); ++#else ++ atomic_dec(&ip_conntrack_count); ++#endif + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + kmem_cache_free(ip_conntrack_cachep, ct); +- atomic_dec(&ip_conntrack_count); + } + + static void death_by_timeout(unsigned long ul_conntrack) +@@ -376,7 +419,7 @@ __ip_conntrack_find(const struct ip_conn + unsigned int hash = hash_conntrack(tuple); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); +- h = LIST_FIND(&ip_conntrack_hash[hash], ++ h = LIST_FIND(&ve_ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + tuple, ignored_conntrack); +@@ -454,17 +497,23 @@ __ip_conntrack_confirm(struct nf_ct_info + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ +- if (!LIST_FIND(&ip_conntrack_hash[hash], ++ if (!LIST_FIND(&ve_ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) +- && !LIST_FIND(&ip_conntrack_hash[repl_hash], ++ && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { +- list_prepend(&ip_conntrack_hash[hash], ++ /* ++ * Just to avoid one ct to be inserted in 2 or more ++ * ve_ip_conntrack_hash'es... Otherwise it can crash. 
++ */ ++ if (is_confirmed(ct)) ++ goto ok; ++ list_prepend(&ve_ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); +- list_prepend(&ip_conntrack_hash[repl_hash], ++ list_prepend(&ve_ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in +@@ -473,6 +522,7 @@ __ip_conntrack_confirm(struct nf_ct_info + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); ++ok: + WRITE_UNLOCK(&ip_conntrack_lock); + return NF_ACCEPT; + } +@@ -611,11 +661,45 @@ static inline int helper_cmp(const struc + + struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) + { +- return LIST_FIND(&helpers, helper_cmp, ++ return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp, + struct ip_conntrack_helper *, + tuple); + } + ++struct ip_conntrack * ++ip_conntrack_alloc(struct user_beancounter *ub) ++{ ++ int i; ++ struct ip_conntrack *conntrack; ++ struct user_beancounter *old_ub; ++ ++ old_ub = set_exec_ub(ub); ++ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); ++ (void)set_exec_ub(old_ub); ++ if (unlikely(!conntrack)) { ++ DEBUGP("Can't allocate conntrack.\n"); ++ return NULL; ++ } ++ ++ memset(conntrack, 0, sizeof(*conntrack)); ++ atomic_set(&conntrack->ct_general.use, 1); ++ conntrack->ct_general.destroy = destroy_conntrack; ++ for (i=0; i < IP_CT_NUMBER; i++) ++ conntrack->infos[i].master = &conntrack->ct_general; ++ ++ /* Don't set timer yet: wait for confirmation */ ++ init_timer(&conntrack->timeout); ++ conntrack->timeout.data = (unsigned long)conntrack; ++ conntrack->timeout.function = death_by_timeout; ++#ifdef CONFIG_VE_IPTABLES ++ conntrack->ct_env = (get_exec_env())->_ip_conntrack; ++#endif ++ ++ INIT_LIST_HEAD(&conntrack->sibling_list); ++ return conntrack; ++} ++EXPORT_SYMBOL(ip_conntrack_alloc); ++ + /* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ + static struct ip_conntrack_tuple_hash * +@@ -625,10 +709,11 @@ init_conntrack(const struct ip_conntrack + { + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; ++ struct ip_conntrack_tuple_hash *ret; + size_t hash; + struct ip_conntrack_expect *expected; +- int i; + static unsigned int drop_next; ++ struct user_beancounter *ub; + + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); +@@ -637,19 +722,19 @@ init_conntrack(const struct ip_conntrack + + hash = hash_conntrack(tuple); + +- if (ip_conntrack_max && +- atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { ++ if (ve_ip_conntrack_max && ++ atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) { + /* Try dropping from random chain, or else from the + chain about to put into (in case they're trying to + bomb one hash chain). 
*/ + unsigned int next = (drop_next++)%ip_conntrack_htable_size; + +- if (!early_drop(&ip_conntrack_hash[next]) +- && !early_drop(&ip_conntrack_hash[hash])) { ++ if (!early_drop(&ve_ip_conntrack_hash[next]) ++ && !early_drop(&ve_ip_conntrack_hash[hash])) { + if (net_ratelimit()) +- printk(KERN_WARNING +- "ip_conntrack: table full, dropping" +- " packet.\n"); ++ ve_printk(VE_LOG_BOTH, KERN_WARNING ++ "ip_conntrack: VPS %d: table full, dropping" ++ " packet.\n", VEID(get_exec_env())); + return ERR_PTR(-ENOMEM); + } + } +@@ -659,37 +744,33 @@ init_conntrack(const struct ip_conntrack + return NULL; + } + +- conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); +- if (!conntrack) { +- DEBUGP("Can't allocate conntrack.\n"); +- return ERR_PTR(-ENOMEM); +- } ++#ifdef CONFIG_USER_RESOURCE ++ if (skb->dev != NULL) /* received skb */ ++ ub = netdev_bc(skb->dev)->exec_ub; ++ else if (skb->sk != NULL) /* sent skb */ ++ ub = sock_bc(skb->sk)->ub; ++ else ++#endif ++ ub = NULL; ++ ++ ret = ERR_PTR(-ENOMEM); ++ conntrack = ip_conntrack_alloc(ub); ++ if (!conntrack) ++ goto out; + +- memset(conntrack, 0, sizeof(*conntrack)); +- atomic_set(&conntrack->ct_general.use, 1); +- conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; +- for (i=0; i < IP_CT_NUMBER; i++) +- conntrack->infos[i].master = &conntrack->ct_general; + +- if (!protocol->new(conntrack, skb)) { +- kmem_cache_free(ip_conntrack_cachep, conntrack); +- return NULL; +- } +- /* Don't set timer yet: wait for confirmation */ +- init_timer(&conntrack->timeout); +- conntrack->timeout.data = (unsigned long)conntrack; +- conntrack->timeout.function = death_by_timeout; +- +- INIT_LIST_HEAD(&conntrack->sibling_list); ++ ret = NULL; ++ if (!protocol->new(conntrack, skb)) ++ goto free_ct; + + WRITE_LOCK(&ip_conntrack_lock); + /* Need finding and deleting of expected ONLY if we win race */ + READ_LOCK(&ip_conntrack_expect_tuple_lock); +- expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp, ++ expected = LIST_FIND(&ve_ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + READ_UNLOCK(&ip_conntrack_expect_tuple_lock); + +@@ -718,16 +799,21 @@ init_conntrack(const struct ip_conntrack + __set_bit(IPS_EXPECTED_BIT, &conntrack->status); + conntrack->master = expected; + expected->sibling = conntrack; +- LIST_DELETE(&ip_conntrack_expect_list, expected); ++ LIST_DELETE(&ve_ip_conntrack_expect_list, expected); + expected->expectant->expecting--; + nf_conntrack_get(&master_ct(conntrack)->infos[0]); + } +- atomic_inc(&ip_conntrack_count); ++ atomic_inc(&ve_ip_conntrack_count); + WRITE_UNLOCK(&ip_conntrack_lock); + + if (expected && expected->expectfn) + expected->expectfn(conntrack); + return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; ++ ++free_ct: ++ kmem_cache_free(ip_conntrack_cachep, conntrack); ++out: ++ return ret; + } + + /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +@@ -937,7 +1023,7 @@ ip_conntrack_expect_alloc(void) + return new; + } + +-static void ++void + ip_conntrack_expect_insert(struct ip_conntrack_expect *new, + struct ip_conntrack *related_to) + { +@@ -949,7 +1035,7 @@ ip_conntrack_expect_insert(struct ip_con + /* add to expected list for this connection */ + list_add_tail(&new->expected_list, &related_to->sibling_list); + /* add to global list of 
expectations */ +- list_prepend(&ip_conntrack_expect_list, &new->list); ++ list_prepend(&ve_ip_conntrack_expect_list, &new->list); + /* add and start timer if required */ + if (related_to->helper->timeout) { + init_timer(&new->timeout); +@@ -961,6 +1047,7 @@ ip_conntrack_expect_insert(struct ip_con + } + related_to->expecting++; + } ++EXPORT_SYMBOL(ip_conntrack_expect_insert); + + /* Add a related connection. */ + int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, +@@ -977,7 +1064,7 @@ int ip_conntrack_expect_related(struct i + DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + +- old = LIST_FIND(&ip_conntrack_expect_list, resent_expect, ++ old = LIST_FIND(&ve_ip_conntrack_expect_list, resent_expect, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask); + if (old) { +@@ -1043,7 +1130,7 @@ int ip_conntrack_expect_related(struct i + */ + unexpect_related(old); + ret = -EPERM; +- } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash, ++ } else if (LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask)) { + WRITE_UNLOCK(&ip_conntrack_lock); +@@ -1077,7 +1164,7 @@ int ip_conntrack_change_expect(struct ip + /* Never seen before */ + DEBUGP("change expect: never seen before\n"); + if (!ip_ct_tuple_equal(&expect->tuple, newtuple) +- && LIST_FIND(&ip_conntrack_expect_list, expect_clash, ++ && LIST_FIND(&ve_ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, newtuple, &expect->mask)) { + /* Force NAT to find an unused tuple */ + ret = -1; +@@ -1128,12 +1215,42 @@ int ip_conntrack_alter_reply(struct ip_c + int ip_conntrack_helper_register(struct ip_conntrack_helper *me) + { + WRITE_LOCK(&ip_conntrack_lock); +- list_prepend(&helpers, me); ++ list_prepend(&ve_ip_conntrack_helpers, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; + } + ++int visible_ip_conntrack_helper_register(struct ip_conntrack_helper *me) ++{ ++ int ret; ++ struct module *mod = me->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ip_conntrack_helper *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, me, sizeof(struct ip_conntrack_helper)); ++ me = tmp; ++ } ++ ++ ret = ip_conntrack_helper_register(me); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())){ ++ kfree(me); ++nomem: ++ module_put(mod); ++ } ++ return ret; ++} ++ + static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) + { +@@ -1152,11 +1269,11 @@ void ip_conntrack_helper_unregister(stru + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip_conntrack_lock); +- LIST_DELETE(&helpers, me); ++ LIST_DELETE(&ve_ip_conntrack_helpers, me); + + /* Get rid of expecteds, set helpers to NULL. 
*/ + for (i = 0; i < ip_conntrack_htable_size; i++) +- LIST_FIND_W(&ip_conntrack_hash[i], unhelp, ++ LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip_conntrack_lock); + +@@ -1164,6 +1281,29 @@ void ip_conntrack_helper_unregister(stru + synchronize_net(); + } + ++void visible_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) ++{ ++ struct ip_conntrack_helper *i; ++ ++ READ_LOCK(&ip_conntrack_lock); ++ list_for_each_entry(i, &ve_ip_conntrack_helpers, list) { ++ if (i->name == me->name) { ++ me = i; ++ break; ++ } ++ } ++ READ_UNLOCK(&ip_conntrack_lock); ++ if (me != i) ++ return; ++ ++ ip_conntrack_helper_unregister(me); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(me->me); ++ kfree(me); ++ } ++} ++ + /* Refresh conntrack for this many jiffies. */ + void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) + { +@@ -1185,7 +1325,7 @@ void ip_ct_refresh(struct ip_conntrack * + + /* Returns new sk_buff, or NULL */ + struct sk_buff * +-ip_ct_gather_frags(struct sk_buff *skb) ++ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) + { + struct sock *sk = skb->sk; + #ifdef CONFIG_NETFILTER_DEBUG +@@ -1197,7 +1337,7 @@ ip_ct_gather_frags(struct sk_buff *skb) + } + + local_bh_disable(); +- skb = ip_defrag(skb); ++ skb = ip_defrag(skb, user); + local_bh_enable(); + + if (!skb) { +@@ -1257,7 +1397,7 @@ get_next_corpse(int (*kill)(const struct + + READ_LOCK(&ip_conntrack_lock); + for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) { +- h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill, ++ h = LIST_FIND(&ve_ip_conntrack_hash[*bucket], do_kill, + struct ip_conntrack_tuple_hash *, kill, data); + } + if (h) +@@ -1295,6 +1435,9 @@ getorigdst(struct sock *sk, int optval, + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + ++ if (!get_exec_env()->_ip_conntrack) ++ return -ENOPROTOOPT; ++ + IP_CT_TUPLE_U_BLANK(&tuple); + tuple.src.ip = inet->rcv_saddr; + tuple.src.u.tcp.port = inet->sport; +@@ -1354,6 +1497,9 @@ static int kill_all(const struct ip_conn + supposed to kill the mall. */ + void ip_conntrack_cleanup(void) + { ++#ifdef CONFIG_VE ++ struct ve_struct *env; ++#endif + ip_ct_attach = NULL; + /* This makes sure all current packets have passed through + netfilter framework. 
Roll on, two-stage module +@@ -1362,22 +1508,45 @@ void ip_conntrack_cleanup(void) + + i_see_dead_people: + ip_ct_selective_cleanup(kill_all, NULL); +- if (atomic_read(&ip_conntrack_count) != 0) { ++ if (atomic_read(&ve_ip_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + ++#ifdef CONFIG_VE_IPTABLES ++ env = get_exec_env(); ++ if (ve_is_super(env)) { ++ kmem_cache_destroy(ip_conntrack_cachep); ++ nf_unregister_sockopt(&so_getorigdst); ++ } else { ++ visible_ip_conntrack_protocol_unregister( ++ &ip_conntrack_protocol_icmp); ++ visible_ip_conntrack_protocol_unregister( ++ &ip_conntrack_protocol_udp); ++ visible_ip_conntrack_protocol_unregister( ++ &ip_conntrack_protocol_tcp); ++ } ++ vfree(ve_ip_conntrack_hash); ++ ve_ip_conntrack_hash = NULL; ++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_protocol_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers); ++ ve_ip_conntrack_max = 0; ++ atomic_set(&ve_ip_conntrack_count, 0); ++ kfree(env->_ip_conntrack); ++ env->_ip_conntrack = NULL; ++#else + kmem_cache_destroy(ip_conntrack_cachep); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); ++#endif /*CONFIG_VE_IPTABLES*/ + } + + static int hashsize; + MODULE_PARM(hashsize, "i"); + +-int __init ip_conntrack_init(void) ++static int ip_conntrack_cache_create(void) + { +- unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB +@@ -1393,33 +1562,135 @@ int __init ip_conntrack_init(void) + if (ip_conntrack_htable_size < 16) + ip_conntrack_htable_size = 16; + } +- ip_conntrack_max = 8 * ip_conntrack_htable_size; ++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack version %s (%u buckets, %d max)" + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, +- ip_conntrack_htable_size, ip_conntrack_max, ++ ip_conntrack_htable_size, ve_ip_conntrack_max, + sizeof(struct ip_conntrack)); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); +- return ret; +- } +- +- ip_conntrack_hash = vmalloc(sizeof(struct list_head) +- * ip_conntrack_htable_size); +- if (!ip_conntrack_hash) { +- printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); +- goto err_unreg_sockopt; ++ goto out_sockopt; + } + ++ ret = -ENOMEM; + ip_conntrack_cachep = kmem_cache_create("ip_conntrack", +- sizeof(struct ip_conntrack), 0, +- SLAB_HWCACHE_ALIGN, NULL, NULL); ++ sizeof(struct ip_conntrack), 0, ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, ++ NULL, NULL); + if (!ip_conntrack_cachep) { + printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); +- goto err_free_hash; ++ goto err_unreg_sockopt; + } ++ ++ return 0; ++ ++err_unreg_sockopt: ++ nf_unregister_sockopt(&so_getorigdst); ++out_sockopt: ++ return ret; ++} ++ ++/* From ip_conntrack_proto_tcp.c */ ++extern unsigned long ip_ct_tcp_timeout_syn_sent; ++extern unsigned long ip_ct_tcp_timeout_syn_recv; ++extern unsigned long ip_ct_tcp_timeout_established; ++extern unsigned long ip_ct_tcp_timeout_fin_wait; ++extern unsigned long ip_ct_tcp_timeout_close_wait; ++extern unsigned long ip_ct_tcp_timeout_last_ack; ++extern unsigned long ip_ct_tcp_timeout_time_wait; ++extern unsigned long ip_ct_tcp_timeout_close; ++ ++/* From ip_conntrack_proto_udp.c */ ++extern unsigned long ip_ct_udp_timeout; ++extern unsigned long ip_ct_udp_timeout_stream; ++ ++/* From ip_conntrack_proto_icmp.c */ ++extern unsigned long ip_ct_icmp_timeout; ++ ++/* From ip_conntrack_proto_icmp.c */ ++extern unsigned long 
ip_ct_generic_timeout; ++ ++int ip_conntrack_init(void) ++{ ++ unsigned int i; ++ int ret; ++ ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *env; ++ ++ env = get_exec_env(); ++ ret = -ENOMEM; ++ env->_ip_conntrack = ++ kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL); ++ if (!env->_ip_conntrack) ++ goto out; ++ memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack)); ++ if (ve_is_super(env)) { ++ ret = ip_conntrack_cache_create(); ++ if (ret) ++ goto cache_fail; ++ } else ++ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size; ++#else /* CONFIG_VE_IPTABLES */ ++ ret = ip_conntrack_cache_create(); ++ if (ret) ++ goto out; ++#endif ++ ++ ret = -ENOMEM; ++ ve_ip_conntrack_hash = ub_vmalloc(sizeof(struct list_head) ++ * ip_conntrack_htable_size); ++ if (!ve_ip_conntrack_hash) { ++ printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); ++ goto err_free_cache; ++ } ++ ++#ifdef CONFIG_VE_IPTABLES ++ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_protocol_list); ++ INIT_LIST_HEAD(&ve_ip_conntrack_helpers); ++ ++ ve_ip_conntrack_max = ip_conntrack_max; ++ ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_established; ++ ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_sent; ++ ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_syn_recv; ++ ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait; ++ ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_time_wait; ++ ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_close; ++ ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_close_wait; ++ ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_last_ack; ++ ve_ip_ct_udp_timeout = ip_ct_udp_timeout; ++ ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream; ++ ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout; ++ ve_ip_ct_generic_timeout = ip_ct_generic_timeout; ++ ++ if (!ve_is_super(env)) { ++ ret = visible_ip_conntrack_protocol_register( ++ &ip_conntrack_protocol_tcp); ++ if (ret) ++ goto tcp_fail; ++ ret = visible_ip_conntrack_protocol_register( ++ &ip_conntrack_protocol_udp); ++ if (ret) ++ goto udp_fail; ++ ret = visible_ip_conntrack_protocol_register( ++ &ip_conntrack_protocol_icmp); ++ if (ret) ++ goto icmp_fail; ++ } else { ++ WRITE_LOCK(&ip_conntrack_lock); ++ list_append(&ve_ip_conntrack_protocol_list, ++ &ip_conntrack_protocol_tcp); ++ list_append(&ve_ip_conntrack_protocol_list, ++ &ip_conntrack_protocol_udp); ++ list_append(&ve_ip_conntrack_protocol_list, ++ &ip_conntrack_protocol_icmp); ++ WRITE_UNLOCK(&ip_conntrack_lock); ++ } ++#else + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Sew in builtin protocols. 
*/ +@@ -1427,12 +1698,18 @@ int __init ip_conntrack_init(void) + list_append(&protocol_list, &ip_conntrack_protocol_udp); + list_append(&protocol_list, &ip_conntrack_protocol_icmp); + WRITE_UNLOCK(&ip_conntrack_lock); ++#endif /* CONFIG_VE_IPTABLES */ + + for (i = 0; i < ip_conntrack_htable_size; i++) +- INIT_LIST_HEAD(&ip_conntrack_hash[i]); ++ INIT_LIST_HEAD(&ve_ip_conntrack_hash[i]); + ++#ifdef CONFIG_VE_IPTABLES ++ if (ve_is_super(env)) ++ ip_ct_attach = ip_conntrack_attach; ++#else + /* For use by ipt_REJECT */ + ip_ct_attach = ip_conntrack_attach; ++#endif + + /* Set up fake conntrack: + - to never be deleted, not in any hashes */ +@@ -1445,12 +1722,29 @@ int __init ip_conntrack_init(void) + ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master = + &ip_conntrack_untracked.ct_general; + +- return ret; ++ return 0; + +-err_free_hash: +- vfree(ip_conntrack_hash); +-err_unreg_sockopt: ++#ifdef CONFIG_VE_IPTABLES ++icmp_fail: ++ visible_ip_conntrack_protocol_unregister(&ip_conntrack_protocol_udp); ++udp_fail: ++ visible_ip_conntrack_protocol_unregister(&ip_conntrack_protocol_tcp); ++tcp_fail: ++ vfree(ve_ip_conntrack_hash); ++ ve_ip_conntrack_hash = NULL; ++err_free_cache: ++ if (ve_is_super(env)) { ++ kmem_cache_destroy(ip_conntrack_cachep); ++ nf_unregister_sockopt(&so_getorigdst); ++ } ++cache_fail: ++ kfree(env->_ip_conntrack); ++ env->_ip_conntrack = NULL; ++#else ++err_free_cache: ++ kmem_cache_destroy(ip_conntrack_cachep); + nf_unregister_sockopt(&so_getorigdst); +- +- return -ENOMEM; ++#endif /* CONFIG_VE_IPTABLES */ ++out: ++ return ret; + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_ftp.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-05-11 13:05:41.000000000 +0400 +@@ -15,6 +15,7 @@ + #include <linux/ctype.h> + #include <net/checksum.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/lockhelp.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> +@@ -27,17 +28,25 @@ MODULE_DESCRIPTION("ftp connection track + /* This is slow, but it's simple. --RR */ + static char ftp_buffer[65536]; + +-DECLARE_LOCK(ip_ftp_lock); ++static DECLARE_LOCK(ip_ftp_lock); + struct module *ip_conntrack_ftp = THIS_MODULE; + + #define MAX_PORTS 8 + static int ports[MAX_PORTS]; +-static int ports_c; + MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); + + static int loose; + MODULE_PARM(loose, "i"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ports_c \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_ftp_ports_c) ++#else ++static int ports_c = 0; ++#define ve_ports_c ports_c ++#endif ++ + #if 0 + #define DEBUGP printk + #else +@@ -375,6 +384,7 @@ static int help(struct sk_buff *skb, + problem (DMZ machines opening holes to internal + networks, or the packet filter itself). 
*/ + if (!loose) { ++ ip_conntrack_expect_put(exp); + ret = NF_ACCEPT; + goto out; + } +@@ -404,15 +414,43 @@ static int help(struct sk_buff *skb, + static struct ip_conntrack_helper ftp[MAX_PORTS]; + static char ftp_names[MAX_PORTS][10]; + +-/* Not __exit: called from init() */ +-static void fini(void) ++void fini_iptable_ftp(void) + { + int i; +- for (i = 0; i < ports_c; i++) { ++ ++ for (i = 0; i < ve_ports_c; i++) { + DEBUGP("ip_ct_ftp: unregistering helper for port %d\n", + ports[i]); +- ip_conntrack_helper_unregister(&ftp[i]); ++ visible_ip_conntrack_helper_unregister(&ftp[i]); ++ } ++ ve_ports_c = 0; ++} ++ ++int init_iptable_ftp(void) ++{ ++ int i, ret; ++ ++ ve_ports_c = 0; ++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { ++ DEBUGP("ip_ct_ftp: registering helper for port %d\n", ++ ports[i]); ++ ret = visible_ip_conntrack_helper_register(&ftp[i]); ++ if (ret) { ++ fini_iptable_ftp(); ++ return ret; ++ } ++ ve_ports_c++; + } ++ return 0; ++} ++ ++/* Not __exit: called from init() */ ++static void fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_conntrack_ftp); ++ KSYMUNRESOLVE(init_iptable_ftp); ++ KSYMUNRESOLVE(fini_iptable_ftp); ++ fini_iptable_ftp(); + } + + static int __init init(void) +@@ -423,6 +461,7 @@ static int __init init(void) + if (ports[0] == 0) + ports[0] = FTP_PORT; + ++ ve_ports_c = 0; + for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + ftp[i].tuple.src.u.tcp.port = htons(ports[i]); + ftp[i].tuple.dst.protonum = IPPROTO_TCP; +@@ -443,19 +482,22 @@ static int __init init(void) + + DEBUGP("ip_ct_ftp: registering helper for port %d\n", + ports[i]); +- ret = ip_conntrack_helper_register(&ftp[i]); ++ ret = visible_ip_conntrack_helper_register(&ftp[i]); + + if (ret) { + fini(); + return ret; + } +- ports_c++; ++ ve_ports_c++; + } ++ ++ KSYMRESOLVE(init_iptable_ftp); ++ KSYMRESOLVE(fini_iptable_ftp); ++ KSYMMODRESOLVE(ip_conntrack_ftp); + return 0; + } + + PROVIDES_CONNTRACK(ftp); +-EXPORT_SYMBOL(ip_ftp_lock); + + module_init(init); + module_exit(fini); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_irc.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_irc.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_irc.c 2006-05-11 13:05:41.000000000 +0400 +@@ -28,6 +28,7 @@ + #include <linux/ip.h> + #include <net/checksum.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/lockhelp.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> +@@ -35,11 +36,11 @@ + + #define MAX_PORTS 8 + static int ports[MAX_PORTS]; +-static int ports_c; + static int max_dcc_channels = 8; + static unsigned int dcc_timeout = 300; + /* This is slow, but it's simple. --RR */ + static char irc_buffer[65536]; ++static DECLARE_LOCK(irc_buffer_lock); + + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); + MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); +@@ -54,9 +55,17 @@ MODULE_PARM_DESC(dcc_timeout, "timeout o + static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; + #define MINMATCHLEN 5 + +-DECLARE_LOCK(ip_irc_lock); + struct module *ip_conntrack_irc = THIS_MODULE; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ports_c \ ++ (get_exec_env()->_ip_conntrack->_ip_conntrack_irc_ports_c) ++#else ++static int ports_c = 0; ++#define ve_ports_c ports_c ++#endif ++ + #if 0 + #define DEBUGP(format, args...) 
printk(KERN_DEBUG "%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) +@@ -134,7 +143,7 @@ static int help(struct sk_buff *skb, + if (dataoff >= skb->len) + return NF_ACCEPT; + +- LOCK_BH(&ip_irc_lock); ++ LOCK_BH(&irc_buffer_lock); + skb_copy_bits(skb, dataoff, irc_buffer, skb->len - dataoff); + + data = irc_buffer; +@@ -227,7 +236,7 @@ static int help(struct sk_buff *skb, + } /* while data < ... */ + + out: +- UNLOCK_BH(&ip_irc_lock); ++ UNLOCK_BH(&irc_buffer_lock); + return NF_ACCEPT; + } + +@@ -236,6 +245,37 @@ static char irc_names[MAX_PORTS][10]; + + static void fini(void); + ++void fini_iptable_irc(void) ++{ ++ int i; ++ ++ for (i = 0; i < ve_ports_c; i++) { ++ DEBUGP("unregistering port %d\n", ++ ports[i]); ++ visible_ip_conntrack_helper_unregister(&irc_helpers[i]); ++ } ++ ve_ports_c = 0; ++} ++ ++int init_iptable_irc(void) ++{ ++ int i, ret; ++ ++ ve_ports_c = 0; ++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { ++ DEBUGP("port #%d: %d\n", i, ports[i]); ++ ret = visible_ip_conntrack_helper_register(&irc_helpers[i]); ++ if (ret) { ++ printk("ip_conntrack_irc: ERROR registering port %d\n", ++ ports[i]); ++ fini_iptable_irc(); ++ return -EBUSY; ++ } ++ ve_ports_c++; ++ } ++ return 0; ++} ++ + static int __init init(void) + { + int i, ret; +@@ -255,6 +295,7 @@ static int __init init(void) + if (ports[0] == 0) + ports[0] = IRC_PORT; + ++ ve_ports_c = 0; + for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + hlpr = &irc_helpers[i]; + hlpr->tuple.src.u.tcp.port = htons(ports[i]); +@@ -276,7 +317,7 @@ static int __init init(void) + + DEBUGP("port #%d: %d\n", i, ports[i]); + +- ret = ip_conntrack_helper_register(hlpr); ++ ret = visible_ip_conntrack_helper_register(hlpr); + + if (ret) { + printk("ip_conntrack_irc: ERROR registering port %d\n", +@@ -284,8 +325,12 @@ static int __init init(void) + fini(); + return -EBUSY; + } +- ports_c++; ++ ve_ports_c++; + } ++ ++ KSYMRESOLVE(init_iptable_irc); ++ KSYMRESOLVE(fini_iptable_irc); ++ KSYMMODRESOLVE(ip_conntrack_irc); + return 0; + } + +@@ -293,16 +338,13 @@ static int __init init(void) + * it is needed by the init function */ + static void fini(void) + { +- int i; +- for (i = 0; i < ports_c; i++) { +- DEBUGP("unregistering port %d\n", +- ports[i]); +- ip_conntrack_helper_unregister(&irc_helpers[i]); +- } ++ KSYMMODUNRESOLVE(ip_conntrack_irc); ++ KSYMUNRESOLVE(init_iptable_irc); ++ KSYMUNRESOLVE(fini_iptable_irc); ++ fini_iptable_irc(); + } + + PROVIDES_CONNTRACK(irc); +-EXPORT_SYMBOL(ip_irc_lock); + + module_init(init); + module_exit(fini); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-05-11 13:05:41.000000000 +0400 +@@ -66,7 +66,7 @@ unsigned long ip_ct_tcp_timeout_last_ack + unsigned long ip_ct_tcp_timeout_time_wait = 2 MINS; + unsigned long ip_ct_tcp_timeout_close = 10 SECS; + +-static unsigned long * tcp_timeouts[] ++unsigned long * tcp_timeouts[] + = { NULL, /* TCP_CONNTRACK_NONE */ + &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ + &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_standalone.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-08-14 14:55:22.000000000 +0400 
++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-05-11 13:05:41.000000000 +0400 +@@ -25,6 +25,7 @@ + #endif + #include <net/checksum.h> + #include <net/ip.h> ++#include <linux/nfcalls.h> + + #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) + #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) +@@ -43,6 +44,9 @@ + + MODULE_LICENSE("GPL"); + ++int ip_conntrack_enable_ve0 = 0; ++MODULE_PARM(ip_conntrack_enable_ve0, "i"); ++ + static int kill_proto(const struct ip_conntrack *i, void *data) + { + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == +@@ -153,7 +157,7 @@ list_conntracks(char *buffer, char **sta + READ_LOCK(&ip_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip_conntrack_htable_size; i++) { +- if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, ++ if (LIST_FIND(&ve_ip_conntrack_hash[i], conntrack_iterate, + struct ip_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + goto finished; +@@ -161,7 +165,7 @@ list_conntracks(char *buffer, char **sta + + /* Now iterate through expecteds. */ + READ_LOCK(&ip_conntrack_expect_tuple_lock); +- list_for_each(e, &ip_conntrack_expect_list) { ++ list_for_each(e, &ve_ip_conntrack_expect_list) { + unsigned int last_len; + struct ip_conntrack_expect *expect + = (struct ip_conntrack_expect *)e; +@@ -208,7 +212,10 @@ static unsigned int ip_conntrack_defrag( + + /* Gather fragments. */ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { +- *pskb = ip_ct_gather_frags(*pskb); ++ *pskb = ip_ct_gather_frags(*pskb, ++ hooknum == NF_IP_PRE_ROUTING ? ++ IP_DEFRAG_CONNTRACK_IN : ++ IP_DEFRAG_CONNTRACK_OUT); + if (!*pskb) + return NF_STOLEN; + } +@@ -334,7 +341,25 @@ extern unsigned long ip_ct_icmp_timeout; + /* From ip_conntrack_proto_icmp.c */ + extern unsigned long ip_ct_generic_timeout; + ++#ifdef CONFIG_VE ++#define ve_ip_ct_sysctl_header \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header) ++#define ve_ip_ct_net_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_net_table) ++#define ve_ip_ct_ipv4_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table) ++#define ve_ip_ct_netfilter_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table) ++#define ve_ip_ct_sysctl_table \ ++ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table) ++#else + static struct ctl_table_header *ip_ct_sysctl_header; ++#define ve_ip_ct_sysctl_header ip_ct_sysctl_header ++#define ve_ip_ct_net_table ip_ct_net_table ++#define ve_ip_ct_ipv4_table ip_ct_ipv4_table ++#define ve_ip_ct_netfilter_table ip_ct_netfilter_table ++#define ve_ip_ct_sysctl_table ip_ct_sysctl_table ++#endif + + static ctl_table ip_ct_sysctl_table[] = { + { +@@ -491,7 +516,89 @@ static ctl_table ip_ct_net_table[] = { + }, + { .ctl_name = 0 } + }; +-#endif ++ ++#ifdef CONFIG_VE ++static void ip_conntrack_sysctl_cleanup(void) ++{ ++ if (!ve_is_super(get_exec_env())) { ++ kfree(ve_ip_ct_net_table); ++ kfree(ve_ip_ct_ipv4_table); ++ kfree(ve_ip_ct_netfilter_table); ++ kfree(ve_ip_ct_sysctl_table); ++ } ++ ve_ip_ct_net_table = NULL; ++ ve_ip_ct_ipv4_table = NULL; ++ ve_ip_ct_netfilter_table = NULL; ++ ve_ip_ct_sysctl_table = NULL; ++} ++ ++#define ALLOC_ENVCTL(field,k,label) \ ++ if ( !(field = kmalloc(k*sizeof(ctl_table), GFP_KERNEL)) ) \ ++ goto label; ++static int ip_conntrack_sysctl_init(void) ++{ ++ int i, ret = 0; ++ ++ ret = -ENOMEM; ++ if (ve_is_super(get_exec_env())) { ++ ve_ip_ct_net_table = ip_ct_net_table; ++ ve_ip_ct_ipv4_table = ip_ct_ipv4_table; ++ 
ve_ip_ct_netfilter_table = ip_ct_netfilter_table; ++ ve_ip_ct_sysctl_table = ip_ct_sysctl_table; ++ } else { ++ /* allocate structures in ve_struct */ ++ ALLOC_ENVCTL(ve_ip_ct_net_table, 2, out); ++ ALLOC_ENVCTL(ve_ip_ct_ipv4_table, 2, nomem_1); ++ ALLOC_ENVCTL(ve_ip_ct_netfilter_table, 3, nomem_2); ++ ALLOC_ENVCTL(ve_ip_ct_sysctl_table, 15, nomem_3); ++ ++ memcpy(ve_ip_ct_net_table, ip_ct_net_table, ++ 2*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table, ++ 2*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table, ++ 3*sizeof(ctl_table)); ++ memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table, ++ 15*sizeof(ctl_table)); ++ ++ ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table; ++ ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table; ++ ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table; ++ } ++ ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max; ++ /* skip ve_ip_ct_sysctl_table[1].data as it is read-only and common ++ * for all environments */ ++ ve_ip_ct_sysctl_table[2].data = &ve_ip_ct_tcp_timeouts[2]; ++ ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[3]; ++ ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[1]; ++ ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[4]; ++ ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[7]; ++ ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[8]; ++ ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[5]; ++ ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[6]; ++ ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_udp_timeout; ++ ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout_stream; ++ ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_icmp_timeout; ++ ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_generic_timeout; ++ for (i = 0; i < 14; i++) ++ ve_ip_ct_sysctl_table[i].owner_env = get_exec_env(); ++ return 0; ++ ++nomem_3: ++ kfree(ve_ip_ct_netfilter_table); ++ ve_ip_ct_netfilter_table = NULL; ++nomem_2: ++ kfree(ve_ip_ct_ipv4_table); ++ ve_ip_ct_ipv4_table = NULL; ++nomem_1: ++ kfree(ve_ip_ct_net_table); ++ ve_ip_ct_net_table = NULL; ++out: ++ return ret; ++} ++#endif /*CONFIG_VE*/ ++#endif /*CONFIG_SYSCTL*/ ++ + static int init_or_cleanup(int init) + { + struct proc_dir_entry *proc; +@@ -499,77 +606,115 @@ static int init_or_cleanup(int init) + + if (!init) goto cleanup; + ++ ret = -ENOENT; ++ if (!ve_is_super(get_exec_env())) ++ __module_get(THIS_MODULE); ++ + ret = ip_conntrack_init(); + if (ret < 0) +- goto cleanup_nothing; ++ goto cleanup_unget; ++ ++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0) ++ return 0; + +- proc = proc_net_create("ip_conntrack", 0440, list_conntracks); ++ ret = -ENOENT; ++ proc = proc_mkdir("net", NULL); + if (!proc) goto cleanup_init; ++ proc = create_proc_info_entry("net/ip_conntrack", 0440, ++ NULL, list_conntracks); ++ if (!proc) goto cleanup_proc2; + proc->owner = THIS_MODULE; + +- ret = nf_register_hook(&ip_conntrack_defrag_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_defrag_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing defrag hook.\n"); + goto cleanup_proc; + } +- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local_out defrag hook.\n"); + goto cleanup_defragops; + } +- ret = nf_register_hook(&ip_conntrack_in_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing hook.\n"); + goto 
cleanup_defraglocalops; + } +- ret = nf_register_hook(&ip_conntrack_local_out_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } +- ret = nf_register_hook(&ip_conntrack_out_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } +- ret = nf_register_hook(&ip_conntrack_local_in_ops); ++ ret = visible_nf_register_hook(&ip_conntrack_local_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + #ifdef CONFIG_SYSCTL +- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); +- if (ip_ct_sysctl_header == NULL) { ++#ifdef CONFIG_VE ++ ret = ip_conntrack_sysctl_init(); ++ if (ret < 0) ++ goto cleanup_sysctl; ++#endif ++ ret = -ENOMEM; ++ ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0); ++ if (ve_ip_ct_sysctl_header == NULL) { + printk("ip_conntrack: can't register to sysctl.\n"); +- goto cleanup; ++ goto cleanup_sysctl2; + } + #endif ++ return 0; + +- return ret; +- +- cleanup: ++cleanup: ++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0) ++ goto cleanup_init; + #ifdef CONFIG_SYSCTL +- unregister_sysctl_table(ip_ct_sysctl_header); ++ unregister_sysctl_table(ve_ip_ct_sysctl_header); ++cleanup_sysctl2: ++#ifdef CONFIG_VE ++ ip_conntrack_sysctl_cleanup(); ++cleanup_sysctl: ++#endif + #endif +- nf_unregister_hook(&ip_conntrack_local_in_ops); ++ visible_nf_unregister_hook(&ip_conntrack_local_in_ops); + cleanup_inoutandlocalops: +- nf_unregister_hook(&ip_conntrack_out_ops); ++ visible_nf_unregister_hook(&ip_conntrack_out_ops); + cleanup_inandlocalops: +- nf_unregister_hook(&ip_conntrack_local_out_ops); ++ visible_nf_unregister_hook(&ip_conntrack_local_out_ops); + cleanup_inops: +- nf_unregister_hook(&ip_conntrack_in_ops); ++ visible_nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_defraglocalops: +- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); ++ visible_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); + cleanup_defragops: +- nf_unregister_hook(&ip_conntrack_defrag_ops); ++ visible_nf_unregister_hook(&ip_conntrack_defrag_ops); + cleanup_proc: +- proc_net_remove("ip_conntrack"); ++ remove_proc_entry("net/ip_conntrack", NULL); ++ cleanup_proc2: ++ if (!ve_is_super(get_exec_env())) ++ remove_proc_entry("net", NULL); + cleanup_init: + ip_conntrack_cleanup(); +- cleanup_nothing: ++ cleanup_unget: ++ if (!ve_is_super(get_exec_env())) ++ module_put(THIS_MODULE); + return ret; + } + ++int init_iptable_conntrack(void) ++{ ++ return init_or_cleanup(1); ++} ++ ++void fini_iptable_conntrack(void) ++{ ++ init_or_cleanup(0); ++} ++ + /* FIXME: Allow NULL functions and sub in pointers to generic for + them. 
--RR */ + int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) +@@ -578,7 +723,7 @@ int ip_conntrack_protocol_register(struc + struct list_head *i; + + WRITE_LOCK(&ip_conntrack_lock); +- list_for_each(i, &protocol_list) { ++ list_for_each(i, &ve_ip_conntrack_protocol_list) { + if (((struct ip_conntrack_protocol *)i)->proto + == proto->proto) { + ret = -EBUSY; +@@ -586,20 +731,47 @@ int ip_conntrack_protocol_register(struc + } + } + +- list_prepend(&protocol_list, proto); ++ list_prepend(&ve_ip_conntrack_protocol_list, proto); + + out: + WRITE_UNLOCK(&ip_conntrack_lock); + return ret; + } + ++int visible_ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) ++{ ++ int ret = 0; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ip_conntrack_protocol *tmp; ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ip_conntrack_protocol), ++ GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, proto, sizeof(struct ip_conntrack_protocol)); ++ proto = tmp; ++ } ++ ++ ret = ip_conntrack_protocol_register(proto); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) ++ kfree(proto); ++nomem: ++ return ret; ++} ++ + void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) + { + WRITE_LOCK(&ip_conntrack_lock); + + /* ip_ct_find_proto() returns proto_generic in case there is no protocol + * helper. So this should be enough - HW */ +- LIST_DELETE(&protocol_list, proto); ++ LIST_DELETE(&ve_ip_conntrack_protocol_list, proto); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ +@@ -609,17 +781,53 @@ void ip_conntrack_protocol_unregister(st + ip_ct_selective_cleanup(kill_proto, &proto->proto); + } + ++void visible_ip_conntrack_protocol_unregister( ++ struct ip_conntrack_protocol *proto) ++{ ++#ifdef CONFIG_VE ++ struct ip_conntrack_protocol *i; ++ ++ READ_LOCK(&ip_conntrack_lock); ++ list_for_each_entry(i, &ve_ip_conntrack_protocol_list, list) { ++ if (i->proto == proto->proto) { ++ proto = i; ++ break; ++ } ++ } ++ READ_UNLOCK(&ip_conntrack_lock); ++ if (proto != i) ++ return; ++#endif ++ ++ ip_conntrack_protocol_unregister(proto); ++ ++ if (!ve_is_super(get_exec_env())) ++ kfree(proto); ++} ++ + static int __init init(void) + { +- return init_or_cleanup(1); ++ int err; ++ ++ err = init_iptable_conntrack(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_conntrack); ++ KSYMRESOLVE(fini_iptable_conntrack); ++ KSYMMODRESOLVE(ip_conntrack); ++ return 0; + } + + static void __exit fini(void) + { +- init_or_cleanup(0); ++ KSYMMODUNRESOLVE(ip_conntrack); ++ KSYMUNRESOLVE(init_iptable_conntrack); ++ KSYMUNRESOLVE(fini_iptable_conntrack); ++ fini_iptable_conntrack(); + } + +-module_init(init); ++subsys_initcall(init); + module_exit(fini); + + /* Some modules need us, but don't depend directly on any symbol. 
+@@ -628,8 +836,11 @@ void need_ip_conntrack(void) + { + } + ++EXPORT_SYMBOL(ip_conntrack_enable_ve0); + EXPORT_SYMBOL(ip_conntrack_protocol_register); + EXPORT_SYMBOL(ip_conntrack_protocol_unregister); ++EXPORT_SYMBOL(visible_ip_conntrack_protocol_register); ++EXPORT_SYMBOL(visible_ip_conntrack_protocol_unregister); + EXPORT_SYMBOL(invert_tuplepr); + EXPORT_SYMBOL(ip_conntrack_alter_reply); + EXPORT_SYMBOL(ip_conntrack_destroyed); +@@ -637,6 +848,8 @@ EXPORT_SYMBOL(ip_conntrack_get); + EXPORT_SYMBOL(need_ip_conntrack); + EXPORT_SYMBOL(ip_conntrack_helper_register); + EXPORT_SYMBOL(ip_conntrack_helper_unregister); ++EXPORT_SYMBOL(visible_ip_conntrack_helper_register); ++EXPORT_SYMBOL(visible_ip_conntrack_helper_unregister); + EXPORT_SYMBOL(ip_ct_selective_cleanup); + EXPORT_SYMBOL(ip_ct_refresh); + EXPORT_SYMBOL(ip_ct_find_proto); +@@ -652,8 +865,8 @@ EXPORT_SYMBOL(ip_conntrack_tuple_taken); + EXPORT_SYMBOL(ip_ct_gather_frags); + EXPORT_SYMBOL(ip_conntrack_htable_size); + EXPORT_SYMBOL(ip_conntrack_expect_list); +-EXPORT_SYMBOL(ip_conntrack_lock); + EXPORT_SYMBOL(ip_conntrack_hash); ++EXPORT_SYMBOL(ip_conntrack_lock); + EXPORT_SYMBOL(ip_conntrack_untracked); + EXPORT_SYMBOL_GPL(ip_conntrack_find_get); + EXPORT_SYMBOL_GPL(ip_conntrack_put); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_fw_compat.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_fw_compat.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_fw_compat.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_fw_compat.c 2006-05-11 13:05:25.000000000 +0400 +@@ -80,7 +80,7 @@ fw_in(unsigned int hooknum, + &redirpt, pskb); + + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { +- *pskb = ip_ct_gather_frags(*pskb); ++ *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_FW_COMPAT); + + if (!*pskb) + return NF_STOLEN; +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_core.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_core.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_core.c 2006-05-11 13:05:45.000000000 +0400 +@@ -20,6 +20,7 @@ + #include <net/tcp.h> /* For tcp_prot in getorigdst */ + #include <linux/icmp.h> + #include <linux/udp.h> ++#include <ub/ub_mem.h> + + #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) + #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) +@@ -46,10 +47,19 @@ DECLARE_RWLOCK_EXTERN(ip_conntrack_lock) + /* Calculated at init based on memory size */ + static unsigned int ip_nat_htable_size; + +-static struct list_head *bysource; +-static struct list_head *byipsproto; ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_nat_bysource \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource) ++#define ve_ip_nat_byipsproto \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_bysource+ip_nat_htable_size) ++#else + LIST_HEAD(protos); + LIST_HEAD(helpers); ++static struct list_head *bysource; ++static struct list_head *byipsproto; ++#define ve_ip_nat_bysource bysource ++#define ve_ip_nat_byipsproto byipsproto ++#endif + + extern struct ip_nat_protocol unknown_nat_protocol; + +@@ -74,7 +84,9 @@ static void ip_nat_cleanup_conntrack(str + { + struct ip_nat_info *info = &conn->nat.info; + unsigned int hs, hp; +- ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_ip_conntrack *env; ++#endif + if (!info->initialized) + return; + +@@ -91,8 +103,15 @@ static void ip_nat_cleanup_conntrack(str + .tuple.dst.protonum); + + WRITE_LOCK(&ip_nat_lock); ++#ifdef CONFIG_VE_IPTABLES ++ 
env = conn->ct_env; ++ LIST_DELETE(&(env->_ip_nat_bysource)[hs], &info->bysource); ++ LIST_DELETE(&(env->_ip_nat_bysource + ip_nat_htable_size)[hp], ++ &info->byipsproto); ++#else + LIST_DELETE(&bysource[hs], &info->bysource); + LIST_DELETE(&byipsproto[hp], &info->byipsproto); ++#endif + WRITE_UNLOCK(&ip_nat_lock); + } + +@@ -118,7 +137,8 @@ find_nat_proto(u_int16_t protonum) + struct ip_nat_protocol *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); +- i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum); ++ i = LIST_FIND(&ve_ip_nat_protos, cmp_proto, ++ struct ip_nat_protocol *, protonum); + if (!i) + i = &unknown_nat_protocol; + return i; +@@ -197,7 +217,8 @@ find_appropriate_src(const struct ip_con + struct ip_nat_hash *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); +- i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr); ++ i = LIST_FIND(&ve_ip_nat_bysource[h], src_cmp, ++ struct ip_nat_hash *, tuple, mr); + if (i) + return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src; + else +@@ -253,7 +274,7 @@ count_maps(u_int32_t src, u_int32_t dst, + + MUST_BE_READ_LOCKED(&ip_nat_lock); + h = hash_by_ipsproto(src, dst, protonum); +- LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *, ++ LIST_FIND(&ve_ip_nat_byipsproto[h], fake_cmp, struct ip_nat_hash *, + src, dst, protonum, &score, conntrack); + + return score; +@@ -505,6 +526,28 @@ helper_cmp(const struct ip_nat_helper *h + return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); + } + ++/* this function gives us an ability to safely restore ++ * connection in case of failure */ ++int ip_nat_install_conntrack(struct ip_conntrack *conntrack, int helper) ++{ ++ int ret = 0; ++ ++ WRITE_LOCK(&ip_nat_lock); ++ if (helper) { ++ conntrack->nat.info.helper = LIST_FIND(&ve_ip_nat_helpers, ++ helper_cmp, struct ip_nat_helper *, ++ &conntrack->tuplehash[1].tuple); ++ if (conntrack->nat.info.helper == NULL) ++ ret = -EINVAL; ++ } ++ if (!ret) ++ place_in_hashes(conntrack, &conntrack->nat.info); ++ WRITE_UNLOCK(&ip_nat_lock); ++ return ret; ++} ++EXPORT_SYMBOL(ip_nat_install_conntrack); ++ ++ + /* Where to manip the reply packets (will be reverse manip). */ + static unsigned int opposite_hook[NF_IP_NUMHOOKS] + = { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, +@@ -643,8 +686,8 @@ ip_nat_setup_info(struct ip_conntrack *c + + /* If there's a helper, assign it; based on new tuple. */ + if (!conntrack->master) +- info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, +- &reply); ++ info->helper = LIST_FIND(&ve_ip_nat_helpers, ++ helper_cmp, struct ip_nat_helper *, &reply); + + /* It's done. 
*/ + info->initialized |= (1 << HOOK2MANIP(hooknum)); +@@ -684,8 +727,8 @@ void replace_in_hashes(struct ip_conntra + list_del(&info->bysource.list); + list_del(&info->byipsproto.list); + +- list_prepend(&bysource[srchash], &info->bysource); +- list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); ++ list_prepend(&ve_ip_nat_bysource[srchash], &info->bysource); ++ list_prepend(&ve_ip_nat_byipsproto[ipsprotohash], &info->byipsproto); + } + + void place_in_hashes(struct ip_conntrack *conntrack, +@@ -712,8 +755,8 @@ void place_in_hashes(struct ip_conntrack + info->byipsproto.conntrack = conntrack; + info->bysource.conntrack = conntrack; + +- list_prepend(&bysource[srchash], &info->bysource); +- list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); ++ list_prepend(&ve_ip_nat_bysource[srchash], &info->bysource); ++ list_prepend(&ve_ip_nat_byipsproto[ipsprotohash], &info->byipsproto); + } + + /* Returns true if succeeded. */ +@@ -988,41 +1031,64 @@ icmp_reply_translation(struct sk_buff ** + return 0; + } + +-int __init ip_nat_init(void) ++int ip_nat_init(void) + { + size_t i; ++ int ret; + +- /* Leave them the same for the moment. */ +- ip_nat_htable_size = ip_conntrack_htable_size; ++ if (ve_is_super(get_exec_env())) ++ ip_nat_htable_size = ip_conntrack_htable_size; ++ INIT_LIST_HEAD(&ve_ip_nat_protos); ++ INIT_LIST_HEAD(&ve_ip_nat_helpers); + + /* One vmalloc for both hash tables */ +- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2); +- if (!bysource) { +- return -ENOMEM; +- } +- byipsproto = bysource + ip_nat_htable_size; +- +- /* Sew in builtin protocols. */ +- WRITE_LOCK(&ip_nat_lock); +- list_append(&protos, &ip_nat_protocol_tcp); +- list_append(&protos, &ip_nat_protocol_udp); +- list_append(&protos, &ip_nat_protocol_icmp); +- WRITE_UNLOCK(&ip_nat_lock); ++ ret = -ENOMEM; ++ ve_ip_nat_bysource = ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2); ++ if (!ve_ip_nat_bysource) ++ goto err; ++ /*byipsproto = bysource + ip_nat_htable_size;*/ + + for (i = 0; i < ip_nat_htable_size; i++) { +- INIT_LIST_HEAD(&bysource[i]); +- INIT_LIST_HEAD(&byipsproto[i]); ++ INIT_LIST_HEAD(&ve_ip_nat_bysource[i]); ++ INIT_LIST_HEAD(&ve_ip_nat_byipsproto[i]); ++ } ++ ++ if (!ve_is_super(get_exec_env())) { ++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_tcp); ++ if (ret) ++ goto tcp_fail; ++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_udp); ++ if (ret) ++ goto udp_fail; ++ ret = visible_ip_nat_protocol_register(&ip_nat_protocol_icmp); ++ if (ret) ++ goto icmp_fail; ++ } else { ++ /* Sew in builtin protocols. */ ++ WRITE_LOCK(&ip_nat_lock); ++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_tcp); ++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_udp); ++ list_append(&ve_ip_nat_protos, &ip_nat_protocol_icmp); ++ WRITE_UNLOCK(&ip_nat_lock); ++ ++ /* Initialize fake conntrack so that NAT will skip it */ ++ ip_conntrack_untracked.nat.info.initialized |= ++ (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); + } + + /* FIXME: Man, this is a hack. 
<SIGH> */ +- IP_NF_ASSERT(ip_conntrack_destroyed == NULL); +- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; +- +- /* Initialize fake conntrack so that NAT will skip it */ +- ip_conntrack_untracked.nat.info.initialized |= +- (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); ++ IP_NF_ASSERT(ve_ip_conntrack_destroyed == NULL); ++ ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + return 0; ++icmp_fail: ++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_udp); ++udp_fail: ++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_tcp); ++tcp_fail: ++ vfree(ve_ip_nat_bysource); ++err: ++ return ret; + } + + /* Clear NAT section of all conntracks, in case we're loaded again. */ +@@ -1036,6 +1102,13 @@ static int clean_nat(const struct ip_con + void ip_nat_cleanup(void) + { + ip_ct_selective_cleanup(&clean_nat, NULL); +- ip_conntrack_destroyed = NULL; +- vfree(bysource); ++ ve_ip_conntrack_destroyed = NULL; ++ vfree(ve_ip_nat_bysource); ++ ve_ip_nat_bysource = NULL; ++ ++ if (!ve_is_super(get_exec_env())){ ++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_icmp); ++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_udp); ++ visible_ip_nat_protocol_unregister(&ip_nat_protocol_tcp); ++ } + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_ftp.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_ftp.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_ftp.c 2006-05-11 13:05:41.000000000 +0400 +@@ -18,6 +18,7 @@ + #include <linux/netfilter_ipv4/ip_nat_rule.h> + #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); +@@ -31,11 +32,17 @@ MODULE_DESCRIPTION("ftp NAT helper"); + + #define MAX_PORTS 8 + static int ports[MAX_PORTS]; +-static int ports_c; + + MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); + +-DECLARE_LOCK_EXTERN(ip_ftp_lock); ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ports_c \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_ports_c) ++#else ++static int ports_c = 0; ++#define ve_ports_c ports_c ++#endif + + /* FIXME: Time out? --RR */ + +@@ -59,8 +66,6 @@ ftp_nat_expected(struct sk_buff **pskb, + DEBUGP("nat_expected: We have a connection!\n"); + exp_ftp_info = &ct->master->help.exp_ftp_info; + +- LOCK_BH(&ip_ftp_lock); +- + if (exp_ftp_info->ftptype == IP_CT_FTP_PORT + || exp_ftp_info->ftptype == IP_CT_FTP_EPRT) { + /* PORT command: make connection go to the client. */ +@@ -75,7 +80,6 @@ ftp_nat_expected(struct sk_buff **pskb, + DEBUGP("nat_expected: PASV cmd. 
%u.%u.%u.%u->%u.%u.%u.%u\n", + NIPQUAD(newsrcip), NIPQUAD(newdstip)); + } +- UNLOCK_BH(&ip_ftp_lock); + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + newip = newsrcip; +@@ -111,8 +115,6 @@ mangle_rfc959_packet(struct sk_buff **ps + { + char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; + +- MUST_BE_LOCKED(&ip_ftp_lock); +- + sprintf(buffer, "%u,%u,%u,%u,%u,%u", + NIPQUAD(newip), port>>8, port&0xFF); + +@@ -134,8 +136,6 @@ mangle_eprt_packet(struct sk_buff **pskb + { + char buffer[sizeof("|1|255.255.255.255|65535|")]; + +- MUST_BE_LOCKED(&ip_ftp_lock); +- + sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); + + DEBUGP("calling ip_nat_mangle_tcp_packet\n"); +@@ -156,8 +156,6 @@ mangle_epsv_packet(struct sk_buff **pskb + { + char buffer[sizeof("|||65535|")]; + +- MUST_BE_LOCKED(&ip_ftp_lock); +- + sprintf(buffer, "|||%u|", port); + + DEBUGP("calling ip_nat_mangle_tcp_packet\n"); +@@ -189,7 +187,6 @@ static int ftp_data_fixup(const struct i + u_int16_t port; + struct ip_conntrack_tuple newtuple; + +- MUST_BE_LOCKED(&ip_ftp_lock); + DEBUGP("FTP_NAT: seq %u + %u in %u\n", + expect->seq, ct_ftp_info->len, + ntohl(tcph->seq)); +@@ -268,13 +265,11 @@ static unsigned int help(struct ip_connt + } + + datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; +- LOCK_BH(&ip_ftp_lock); + /* If it's in the right range... */ + if (between(exp->seq + ct_ftp_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen)) { + if (!ftp_data_fixup(ct_ftp_info, ct, pskb, ctinfo, exp)) { +- UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } + } else { +@@ -286,26 +281,52 @@ static unsigned int help(struct ip_connt + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + } +- UNLOCK_BH(&ip_ftp_lock); + return NF_DROP; + } +- UNLOCK_BH(&ip_ftp_lock); +- + return NF_ACCEPT; + } + + static struct ip_nat_helper ftp[MAX_PORTS]; + static char ftp_names[MAX_PORTS][10]; + +-/* Not __exit: called from init() */ +-static void fini(void) ++void fini_iptable_nat_ftp(void) + { + int i; + +- for (i = 0; i < ports_c; i++) { ++ for (i = 0; i < ve_ports_c; i++) { + DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]); +- ip_nat_helper_unregister(&ftp[i]); ++ visible_ip_nat_helper_unregister(&ftp[i]); ++ } ++ ve_ports_c = 0; ++} ++ ++int init_iptable_nat_ftp(void) ++{ ++ int i, ret = 0; ++ ++ ve_ports_c = 0; ++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { ++ DEBUGP("ip_nat_ftp: Trying to register for port %d\n", ++ ports[i]); ++ ret = visible_ip_nat_helper_register(&ftp[i]); ++ if (ret) { ++ printk("ip_nat_ftp: error registering " ++ "helper for port %d\n", ports[i]); ++ fini_iptable_nat_ftp(); ++ return ret; ++ } ++ ve_ports_c++; + } ++ return 0; ++} ++ ++/* Not __exit: called from init() */ ++static void fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_nat_ftp); ++ KSYMUNRESOLVE(init_iptable_nat_ftp); ++ KSYMUNRESOLVE(fini_iptable_nat_ftp); ++ fini_iptable_nat_ftp(); + } + + static int __init init(void) +@@ -316,6 +337,7 @@ static int __init init(void) + if (ports[0] == 0) + ports[0] = FTP_PORT; + ++ ve_ports_c = 0; + for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { + ftp[i].tuple.dst.protonum = IPPROTO_TCP; + ftp[i].tuple.src.u.tcp.port = htons(ports[i]); +@@ -335,7 +357,7 @@ static int __init init(void) + + DEBUGP("ip_nat_ftp: Trying to register for port %d\n", + ports[i]); +- ret = ip_nat_helper_register(&ftp[i]); ++ ret = visible_ip_nat_helper_register(&ftp[i]); + + if (ret) { + printk("ip_nat_ftp: error registering " +@@ -343,9 +365,12 @@ static int __init init(void) + fini(); + return ret; + } +- ports_c++; ++ ve_ports_c++; + } + 
++ KSYMRESOLVE(init_iptable_nat_ftp); ++ KSYMRESOLVE(fini_iptable_nat_ftp); ++ KSYMMODRESOLVE(ip_nat_ftp); + return ret; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_helper.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_helper.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_helper.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_helper.c 2006-05-11 13:05:41.000000000 +0400 +@@ -410,33 +410,59 @@ int ip_nat_helper_register(struct ip_nat + int ret = 0; + + WRITE_LOCK(&ip_nat_lock); +- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) ++ if (LIST_FIND(&ve_ip_nat_helpers, helper_cmp, ++ struct ip_nat_helper *,&me->tuple)) + ret = -EBUSY; + else +- list_prepend(&helpers, me); ++ list_prepend(&ve_ip_nat_helpers, me); + WRITE_UNLOCK(&ip_nat_lock); + + return ret; + } + +-static int +-kill_helper(const struct ip_conntrack *i, void *helper) ++int visible_ip_nat_helper_register(struct ip_nat_helper *me) + { + int ret; ++ struct module *mod = me->me; + +- READ_LOCK(&ip_nat_lock); +- ret = (i->nat.info.helper == helper); +- READ_UNLOCK(&ip_nat_lock); ++ if (!ve_is_super(get_exec_env())) { ++ struct ip_nat_helper *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ip_nat_helper), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, me, sizeof(struct ip_nat_helper)); ++ me = tmp; ++ } + ++ ret = ip_nat_helper_register(me); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(me); ++nomem: ++ module_put(mod); ++ } + return ret; + } + ++static int ++kill_helper(const struct ip_conntrack *i, void *helper) ++{ ++ return (i->nat.info.helper == helper); ++} ++ + void ip_nat_helper_unregister(struct ip_nat_helper *me) + { + WRITE_LOCK(&ip_nat_lock); + /* Autoloading conntrack helper might have failed */ +- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) { +- LIST_DELETE(&helpers, me); ++ if (LIST_FIND(&ve_ip_nat_helpers, helper_cmp, ++ struct ip_nat_helper *,&me->tuple)) { ++ LIST_DELETE(&ve_ip_nat_helpers, me); + } + WRITE_UNLOCK(&ip_nat_lock); + +@@ -452,3 +478,26 @@ void ip_nat_helper_unregister(struct ip_ + worse. 
--RR */ + ip_ct_selective_cleanup(kill_helper, me); + } ++ ++void visible_ip_nat_helper_unregister(struct ip_nat_helper *me) ++{ ++ struct ip_nat_helper *i; ++ ++ READ_LOCK(&ip_nat_lock); ++ list_for_each_entry(i, &ve_ip_nat_helpers, list) { ++ if (i->name == me->name) { ++ me = i; ++ break; ++ } ++ } ++ READ_UNLOCK(&ip_nat_lock); ++ if (me != i) ++ return; ++ ++ ip_nat_helper_unregister(me); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(me->me); ++ kfree(me); ++ } ++} +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_irc.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_irc.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_irc.c 2006-05-11 13:05:41.000000000 +0400 +@@ -27,6 +27,7 @@ + #include <linux/netfilter_ipv4/ip_nat_rule.h> + #include <linux/netfilter_ipv4/ip_conntrack_irc.h> + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> ++#include <linux/nfcalls.h> + + #if 0 + #define DEBUGP printk +@@ -36,7 +37,15 @@ + + #define MAX_PORTS 8 + static int ports[MAX_PORTS]; +-static int ports_c; ++ ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ports_c \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_irc_ports_c) ++#else ++static int ports_c = 0; ++#define ve_ports_c ports_c ++#endif + + MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); + MODULE_DESCRIPTION("IRC (DCC) NAT helper"); +@@ -44,9 +53,6 @@ MODULE_LICENSE("GPL"); + MODULE_PARM(ports, "1-" __MODULE_STRING(MAX_PORTS) "i"); + MODULE_PARM_DESC(ports, "port numbers of IRC servers"); + +-/* protects irc part of conntracks */ +-DECLARE_LOCK_EXTERN(ip_irc_lock); +- + /* FIXME: Time out? --RR */ + + static unsigned int +@@ -102,8 +108,6 @@ static int irc_data_fixup(const struct i + /* "4294967296 65635 " */ + char buffer[18]; + +- MUST_BE_LOCKED(&ip_irc_lock); +- + DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", + expect->seq, ct_irc_info->len, + ntohl(tcph->seq)); +@@ -111,11 +115,6 @@ static int irc_data_fixup(const struct i + newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + + /* Alter conntrack's expectations. 
*/ +- +- /* We can read expect here without conntrack lock, since it's +- only set in ip_conntrack_irc, with ip_irc_lock held +- writable */ +- + t = expect->tuple; + t.dst.ip = newip; + for (port = ct_irc_info->port; port != 0; port++) { +@@ -185,13 +184,11 @@ static unsigned int help(struct ip_connt + DEBUGP("got beyond not touching\n"); + + datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; +- LOCK_BH(&ip_irc_lock); + /* Check whether the whole IP/address pattern is carried in the payload */ + if (between(exp->seq + ct_irc_info->len, + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen)) { + if (!irc_data_fixup(ct_irc_info, ct, pskb, ctinfo, exp)) { +- UNLOCK_BH(&ip_irc_lock); + return NF_DROP; + } + } else { +@@ -204,28 +201,59 @@ static unsigned int help(struct ip_connt + ntohl(tcph->seq), + ntohl(tcph->seq) + datalen); + } +- UNLOCK_BH(&ip_irc_lock); + return NF_DROP; + } +- UNLOCK_BH(&ip_irc_lock); +- + return NF_ACCEPT; + } + + static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS]; + static char irc_names[MAX_PORTS][10]; + +-/* This function is intentionally _NOT_ defined as __exit, because +- * it is needed by init() */ +-static void fini(void) ++void fini_iptable_nat_irc(void) + { + int i; + +- for (i = 0; i < ports_c; i++) { ++ for (i = 0; i < ve_ports_c; i++) { + DEBUGP("ip_nat_irc: unregistering helper for port %d\n", + ports[i]); +- ip_nat_helper_unregister(&ip_nat_irc_helpers[i]); ++ visible_ip_nat_helper_unregister(&ip_nat_irc_helpers[i]); + } ++ ve_ports_c = 0; ++} ++ ++/* This function is intentionally _NOT_ defined as __exit, because ++ * it is needed by the init function */ ++static void fini(void) ++{ ++ KSYMMODUNRESOLVE(ip_nat_irc); ++ KSYMUNRESOLVE(init_iptable_nat_irc); ++ KSYMUNRESOLVE(fini_iptable_nat_irc); ++ fini_iptable_nat_irc(); ++} ++ ++int init_iptable_nat_irc(void) ++{ ++ int ret = 0; ++ int i; ++ struct ip_nat_helper *hlpr; ++ ++ ve_ports_c = 0; ++ for (i = 0; (i < MAX_PORTS) && ports[i]; i++) { ++ hlpr = &ip_nat_irc_helpers[i]; ++ DEBUGP ++ ("ip_nat_irc: Trying to register helper for port %d: name %s\n", ++ ports[i], hlpr->name); ++ ret = visible_ip_nat_helper_register(hlpr); ++ if (ret) { ++ printk ++ ("ip_nat_irc: error registering helper for port %d\n", ++ ports[i]); ++ fini_iptable_nat_irc(); ++ return 1; ++ } ++ ve_ports_c++; ++ } ++ return 0; + } + + static int __init init(void) +@@ -239,6 +267,7 @@ static int __init init(void) + ports[0] = IRC_PORT; + } + ++ ve_ports_c = 0; + for (i = 0; (i < MAX_PORTS) && ports[i] != 0; i++) { + hlpr = &ip_nat_irc_helpers[i]; + hlpr->tuple.dst.protonum = IPPROTO_TCP; +@@ -260,7 +289,7 @@ static int __init init(void) + DEBUGP + ("ip_nat_irc: Trying to register helper for port %d: name %s\n", + ports[i], hlpr->name); +- ret = ip_nat_helper_register(hlpr); ++ ret = visible_ip_nat_helper_register(hlpr); + + if (ret) { + printk +@@ -269,8 +298,12 @@ static int __init init(void) + fini(); + return 1; + } +- ports_c++; ++ ve_ports_c++; + } ++ ++ KSYMRESOLVE(init_iptable_nat_irc); ++ KSYMRESOLVE(fini_iptable_nat_irc); ++ KSYMMODRESOLVE(ip_nat_irc); + return ret; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_proto_tcp.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_tcp.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-05-11 13:05:27.000000000 +0400 +@@ -40,7 +40,8 @@ tcp_unique_tuple(struct ip_conntrack_tup + enum ip_nat_manip_type maniptype, + const struct 
ip_conntrack *conntrack) + { +- static u_int16_t port, *portptr; ++ static u_int16_t port; ++ u_int16_t *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_udp.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_proto_udp.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_proto_udp.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_proto_udp.c 2006-05-11 13:05:27.000000000 +0400 +@@ -41,7 +41,8 @@ udp_unique_tuple(struct ip_conntrack_tup + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) + { +- static u_int16_t port, *portptr; ++ static u_int16_t port; ++ u_int16_t *portptr; + unsigned int range_size, min, i; + + if (maniptype == IP_NAT_MANIP_SRC) +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_rule.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_rule.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_rule.c 2006-05-11 13:05:49.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/proc_fs.h> + #include <net/checksum.h> + #include <linux/bitops.h> ++#include <ub/ub_mem.h> + + #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) + #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) +@@ -33,6 +34,16 @@ + #define DEBUGP(format, args...) + #endif + ++#ifdef CONFIG_VE_IPTABLES ++#define ve_ip_nat_table \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_table) ++#define ve_ip_nat_initial_table \ ++ (get_exec_env()->_ip_conntrack->_ip_nat_initial_table) ++#else ++#define ve_ip_nat_table &nat_table ++#define ve_ip_nat_initial_table &nat_initial_table ++#endif ++ + #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) + + /* Standard entry. 
*/ +@@ -54,12 +65,12 @@ struct ipt_error + struct ipt_error_target target; + }; + +-static struct ++static struct ipt_nat_initial_table + { + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +-} nat_initial_table __initdata ++} nat_initial_table + = { { "nat", NAT_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] = 0, +@@ -241,6 +252,93 @@ static int ipt_dnat_checkentry(const cha + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *target, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_target *pt; ++ struct ip_nat_multi_range *pinfo; ++ struct compat_ip_nat_multi_range info; ++ u_int16_t tsize; ++ ++ pt = (struct ipt_entry_target *)target; ++ tsize = pt->u.user.target_size; ++ if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target))) ++ return -EFAULT; ++ pinfo = (struct ip_nat_multi_range *)pt->data; ++ memset(&info, 0, sizeof(struct compat_ip_nat_multi_range)); ++ info.rangesize = pinfo->rangesize; ++ info.range[0].flags = pinfo->range[0].flags; ++ info.range[0].min_ip = pinfo->range[0].min_ip; ++ info.range[0].max_ip = pinfo->range[0].max_ip; ++ info.range[0].min = pinfo->range[0].min; ++ info.range[0].max = pinfo->range[0].max; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target), ++ &info, sizeof(struct compat_ip_nat_multi_range))) ++ return -EFAULT; ++ tsize -= off; ++ if (put_user(tsize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += tsize; ++ return 0; ++} ++ ++static int compat_from_user(void *target, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_target *pt; ++ struct ipt_entry_target *dstpt; ++ struct compat_ip_nat_multi_range *pinfo; ++ struct ip_nat_multi_range info; ++ u_int16_t tsize; ++ ++ pt = (struct compat_ipt_entry_target *)target; ++ dstpt = (struct ipt_entry_target *)*dstptr; ++ tsize = pt->u.user.target_size; ++ memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target)); ++ pinfo = (struct compat_ip_nat_multi_range *)pt->data; ++ memset(&info, 0, sizeof(struct ip_nat_multi_range)); ++ info.rangesize = pinfo->rangesize; ++ info.range[0].flags = pinfo->range[0].flags; ++ info.range[0].min_ip = pinfo->range[0].min_ip; ++ info.range[0].max_ip = pinfo->range[0].max_ip; ++ info.range[0].min = pinfo->range[0].min; ++ info.range[0].max = pinfo->range[0].max; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_target), ++ &info, sizeof(struct ip_nat_multi_range)); ++ tsize += off; ++ dstpt->u.user.target_size = tsize; ++ *size += off; ++ *dstptr += tsize; ++ return 0; ++} ++ ++static int compat(void *target, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ip_nat_multi_range)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(target, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(target, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + inline unsigned int + alloc_null_binding(struct ip_conntrack *conntrack, + struct ip_nat_info *info, +@@ -271,7 +369,7 @@ int ip_nat_rule_find(struct sk_buff **ps + { + int ret; + +- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); ++ ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL); + + if (ret == NF_ACCEPT) { + if (!(info->initialized 
& (1 << HOOK2MANIP(hooknum)))) +@@ -285,42 +383,91 @@ static struct ipt_target ipt_snat_reg = + .name = "SNAT", + .target = ipt_snat_target, + .checkentry = ipt_snat_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + }; + + static struct ipt_target ipt_dnat_reg = { + .name = "DNAT", + .target = ipt_dnat_target, + .checkentry = ipt_dnat_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + }; + +-int __init ip_nat_rule_init(void) ++int ip_nat_rule_init(void) + { + int ret; + +- ret = ipt_register_table(&nat_table); ++#ifdef CONFIG_VE_IPTABLES ++ if (ve_is_super(get_exec_env())) { ++ ve_ip_nat_table = &nat_table; ++ ve_ip_nat_initial_table = &nat_initial_table; ++ } else { ++ /* allocate structures in ve_struct */ ++ ret = -ENOMEM; ++ ve_ip_nat_initial_table = ++ ub_kmalloc(sizeof(nat_initial_table), GFP_KERNEL); ++ if (!ve_ip_nat_initial_table) ++ goto nomem_initial; ++ ve_ip_nat_table = ub_kmalloc(sizeof(nat_table), GFP_KERNEL); ++ if (!ve_ip_nat_table) ++ goto nomem_table; ++ ++ memcpy(ve_ip_nat_initial_table, &nat_initial_table, ++ sizeof(nat_initial_table)); ++ memcpy(ve_ip_nat_table, &nat_table, ++ sizeof(nat_table)); ++ ve_ip_nat_table->table = ++ &ve_ip_nat_initial_table->repl; ++ } ++#endif ++ ++ ret = ipt_register_table(ve_ip_nat_table); + if (ret != 0) +- return ret; +- ret = ipt_register_target(&ipt_snat_reg); ++ goto out; ++ ret = visible_ipt_register_target(&ipt_snat_reg); + if (ret != 0) + goto unregister_table; + +- ret = ipt_register_target(&ipt_dnat_reg); ++ ret = visible_ipt_register_target(&ipt_dnat_reg); + if (ret != 0) + goto unregister_snat; + + return ret; + + unregister_snat: +- ipt_unregister_target(&ipt_snat_reg); ++ visible_ipt_unregister_target(&ipt_snat_reg); + unregister_table: +- ipt_unregister_table(&nat_table); +- ++ ipt_unregister_table(ve_ip_nat_table); ++ out: ++#ifdef CONFIG_VE_IPTABLES ++ if (!ve_is_super(get_exec_env())) ++ kfree(ve_ip_nat_table); ++ ve_ip_nat_table = NULL; ++ nomem_table: ++ if (!ve_is_super(get_exec_env())) ++ kfree(ve_ip_nat_initial_table); ++ ve_ip_nat_initial_table = NULL; ++ nomem_initial: ++#endif + return ret; + } + + void ip_nat_rule_cleanup(void) + { +- ipt_unregister_target(&ipt_dnat_reg); +- ipt_unregister_target(&ipt_snat_reg); +- ipt_unregister_table(&nat_table); ++ ipt_unregister_table(ve_ip_nat_table); ++ visible_ipt_unregister_target(&ipt_dnat_reg); ++ visible_ipt_unregister_target(&ipt_snat_reg); ++ ++#ifdef CONFIG_VE ++ if (!ve_is_super(get_exec_env())) { ++ kfree(ve_ip_nat_initial_table); ++ kfree(ve_ip_nat_table); ++ } ++ ve_ip_nat_initial_table = NULL; ++ ve_ip_nat_table = NULL; ++#endif + } +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_standalone.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_nat_standalone.c 2004-08-14 14:55:59.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_nat_standalone.c 2006-05-11 13:05:42.000000000 +0400 +@@ -30,6 +30,7 @@ + #include <net/ip.h> + #include <net/checksum.h> + #include <linux/spinlock.h> ++#include <linux/nfcalls.h> + + #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) + #define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) +@@ -200,7 +201,7 @@ ip_nat_out(unsigned int hooknum, + I'm starting to have nightmares about fragments. 
*/ + + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { +- *pskb = ip_ct_gather_frags(*pskb); ++ *pskb = ip_ct_gather_frags(*pskb, IP_DEFRAG_NAT_OUT); + + if (!*pskb) + return NF_STOLEN; +@@ -284,7 +285,7 @@ int ip_nat_protocol_register(struct ip_n + struct list_head *i; + + WRITE_LOCK(&ip_nat_lock); +- list_for_each(i, &protos) { ++ list_for_each(i, &ve_ip_nat_protos) { + if (((struct ip_nat_protocol *)i)->protonum + == proto->protonum) { + ret = -EBUSY; +@@ -292,23 +293,70 @@ int ip_nat_protocol_register(struct ip_n + } + } + +- list_prepend(&protos, proto); ++ list_prepend(&ve_ip_nat_protos, proto); + out: + WRITE_UNLOCK(&ip_nat_lock); + return ret; + } + ++int visible_ip_nat_protocol_register(struct ip_nat_protocol *proto) ++{ ++ int ret = 0; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ip_nat_protocol *tmp; ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ip_nat_protocol), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, proto, sizeof(struct ip_nat_protocol)); ++ proto = tmp; ++ } ++ ++ ret = ip_nat_protocol_register(proto); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) ++ kfree(proto); ++nomem: ++ return ret; ++} ++ + /* Noone stores the protocol anywhere; simply delete it. */ + void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) + { + WRITE_LOCK(&ip_nat_lock); +- LIST_DELETE(&protos, proto); ++ LIST_DELETE(&ve_ip_nat_protos, proto); + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ + synchronize_net(); + } + ++void visible_ip_nat_protocol_unregister(struct ip_nat_protocol *proto) ++{ ++ struct ip_nat_protocol *i; ++ ++ READ_LOCK(&ip_nat_lock); ++ list_for_each_entry(i, &ve_ip_nat_protos, list) { ++ if (i->protonum == proto->protonum) { ++ proto = i; ++ break; ++ } ++ } ++ READ_UNLOCK(&ip_nat_lock); ++ if (proto != i) ++ return; ++ ++ ip_nat_protocol_unregister(proto); ++ ++ if (!ve_is_super(get_exec_env())) ++ kfree(proto); ++} ++ + static int init_or_cleanup(int init) + { + int ret = 0; +@@ -317,77 +365,113 @@ static int init_or_cleanup(int init) + + if (!init) goto cleanup; + ++ if (!ve_is_super(get_exec_env())) ++ __module_get(THIS_MODULE); ++ + ret = ip_nat_rule_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); +- goto cleanup_nothing; ++ goto cleanup_modput; + } + ret = ip_nat_init(); + if (ret < 0) { + printk("ip_nat_init: can't setup rules.\n"); + goto cleanup_rule_init; + } +- ret = nf_register_hook(&ip_nat_in_ops); ++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0) ++ return 0; ++ ++ ret = visible_nf_register_hook(&ip_nat_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register in hook.\n"); + goto cleanup_nat; + } +- ret = nf_register_hook(&ip_nat_out_ops); ++ ret = visible_nf_register_hook(&ip_nat_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register out hook.\n"); + goto cleanup_inops; + } + #ifdef CONFIG_IP_NF_NAT_LOCAL +- ret = nf_register_hook(&ip_nat_local_out_ops); ++ ret = visible_nf_register_hook(&ip_nat_local_out_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local out hook.\n"); + goto cleanup_outops; + } +- ret = nf_register_hook(&ip_nat_local_in_ops); ++ ret = visible_nf_register_hook(&ip_nat_local_in_ops); + if (ret < 0) { + printk("ip_nat_init: can't register local in hook.\n"); + goto cleanup_localoutops; + } + #endif +- return ret; ++ return 0; + + cleanup: ++ if (ve_is_super(get_exec_env()) && !ip_conntrack_enable_ve0) ++ goto cleanup_nat; + #ifdef CONFIG_IP_NF_NAT_LOCAL +- 
nf_unregister_hook(&ip_nat_local_in_ops); ++ visible_nf_unregister_hook(&ip_nat_local_in_ops); + cleanup_localoutops: +- nf_unregister_hook(&ip_nat_local_out_ops); ++ visible_nf_unregister_hook(&ip_nat_local_out_ops); + cleanup_outops: + #endif +- nf_unregister_hook(&ip_nat_out_ops); ++ visible_nf_unregister_hook(&ip_nat_out_ops); + cleanup_inops: +- nf_unregister_hook(&ip_nat_in_ops); ++ visible_nf_unregister_hook(&ip_nat_in_ops); + cleanup_nat: + ip_nat_cleanup(); + cleanup_rule_init: + ip_nat_rule_cleanup(); +- cleanup_nothing: ++ cleanup_modput: ++ if (!ve_is_super(get_exec_env())) ++ module_put(THIS_MODULE); + MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock); + return ret; + } + +-static int __init init(void) ++int init_iptable_nat(void) + { + return init_or_cleanup(1); + } + +-static void __exit fini(void) ++void fini_iptable_nat(void) + { + init_or_cleanup(0); + } + +-module_init(init); ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_nat(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_nat); ++ KSYMRESOLVE(fini_iptable_nat); ++ KSYMMODRESOLVE(iptable_nat); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_nat); ++ KSYMUNRESOLVE(init_iptable_nat); ++ KSYMUNRESOLVE(fini_iptable_nat); ++ fini_iptable_nat(); ++} ++ ++fs_initcall(init); + module_exit(fini); + + EXPORT_SYMBOL(ip_nat_setup_info); + EXPORT_SYMBOL(ip_nat_protocol_register); ++EXPORT_SYMBOL(visible_ip_nat_protocol_register); + EXPORT_SYMBOL(ip_nat_protocol_unregister); ++EXPORT_SYMBOL(visible_ip_nat_protocol_unregister); + EXPORT_SYMBOL(ip_nat_helper_register); ++EXPORT_SYMBOL(visible_ip_nat_helper_register); + EXPORT_SYMBOL(ip_nat_helper_unregister); ++EXPORT_SYMBOL(visible_ip_nat_helper_unregister); + EXPORT_SYMBOL(ip_nat_cheat_check); + EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); + EXPORT_SYMBOL(ip_nat_mangle_udp_packet); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_queue.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_queue.c 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_queue.c 2006-05-11 13:05:42.000000000 +0400 +@@ -3,6 +3,7 @@ + * communicating with userspace via netlink. + * + * (C) 2000-2002 James Morris <jmorris@intercode.com.au> ++ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as +@@ -14,6 +15,7 @@ + * Zander). + * 2000-08-01: Added Nick Williams' MAC support. + * 2002-06-25: Code cleanup. ++ * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) + * + */ + #include <linux/module.h> +@@ -66,7 +68,15 @@ static DECLARE_MUTEX(ipqnl_sem); + static void + ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) + { ++ /* TCP input path (and probably other bits) assume to be called ++ * from softirq context, not from syscall, like ipq_issue_verdict is ++ * called. TCP input path deadlocks with locks taken from timer ++ * softirq, e.g. 
We therefore emulate this by local_bh_disable() */ ++ ++ local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); ++ local_bh_enable(); ++ + kfree(entry); + } + +@@ -540,7 +550,14 @@ ipq_rcv_sk(struct sock *sk, int len) + return; + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { ++#ifdef CONFIG_VE ++ struct ve_struct *env; ++ env = set_exec_env(VE_OWNER_SKB(skb)); ++#endif + ipq_rcv_skb(skb); ++#ifdef CONFIG_VE ++ (void)set_exec_env(env); ++#endif + kfree_skb(skb); + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_tables.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ip_tables.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ip_tables.c 2006-05-11 13:05:49.000000000 +0400 +@@ -23,12 +23,20 @@ + #include <linux/udp.h> + #include <linux/icmp.h> + #include <net/ip.h> ++#include <net/compat.h> + #include <asm/uaccess.h> + #include <asm/semaphore.h> + #include <linux/proc_fs.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_tables.h> + ++#include <ub/ub_mem.h> ++ ++#ifdef CONFIG_USER_RESOURCE ++#include <ub/beancounter.h> ++#endif ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("IPv4 packet filter"); +@@ -108,6 +116,52 @@ struct ipt_table_info + static LIST_HEAD(ipt_target); + static LIST_HEAD(ipt_match); + static LIST_HEAD(ipt_tables); ++ ++#ifdef CONFIG_VE_IPTABLES ++/* include ve.h and define get_exec_env */ ++#include <linux/sched.h> ++ ++int init_iptables(void); ++ ++#define ve_ipt_target (*(get_exec_env()->_ipt_target)) ++#define ve_ipt_match (*(get_exec_env()->_ipt_match)) ++#define ve_ipt_tables (*(get_exec_env()->_ipt_tables)) ++#define ve_ipt_standard_target (*(get_exec_env()->_ipt_standard_target)) ++#define ve_ipt_error_target (*(get_exec_env()->_ipt_error_target)) ++#define ve_tcp_matchstruct (*(get_exec_env()->_tcp_matchstruct)) ++#define ve_udp_matchstruct (*(get_exec_env()->_udp_matchstruct)) ++#define ve_icmp_matchstruct (*(get_exec_env()->_icmp_matchstruct)) ++ ++ ++#ifdef CONFIG_USER_RESOURCE ++#define UB_NUMIPTENT 23 ++static int charge_iptables(struct user_beancounter *ub, unsigned long size) ++{ ++ if (ub == NULL) ++ return 0; ++ return charge_beancounter(ub, UB_NUMIPTENT, size, 1); ++} ++static void uncharge_iptables(struct user_beancounter *ub, unsigned long size) ++{ ++ if (ub == NULL) ++ return; ++ uncharge_beancounter(ub, UB_NUMIPTENT, size); ++} ++#endif /* CONFIG_USER_RESOURCE */ ++ ++#else /* CONFIG_VE_IPTABLES */ ++ ++#define ve_ipt_target ipt_target ++#define ve_ipt_match ipt_match ++#define ve_ipt_tables ipt_tables ++#define ve_ipt_standard_target ipt_standard_target ++#define ve_ipt_error_target ipt_error_target ++#define ve_tcp_matchstruct tcp_matchstruct ++#define ve_udp_matchstruct udp_matchstruct ++#define ve_icmp_matchstruct icmp_matchstruct ++ ++#endif /* CONFIG_VE_IPTABLES */ ++ + #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) + + #ifdef CONFIG_SMP +@@ -122,6 +176,29 @@ static LIST_HEAD(ipt_tables); + #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) + #endif + ++static struct ipt_table_info *ipt_table_info_alloc(int size) ++{ ++ struct ipt_table_info *newinfo; ++ ++ if (size >= PAGE_SIZE) ++ newinfo = ub_vmalloc_best(size); ++ else ++ newinfo = ub_kmalloc(size, GFP_KERNEL); ++ ++ return newinfo; ++} ++ ++static void ipt_table_info_free(struct ipt_table_info *info) ++{ ++ if ((unsigned 
long)info >= VMALLOC_START && ++ (unsigned long)info < VMALLOC_END) ++ vfree(info); ++ else ++ kfree(info); ++} ++ ++#define ipt_table_info_ub(info) (mem_ub(info)) ++ + /* Returns whether matches rule or not. */ + static inline int + ip_packet_match(const struct iphdr *ip, +@@ -310,7 +387,7 @@ ipt_do_table(struct sk_buff **pskb, + do { + IP_NF_ASSERT(e); + IP_NF_ASSERT(back); +- (*pskb)->nfcache |= e->nfcache; ++ (*pskb)->nfcache |= e->nfcache & NFC_IPT_MASK; + if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + struct ipt_entry_target *t; + +@@ -417,9 +494,9 @@ find_inlist_lock_noload(struct list_head + + #if 0 + duprintf("find_inlist: searching for `%s' in %s.\n", +- name, head == &ipt_target ? "ipt_target" +- : head == &ipt_match ? "ipt_match" +- : head == &ipt_tables ? "ipt_tables" : "UNKNOWN"); ++ name, head == &ve_ipt_target ? "ipt_target" ++ : head == &ve_ipt_match ? "ipt_match" ++ : head == &ve_ipt_tables ? "ipt_tables" : "UNKNOWN"); + #endif + + *error = down_interruptible(mutex); +@@ -460,19 +537,19 @@ find_inlist_lock(struct list_head *head, + static inline struct ipt_table * + ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex) + { +- return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex); ++ return find_inlist_lock(&ve_ipt_tables, name, "iptable_", error, mutex); + } + + static inline struct ipt_match * + find_match_lock(const char *name, int *error, struct semaphore *mutex) + { +- return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex); ++ return find_inlist_lock(&ve_ipt_match, name, "ipt_", error, mutex); + } + + struct ipt_target * + ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex) + { +- return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex); ++ return find_inlist_lock(&ve_ipt_target, name, "ipt_", error, mutex); + } + + /* All zeroes == unconditional rule. */ +@@ -513,7 +590,7 @@ mark_source_chains(struct ipt_table_info + = (void *)ipt_get_target(e); + + if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { +- printk("iptables: loop hook %u pos %u %08X.\n", ++ ve_printk(VE_LOG, "iptables: loop hook %u pos %u %08X.\n", + hook, pos, e->comefrom); + return 0; + } +@@ -583,7 +660,6 @@ mark_source_chains(struct ipt_table_info + } + return 1; + } +- + static inline int + cleanup_match(struct ipt_entry_match *m, unsigned int *i) + { +@@ -607,7 +683,7 @@ standard_check(const struct ipt_entry_ta + if (t->u.target_size + != IPT_ALIGN(sizeof(struct ipt_standard_target))) { + duprintf("standard_check: target size %u != %u\n", +- t->u.target_size, ++ t->u.target_size, (unsigned int) + IPT_ALIGN(sizeof(struct ipt_standard_target))); + return 0; + } +@@ -698,7 +774,7 @@ check_entry(struct ipt_entry *e, const c + t->u.kernel.target = target; + up(&ipt_mutex); + +- if (t->u.kernel.target == &ipt_standard_target) { ++ if (t->u.kernel.target == &ve_ipt_standard_target) { + if (!standard_check(t, size)) { + ret = -EINVAL; + goto cleanup_matches; +@@ -866,6 +942,69 @@ translate_table(const char *name, + return ret; + } + ++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE) ++static int charge_replace_table(struct ipt_table_info *oldinfo, ++ struct ipt_table_info *newinfo) ++{ ++ struct user_beancounter *old_ub, *new_ub; ++ int old_number, new_number; ++ ++ old_ub = ipt_table_info_ub(oldinfo); ++ new_ub = ipt_table_info_ub(newinfo); ++ old_number = oldinfo->number; ++ new_number = newinfo->number; ++ ++ /* XXX: I don't understand the code below and am not sure that it does ++ * something reasonable. 
2002/04/26 SAW */ ++ if (old_ub == new_ub) { ++ int charge; ++ /* charge only differences in entries */ ++ charge = new_number - old_number; ++ if (charge > 0) { ++ if (charge_iptables(old_ub, charge)) ++ return -1; ++ } else ++ uncharge_iptables(old_ub, -charge); ++ } else { ++ /* different contexts; do charge current and uncharge old */ ++ if (charge_iptables(new_ub, new_number)) ++ return -1; ++ uncharge_iptables(old_ub, old_number); ++ } ++ return 0; ++} ++#endif ++ ++static int setup_table(struct ipt_table *table, struct ipt_table_info *info) ++{ ++#ifdef CONFIG_NETFILTER_DEBUG ++ { ++ struct ipt_entry *table_base; ++ unsigned int i; ++ ++ for (i = 0; i < NR_CPUS; i++) { ++ table_base = ++ (void *)info->entries ++ + TABLE_OFFSET(info, i); ++ ++ table_base->comefrom = 0xdead57ac; ++ } ++ } ++#endif ++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE) ++ { ++ struct user_beancounter *ub; ++ ++ ub = ipt_table_info_ub(info); ++ if (charge_iptables(ub, info->number)) ++ return -ENOMEM; ++ } ++#endif ++ table->private = info; ++ info->initial_entries = 0; ++ return 0; ++} ++ + static struct ipt_table_info * + replace_table(struct ipt_table *table, + unsigned int num_counters, +@@ -900,6 +1039,16 @@ replace_table(struct ipt_table *table, + return NULL; + } + oldinfo = table->private; ++ ++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE) ++ if (charge_replace_table(oldinfo, newinfo)) { ++ oldinfo = NULL; ++ write_unlock_bh(&table->lock); ++ *error = -ENOMEM; ++ return NULL; ++ } ++#endif ++ + table->private = newinfo; + newinfo->initial_entries = oldinfo->initial_entries; + write_unlock_bh(&table->lock); +@@ -936,24 +1085,19 @@ get_counters(const struct ipt_table_info + } + } + +-static int +-copy_entries_to_user(unsigned int total_size, +- struct ipt_table *table, +- void __user *userptr) ++static inline struct ipt_counters * alloc_counters(struct ipt_table *table) + { +- unsigned int off, num, countersize; +- struct ipt_entry *e; + struct ipt_counters *counters; +- int ret = 0; ++ unsigned int countersize; + + /* We need atomic snapshot of counters: rest doesn't change + (other than comefrom, which userspace doesn't care + about). */ + countersize = sizeof(struct ipt_counters) * table->private->number; +- counters = vmalloc(countersize); ++ counters = vmalloc_best(countersize); + + if (counters == NULL) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + /* First, sum counters... */ + memset(counters, 0, countersize); +@@ -961,6 +1105,23 @@ copy_entries_to_user(unsigned int total_ + get_counters(table->private, counters); + write_unlock_bh(&table->lock); + ++ return counters; ++} ++ ++static int ++copy_entries_to_user(unsigned int total_size, ++ struct ipt_table *table, ++ void __user *userptr) ++{ ++ unsigned int off, num; ++ struct ipt_entry *e; ++ struct ipt_counters *counters; ++ int ret = 0; ++ ++ counters = alloc_counters(table); ++ if (IS_ERR(counters)) ++ return PTR_ERR(counters); ++ + /* ... then copy entire thing from CPU 0... 
*/ + if (copy_to_user(userptr, table->private->entries, total_size) != 0) { + ret = -EFAULT; +@@ -1015,216 +1176,1207 @@ copy_entries_to_user(unsigned int total_ + return ret; + } + +-static int +-get_entries(const struct ipt_get_entries *entries, +- struct ipt_get_entries __user *uptr) ++#ifdef CONFIG_COMPAT ++static DECLARE_MUTEX(compat_ipt_mutex); ++ ++struct compat_delta { ++ struct compat_delta *next; ++ u_int16_t offset; ++ short delta; ++}; ++ ++static struct compat_delta *compat_offsets = NULL; ++ ++static int compat_add_offset(u_int16_t offset, short delta) + { +- int ret; +- struct ipt_table *t; ++ struct compat_delta *tmp; + +- t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex); +- if (t) { +- duprintf("t->private->number = %u\n", +- t->private->number); +- if (entries->size == t->private->size) +- ret = copy_entries_to_user(t->private->size, +- t, uptr->entrytable); +- else { +- duprintf("get_entries: I've got %u not %u!\n", +- t->private->size, +- entries->size); +- ret = -EINVAL; +- } +- up(&ipt_mutex); +- } else +- duprintf("get_entries: Can't find %s!\n", +- entries->name); ++ tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); ++ if (!tmp) ++ return -ENOMEM; ++ tmp->offset = offset; ++ tmp->delta = delta; ++ if (compat_offsets) { ++ tmp->next = compat_offsets->next; ++ compat_offsets->next = tmp; ++ } else { ++ compat_offsets = tmp; ++ tmp->next = NULL; ++ } ++ return 0; ++} + +- return ret; ++static void compat_flush_offsets(void) ++{ ++ struct compat_delta *tmp, *next; ++ ++ if (compat_offsets) { ++ for(tmp = compat_offsets; tmp; tmp = next) { ++ next = tmp->next; ++ kfree(tmp); ++ } ++ compat_offsets = NULL; ++ } + } + +-static int +-do_replace(void __user *user, unsigned int len) ++static short compat_calc_jump(u_int16_t offset) + { +- int ret; +- struct ipt_replace tmp; +- struct ipt_table *t; +- struct ipt_table_info *newinfo, *oldinfo; +- struct ipt_counters *counters; ++ struct compat_delta *tmp; ++ short delta; + +- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) +- return -EFAULT; ++ for(tmp = compat_offsets, delta = 0; tmp; tmp = tmp->next) ++ if (tmp->offset < offset) ++ delta += tmp->delta; ++ return delta; ++} + +- /* Hack: Causes ipchains to give correct error msg --RR */ +- if (len != sizeof(tmp) + tmp.size) +- return -ENOPROTOOPT; ++struct compat_ipt_standard_target ++{ ++ struct compat_ipt_entry_target target; ++ compat_int_t verdict; ++}; + +- /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ +- if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) +- return -ENOMEM; ++#define IPT_ST_OFFSET (sizeof(struct ipt_standard_target) - \ ++ sizeof(struct compat_ipt_standard_target)) + +- newinfo = vmalloc(sizeof(struct ipt_table_info) +- + SMP_ALIGN(tmp.size) * NR_CPUS); +- if (!newinfo) +- return -ENOMEM; ++struct ipt_standard ++{ ++ struct ipt_entry entry; ++ struct ipt_standard_target target; ++}; + +- if (copy_from_user(newinfo->entries, user + sizeof(tmp), +- tmp.size) != 0) { +- ret = -EFAULT; +- goto free_newinfo; +- } ++struct compat_ipt_standard ++{ ++ struct compat_ipt_entry entry; ++ struct compat_ipt_standard_target target; ++}; + +- counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters)); +- if (!counters) { +- ret = -ENOMEM; +- goto free_newinfo; ++static int compat_ipt_standard_fn(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ struct compat_ipt_standard_target compat_st, *pcompat_st; ++ struct ipt_standard_target st, *pst; ++ int ret; ++ ++ ret = 0; ++ switch (convert) { ++ case 
COMPAT_TO_USER: ++ pst = (struct ipt_standard_target *)target; ++ memcpy(&compat_st.target, &pst->target, ++ sizeof(struct ipt_entry_target)); ++ compat_st.verdict = pst->verdict; ++ if (compat_st.verdict > 0) ++ compat_st.verdict -= ++ compat_calc_jump(compat_st.verdict); ++ compat_st.target.u.user.target_size = ++ sizeof(struct compat_ipt_standard_target); ++ if (__copy_to_user(*dstptr, &compat_st, ++ sizeof(struct compat_ipt_standard_target))) ++ ret = -EFAULT; ++ *size -= IPT_ST_OFFSET; ++ *dstptr += sizeof(struct compat_ipt_standard_target); ++ break; ++ case COMPAT_FROM_USER: ++ pcompat_st = ++ (struct compat_ipt_standard_target *)target; ++ memcpy(&st.target, &pcompat_st->target, ++ sizeof(struct ipt_entry_target)); ++ st.verdict = pcompat_st->verdict; ++ if (st.verdict > 0) ++ st.verdict += compat_calc_jump(st.verdict); ++ st.target.u.user.target_size = ++ sizeof(struct ipt_standard_target); ++ memcpy(*dstptr, &st, ++ sizeof(struct ipt_standard_target)); ++ *size += IPT_ST_OFFSET; ++ *dstptr += sizeof(struct ipt_standard_target); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += IPT_ST_OFFSET; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; + } +- memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters)); ++ return ret; ++} + +- ret = translate_table(tmp.name, tmp.valid_hooks, +- newinfo, tmp.size, tmp.num_entries, +- tmp.hook_entry, tmp.underflow); +- if (ret != 0) +- goto free_newinfo_counters; ++int ipt_target_align_compat(void *target, void **dstptr, ++ int *size, int off, int convert) ++{ ++ struct compat_ipt_entry_target *pcompat; ++ struct ipt_entry_target *pt; ++ u_int16_t tsize; ++ int ret; + +- duprintf("ip_tables: Translated table\n"); ++ ret = 0; ++ switch (convert) { ++ case COMPAT_TO_USER: ++ pt = (struct ipt_entry_target *)target; ++ tsize = pt->u.user.target_size; ++ if (__copy_to_user(*dstptr, pt, tsize)) { ++ ret = -EFAULT; ++ break; ++ } ++ tsize -= off; ++ if (put_user(tsize, (u_int16_t *)*dstptr)) ++ ret = -EFAULT; ++ *size -= off; ++ *dstptr += tsize; ++ break; ++ case COMPAT_FROM_USER: ++ pcompat = (struct compat_ipt_entry_target *)target; ++ pt = (struct ipt_entry_target *)*dstptr; ++ tsize = pcompat->u.user.target_size; ++ memcpy(pt, pcompat, tsize); ++ tsize += off; ++ pt->u.user.target_size = tsize; ++ *size += off; ++ *dstptr += tsize; ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} + +- t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex); +- if (!t) +- goto free_newinfo_counters_untrans; ++int ipt_match_align_compat(void *match, void **dstptr, ++ int *size, int off, int convert) ++{ ++ struct compat_ipt_entry_match *pcompat_m; ++ struct ipt_entry_match *pm; ++ u_int16_t msize; ++ int ret; + +- /* You lied! 
*/ +- if (tmp.valid_hooks != t->valid_hooks) { +- duprintf("Valid hook crap: %08X vs %08X\n", +- tmp.valid_hooks, t->valid_hooks); +- ret = -EINVAL; +- goto free_newinfo_counters_untrans_unlock; ++ ret = 0; ++ switch (convert) { ++ case COMPAT_TO_USER: ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, msize)) { ++ ret = -EFAULT; ++ break; ++ } ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ ret = -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ break; ++ case COMPAT_FROM_USER: ++ pcompat_m = (struct compat_ipt_entry_match *)match; ++ pm = (struct ipt_entry_match *)*dstptr; ++ msize = pcompat_m->u.user.match_size; ++ memcpy(pm, pcompat_m, msize); ++ msize += off; ++ pm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; + } ++ return ret; ++} + +- /* Get a reference in advance, we're not allowed fail later */ +- if (!try_module_get(t->me)) { +- ret = -EBUSY; +- goto free_newinfo_counters_untrans_unlock; +- } ++static int tcp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; + ++ off = IPT_ALIGN(sizeof(struct ipt_tcp)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} + +- oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); +- if (!oldinfo) +- goto put_module; ++static int udp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; + +- /* Update module usage count based on number of rules */ +- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", +- oldinfo->number, oldinfo->initial_entries, newinfo->number); +- if ((oldinfo->number > oldinfo->initial_entries) || +- (newinfo->number <= oldinfo->initial_entries)) +- module_put(t->me); +- if ((oldinfo->number > oldinfo->initial_entries) && +- (newinfo->number <= oldinfo->initial_entries)) +- module_put(t->me); ++ off = IPT_ALIGN(sizeof(struct ipt_udp)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_udp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} + +- /* Get the old counters. */ +- get_counters(oldinfo, counters); +- /* Decrease module usage counts and free resource */ +- IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); +- vfree(oldinfo); +- /* Silent error: too late now. */ +- copy_to_user(tmp.counters, counters, +- sizeof(struct ipt_counters) * tmp.num_counters); +- vfree(counters); +- up(&ipt_mutex); +- return 0; ++static int icmp_compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; + +- put_module: +- module_put(t->me); +- free_newinfo_counters_untrans_unlock: +- up(&ipt_mutex); +- free_newinfo_counters_untrans: +- IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); +- free_newinfo_counters: +- vfree(counters); +- free_newinfo: +- vfree(newinfo); +- return ret; ++ off = IPT_ALIGN(sizeof(struct ipt_icmp)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); + } + +-/* We're lazy, and add to the first CPU; overflow works its fey magic +- * and everything is OK. 
*/ + static inline int +-add_counter_to_entry(struct ipt_entry *e, +- const struct ipt_counters addme[], +- unsigned int *i) ++compat_calc_match(struct ipt_entry_match *m, int * size) + { +-#if 0 +- duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", +- *i, +- (long unsigned int)e->counters.pcnt, +- (long unsigned int)e->counters.bcnt, +- (long unsigned int)addme[*i].pcnt, +- (long unsigned int)addme[*i].bcnt); +-#endif +- +- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); +- +- (*i)++; ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); + return 0; + } + +-static int +-do_add_counters(void __user *user, unsigned int len) ++static int compat_calc_entry(struct ipt_entry *e, ++ struct ipt_table_info *info, struct ipt_table_info *newinfo) + { +- unsigned int i; +- struct ipt_counters_info tmp, *paddc; +- struct ipt_table *t; +- int ret; ++ struct ipt_entry_target *t; ++ u_int16_t entry_offset; ++ int off, i, ret; + +- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) +- return -EFAULT; ++ off = 0; ++ entry_offset = (void *)e - (void *)info->entries; ++ IPT_MATCH_ITERATE(e, compat_calc_match, &off); ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); ++ newinfo->size -= off; ++ ret = compat_add_offset(entry_offset, off); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i< NF_IP_NUMHOOKS; i++) { ++ if (info->hook_entry[i] && (e < (struct ipt_entry *) ++ (info->entries + info->hook_entry[i]))) ++ newinfo->hook_entry[i] -= off; ++ if (info->underflow[i] && (e < (struct ipt_entry *) ++ (info->entries + info->underflow[i]))) ++ newinfo->underflow[i] -= off; ++ } ++ return 0; ++} + +- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) ++static int compat_table_info(struct ipt_table_info *info, ++ struct ipt_table_info *newinfo) ++{ ++ if (!newinfo) + return -EINVAL; + +- paddc = vmalloc(len); ++ memcpy(newinfo, info, sizeof(struct ipt_table_info)); ++ return IPT_ENTRY_ITERATE(info->entries, ++ info->size, compat_calc_entry, info, newinfo); ++} ++#endif ++ ++static int get_info(void __user *user, int *len) ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ struct ipt_table *t; ++ int ret, size; ++ ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) ++ size = sizeof(struct compat_ipt_getinfo); ++ else ++#endif ++ size = sizeof(struct ipt_getinfo); ++ ++ if (*len != size) { ++ duprintf("length %u != %u\n", *len, ++ (unsigned int)sizeof(struct ipt_getinfo)); ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(name, user, sizeof(name)) != 0) ++ return -EFAULT; ++ ++ name[IPT_TABLE_MAXNAMELEN-1] = '\0'; ++#ifdef CONFIG_COMPAT ++ down(&compat_ipt_mutex); ++#endif ++ t = ipt_find_table_lock(name, &ret, &ipt_mutex); ++ if (t) { ++ struct ipt_getinfo info; ++#ifdef CONFIG_COMPAT ++ struct compat_ipt_getinfo compat_info; ++#endif ++ void *pinfo; ++ ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) { ++ struct ipt_table_info t_info; ++ ret = compat_table_info(t->private, &t_info); ++ compat_flush_offsets(); ++ memcpy(compat_info.hook_entry, t_info.hook_entry, ++ sizeof(compat_info.hook_entry)); ++ memcpy(compat_info.underflow, t_info.underflow, ++ sizeof(compat_info.underflow)); ++ compat_info.valid_hooks = t->valid_hooks; ++ compat_info.num_entries = t->private->number; ++ compat_info.size = t_info.size; ++ strcpy(compat_info.name, name); ++ pinfo = (void *)&compat_info; ++ } else ++#endif ++ { ++ info.valid_hooks = t->valid_hooks; ++ memcpy(info.hook_entry, 
t->private->hook_entry, ++ sizeof(info.hook_entry)); ++ memcpy(info.underflow, t->private->underflow, ++ sizeof(info.underflow)); ++ info.num_entries = t->private->number; ++ info.size = t->private->size; ++ strcpy(info.name, name); ++ pinfo = (void *)&info; ++ } ++ ++ if (copy_to_user(user, pinfo, *len) != 0) ++ ret = -EFAULT; ++ else ++ ret = 0; ++ ++ up(&ipt_mutex); ++ } ++#ifdef CONFIG_COMPAT ++ up(&compat_ipt_mutex); ++#endif ++ return ret; ++} ++ ++static int ++get_entries(struct ipt_get_entries __user *uptr, int *len) ++{ ++ int ret; ++ struct ipt_get_entries get; ++ struct ipt_table *t; ++ ++ if (*len < sizeof(get)) { ++ duprintf("get_entries: %u < %d\n", *len, ++ (unsigned int)sizeof(get)); ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(&get, uptr, sizeof(get)) != 0) ++ return -EFAULT; ++ ++ if (*len != sizeof(struct ipt_get_entries) + get.size) { ++ duprintf("get_entries: %u != %u\n", *len, ++ (unsigned int)(sizeof(struct ipt_get_entries) + ++ get.size)); ++ return -EINVAL; ++ } ++ ++ t = ipt_find_table_lock(get.name, &ret, &ipt_mutex); ++ if (t) { ++ duprintf("t->private->number = %u\n", ++ t->private->number); ++ if (get.size == t->private->size) ++ ret = copy_entries_to_user(t->private->size, ++ t, uptr->entrytable); ++ else { ++ duprintf("get_entries: I've got %u not %u!\n", ++ t->private->size, ++ get.size); ++ ret = -EINVAL; ++ } ++ up(&ipt_mutex); ++ } else ++ duprintf("get_entries: Can't find %s!\n", ++ get.name); ++ ++ return ret; ++} ++ ++static int ++__do_replace(const char *name, unsigned int valid_hooks, ++ struct ipt_table_info *newinfo, unsigned int size, ++ unsigned int num_counters, void __user *counters_ptr) ++{ ++ int ret; ++ struct ipt_table *t; ++ struct ipt_table_info *oldinfo; ++ struct ipt_counters *counters; ++ ++ counters = ub_vmalloc_best(num_counters * ++ sizeof(struct ipt_counters)); ++ if (!counters) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ memset(counters, 0, num_counters * sizeof(struct ipt_counters)); ++ ++ t = ipt_find_table_lock(name, &ret, &ipt_mutex); ++ if (!t) ++ goto free_newinfo_counters_untrans; ++ ++ /* You lied! */ ++ if (valid_hooks != t->valid_hooks) { ++ duprintf("Valid hook crap: %08X vs %08X\n", ++ valid_hooks, t->valid_hooks); ++ ret = -EINVAL; ++ goto free_newinfo_counters_untrans_unlock; ++ } ++ ++ /* Get a reference in advance, we're not allowed fail later */ ++ if (!try_module_get(t->me)) { ++ ret = -EBUSY; ++ goto free_newinfo_counters_untrans_unlock; ++ } ++ ++ oldinfo = replace_table(t, num_counters, newinfo, &ret); ++ if (!oldinfo) ++ goto put_module; ++ ++ /* Update module usage count based on number of rules */ ++ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", ++ oldinfo->number, oldinfo->initial_entries, newinfo->number); ++ if ((oldinfo->number > oldinfo->initial_entries) || ++ (newinfo->number <= oldinfo->initial_entries)) ++ module_put(t->me); ++ if ((oldinfo->number > oldinfo->initial_entries) && ++ (newinfo->number <= oldinfo->initial_entries)) ++ module_put(t->me); ++ ++ /* Get the old counters. */ ++ get_counters(oldinfo, counters); ++ /* Decrease module usage counts and free resource */ ++ IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); ++ ipt_table_info_free(oldinfo); ++ /* Silent error: too late now. 
*/ ++ copy_to_user(counters_ptr, counters, ++ sizeof(struct ipt_counters) * num_counters); ++ vfree(counters); ++ up(&ipt_mutex); ++ return 0; ++ put_module: ++ module_put(t->me); ++ free_newinfo_counters_untrans_unlock: ++ up(&ipt_mutex); ++ free_newinfo_counters_untrans: ++ vfree(counters); ++ out: ++ return ret; ++} ++ ++static int ++do_replace(void __user *user, unsigned int len) ++{ ++ int ret; ++ struct ipt_replace tmp; ++ struct ipt_table_info *newinfo; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) ++ return -EFAULT; ++ ++ /* Hack: Causes ipchains to give correct error msg --RR */ ++ if (len != sizeof(tmp) + tmp.size) ++ return -ENOPROTOOPT; ++ ++ /* overflow check */ ++ if (tmp.size >= (INT_MAX - sizeof(struct ipt_table_info)) / NR_CPUS - ++ SMP_CACHE_BYTES) ++ return -ENOMEM; ++ if (tmp.num_counters >= INT_MAX / sizeof(struct ipt_counters)) ++ return -ENOMEM; ++ ++ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ ++ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) ++ return -ENOMEM; ++ ++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info) ++ + SMP_ALIGN(tmp.size) * NR_CPUS); ++ if (!newinfo) ++ return -ENOMEM; ++ ++ if (copy_from_user(newinfo->entries, user + sizeof(tmp), tmp.size) != 0) { ++ ret = -EFAULT; ++ goto free_newinfo; ++ } ++ ++ ret = translate_table(tmp.name, tmp.valid_hooks, ++ newinfo, tmp.size, tmp.num_entries, ++ tmp.hook_entry, tmp.underflow); ++ if (ret != 0) ++ goto free_newinfo; ++ ++ duprintf("ip_tables: Translated table\n"); ++ ++ ret = __do_replace(tmp.name, tmp.valid_hooks, ++ newinfo, tmp.size, tmp.num_counters, ++ tmp.counters); ++ if (ret) ++ goto free_newinfo_untrans; ++ return 0; ++ ++ free_newinfo_untrans: ++ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); ++ free_newinfo: ++ ipt_table_info_free(newinfo); ++ return ret; ++} ++ ++/* We're lazy, and add to the first CPU; overflow works its fey magic ++ * and everything is OK. 
*/ ++static inline int ++add_counter_to_entry(struct ipt_entry *e, ++ const struct ipt_counters addme[], ++ unsigned int *i) ++{ ++#if 0 ++ duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", ++ *i, ++ (long unsigned int)e->counters.pcnt, ++ (long unsigned int)e->counters.bcnt, ++ (long unsigned int)addme[*i].pcnt, ++ (long unsigned int)addme[*i].bcnt); ++#endif ++ ++ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); ++ ++ (*i)++; ++ return 0; ++} ++ ++static int ++do_add_counters(void __user *user, unsigned int len) ++{ ++ unsigned int i; ++ struct ipt_counters_info tmp; ++ void *ptmp; ++ struct ipt_table *t; ++ unsigned int num_counters; ++ char *name; ++ struct ipt_counters *paddc; ++ int ret, size; ++#ifdef CONFIG_COMPAT ++ struct compat_ipt_counters_info compat_tmp; ++ ++ if (is_current_32bits()) { ++ ptmp = &compat_tmp; ++ size = sizeof(struct compat_ipt_counters_info); ++ } else ++#endif ++ { ++ ptmp = &tmp; ++ size = sizeof(struct ipt_counters_info); ++ } ++ ++ if (copy_from_user(ptmp, user, size) != 0) ++ return -EFAULT; ++ ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) { ++ num_counters = compat_tmp.num_counters; ++ name = compat_tmp.name; ++ } else ++#endif ++ { ++ num_counters = tmp.num_counters; ++ name = tmp.name; ++ } ++ ++ if (len != size + num_counters * sizeof(struct ipt_counters)) ++ return -EINVAL; ++ ++ paddc = ub_vmalloc_best(len - size); + if (!paddc) + return -ENOMEM; + +- if (copy_from_user(paddc, user, len) != 0) { ++ if (copy_from_user(paddc, user + size, len - size) != 0) { ++ ret = -EFAULT; ++ goto free; ++ } ++ ++ t = ipt_find_table_lock(name, &ret, &ipt_mutex); ++ if (!t) ++ goto free; ++ ++ write_lock_bh(&t->lock); ++ if (t->private->number != num_counters) { ++ ret = -EINVAL; ++ goto unlock_up_free; ++ } ++ ++ i = 0; ++ IPT_ENTRY_ITERATE(t->private->entries, ++ t->private->size, ++ add_counter_to_entry, ++ paddc, ++ &i); ++ unlock_up_free: ++ write_unlock_bh(&t->lock); ++ up(&ipt_mutex); ++ free: ++ vfree(paddc); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++struct compat_ipt_replace { ++ char name[IPT_TABLE_MAXNAMELEN]; ++ u32 valid_hooks; ++ u32 num_entries; ++ u32 size; ++ u32 hook_entry[NF_IP_NUMHOOKS]; ++ u32 underflow[NF_IP_NUMHOOKS]; ++ u32 num_counters; ++ compat_uptr_t counters; /* struct ipt_counters * */ ++ struct compat_ipt_entry entries[0]; ++}; ++ ++static inline int compat_copy_match_to_user(struct ipt_entry_match *m, ++ void __user **dstptr, compat_uint_t *size) ++{ ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_TO_USER); ++ else { ++ if (__copy_to_user(*dstptr, m, m->u.match_size)) ++ return -EFAULT; ++ *dstptr += m->u.match_size; ++ } ++ return 0; ++} ++ ++static int compat_copy_entry_to_user(struct ipt_entry *e, ++ void __user **dstptr, compat_uint_t *size) ++{ ++ struct ipt_entry_target __user *t; ++ struct compat_ipt_entry __user *ce; ++ u_int16_t target_offset, next_offset; ++ compat_uint_t origsize; ++ int ret; ++ ++ ret = -EFAULT; ++ origsize = *size; ++ ce = (struct compat_ipt_entry __user *)*dstptr; ++ if (__copy_to_user(ce, e, sizeof(struct ipt_entry))) ++ goto out; ++ ++ *dstptr += sizeof(struct compat_ipt_entry); ++ ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size); ++ target_offset = e->target_offset - (origsize - *size); ++ if (ret) ++ goto out; ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) { ++ ret = t->u.kernel.target->compat(t, ++ dstptr, size, COMPAT_TO_USER); ++ if (ret) ++ goto out; ++ } else { ++ ret = -EFAULT; ++ if 
(__copy_to_user(*dstptr, t, t->u.target_size)) ++ goto out; ++ *dstptr += t->u.target_size; ++ } ++ ret = -EFAULT; ++ next_offset = e->next_offset - (origsize - *size); ++ if (__put_user(target_offset, &ce->target_offset)) ++ goto out; ++ if (__put_user(next_offset, &ce->next_offset)) ++ goto out; ++ return 0; ++out: ++ return ret; ++} ++ ++static inline int ++compat_check_calc_match(struct ipt_entry_match *m, ++ const char *name, ++ const struct ipt_ip *ip, ++ unsigned int hookmask, ++ int *size, int *i) ++{ ++ int ret; ++ struct ipt_match *match; ++ ++ match = find_match_lock(m->u.user.name, &ret, &ipt_mutex); ++ if (!match) { ++ duprintf("check_match: `%s' not found\n", m->u.user.name); ++ return ret; ++ } ++ if (!try_module_get(match->me)) { ++ up(&ipt_mutex); ++ return -ENOENT; ++ } ++ m->u.kernel.match = match; ++ up(&ipt_mutex); ++ ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); ++ ++ (*i)++; ++ return 0; ++} ++ ++static inline int ++check_compat_entry_size_and_hooks(struct ipt_entry *e, ++ struct ipt_table_info *newinfo, ++ unsigned char *base, ++ unsigned char *limit, ++ unsigned int *hook_entries, ++ unsigned int *underflows, ++ unsigned int *i, ++ const char *name) ++{ ++ struct ipt_entry_target *t; ++ struct ipt_target *target; ++ u_int16_t entry_offset; ++ int ret, off, h, j; ++ ++ duprintf("check_compat_entry_size_and_hooks %p\n", e); ++ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ++ || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { ++ duprintf("Bad offset %p, limit = %p\n", e, limit); ++ return -EINVAL; ++ } ++ ++ if (e->next_offset < sizeof(struct compat_ipt_entry) + ++ sizeof(struct compat_ipt_entry_target)) { ++ duprintf("checking: element %p size %u\n", ++ e, e->next_offset); ++ return -EINVAL; ++ } ++ ++ if (!ip_checkentry(&e->ip)) { ++ duprintf("ip_tables: ip check failed %p %s.\n", e, name); ++ return -EINVAL; ++ } ++ ++ off = 0; ++ entry_offset = (void *)e - (void *)base; ++ j = 0; ++ ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, ++ e->comefrom, &off, &j); ++ if (ret != 0) ++ goto out; ++ ++ t = ipt_get_target(e); ++ target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex); ++ if (!target) { ++ duprintf("check_entry: `%s' not found\n", t->u.user.name); ++ goto out; ++ } ++ if (!try_module_get(target->me)) { ++ up(&ipt_mutex); ++ ret = -ENOENT; ++ goto out; ++ } ++ t->u.kernel.target = target; ++ up(&ipt_mutex); ++ ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); ++ newinfo->size += off; ++ ret = compat_add_offset(entry_offset, off); ++ if (ret) ++ goto out; ++ ++ /* Check hooks & underflows */ ++ for (h = 0; h < NF_IP_NUMHOOKS; h++) { ++ if ((unsigned char *)e - base == hook_entries[h]) ++ newinfo->hook_entry[h] = hook_entries[h]; ++ if ((unsigned char *)e - base == underflows[h]) ++ newinfo->underflow[h] = underflows[h]; ++ } ++ ++ /* Clear counters and comefrom */ ++ e->counters = ((struct ipt_counters) { 0, 0 }); ++ e->comefrom = 0; ++ ++ (*i)++; ++ return 0; ++out: ++ IPT_MATCH_ITERATE(e, cleanup_match, &j); ++ return ret; ++} ++ ++static inline int compat_copy_match_from_user(struct ipt_entry_match *m, ++ void **dstptr, compat_uint_t *size, const char *name, ++ const struct ipt_ip *ip, unsigned int hookmask) ++{ ++ struct ipt_entry_match *dm; ++ ++ dm = (struct ipt_entry_match *)*dstptr; ++ if (m->u.kernel.match->compat) ++ m->u.kernel.match->compat(m, dstptr, size, COMPAT_FROM_USER); ++ else { ++ 
memcpy(*dstptr, m, m->u.match_size); ++ *dstptr += m->u.match_size; ++ } ++ ++ if (dm->u.kernel.match->checkentry ++ && !dm->u.kernel.match->checkentry(name, ip, dm->data, ++ dm->u.match_size - sizeof(*dm), ++ hookmask)) { ++ module_put(dm->u.kernel.match->me); ++ duprintf("ip_tables: check failed for `%s'.\n", ++ dm->u.kernel.match->name); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, ++ unsigned int *size, const char *name, ++ struct ipt_table_info *newinfo, unsigned char *base) ++{ ++ struct ipt_entry_target *t; ++ struct ipt_entry *de; ++ unsigned int origsize; ++ int ret, h; ++ ++ ret = 0; ++ origsize = *size; ++ de = (struct ipt_entry *)*dstptr; ++ memcpy(de, e, sizeof(struct ipt_entry)); ++ ++ *dstptr += sizeof(struct compat_ipt_entry); ++ ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, ++ name, &de->ip, de->comefrom); ++ if (ret) ++ goto out; ++ de->target_offset = e->target_offset - (origsize - *size); ++ t = ipt_get_target(e); ++ if (t->u.kernel.target->compat) ++ t->u.kernel.target->compat(t, ++ dstptr, size, COMPAT_FROM_USER); ++ else { ++ memcpy(*dstptr, t, t->u.target_size); ++ *dstptr += t->u.target_size; ++ } ++ ++ de->next_offset = e->next_offset - (origsize - *size); ++ for (h = 0; h < NF_IP_NUMHOOKS; h++) { ++ if ((unsigned char *)de - base < newinfo->hook_entry[h]) ++ newinfo->hook_entry[h] -= origsize - *size; ++ if ((unsigned char *)de - base < newinfo->underflow[h]) ++ newinfo->underflow[h] -= origsize - *size; ++ } ++ ++ ret = -EINVAL; ++ t = ipt_get_target(de); ++ if (t->u.kernel.target == &ve_ipt_standard_target) { ++ if (!standard_check(t, *size)) ++ goto out; ++ } else if (t->u.kernel.target->checkentry ++ && !t->u.kernel.target->checkentry(name, de, t->data, ++ t->u.target_size ++ - sizeof(*t), ++ de->comefrom)) { ++ module_put(t->u.kernel.target->me); ++ duprintf("ip_tables: compat: check failed for `%s'.\n", ++ t->u.kernel.target->name); ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ++translate_compat_table(const char *name, ++ unsigned int valid_hooks, ++ struct ipt_table_info **pinfo, ++ unsigned int total_size, ++ unsigned int number, ++ unsigned int *hook_entries, ++ unsigned int *underflows) ++{ ++ unsigned int i; ++ struct ipt_table_info *newinfo, *info; ++ void *pos; ++ unsigned int size; ++ int ret; ++ ++ info = *pinfo; ++ info->size = total_size; ++ info->number = number; ++ ++ /* Init all hooks to impossible value. */ ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ info->hook_entry[i] = 0xFFFFFFFF; ++ info->underflow[i] = 0xFFFFFFFF; ++ } ++ ++ duprintf("translate_compat_table: size %u\n", info->size); ++ i = 0; ++ down(&compat_ipt_mutex); ++ /* Walk through entries, checking offsets. 
*/ ++ ret = IPT_ENTRY_ITERATE(info->entries, total_size, ++ check_compat_entry_size_and_hooks, ++ info, info->entries, ++ info->entries + total_size, ++ hook_entries, underflows, &i, name); ++ if (ret != 0) ++ goto out_unlock; ++ ++ ret = -EINVAL; ++ if (i != number) { ++ duprintf("translate_compat_table: %u not %u entries\n", ++ i, number); ++ goto out_unlock; ++ } ++ ++ /* Check hooks all assigned */ ++ for (i = 0; i < NF_IP_NUMHOOKS; i++) { ++ /* Only hooks which are valid */ ++ if (!(valid_hooks & (1 << i))) ++ continue; ++ if (info->hook_entry[i] == 0xFFFFFFFF) { ++ duprintf("Invalid hook entry %u %u\n", ++ i, hook_entries[i]); ++ goto out_unlock; ++ } ++ if (info->underflow[i] == 0xFFFFFFFF) { ++ duprintf("Invalid underflow %u %u\n", ++ i, underflows[i]); ++ goto out_unlock; ++ } ++ } ++ ++ ret = -ENOMEM; ++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info) ++ + SMP_ALIGN(info->size) * NR_CPUS); ++ if (!newinfo) ++ goto out_unlock; ++ ++ memcpy(newinfo, info, sizeof(struct ipt_table_info)); ++ pos = newinfo->entries; ++ size = total_size; ++ ret = IPT_ENTRY_ITERATE(info->entries, total_size, ++ compat_copy_entry_from_user, &pos, &size, ++ name, newinfo, newinfo->entries); ++ compat_flush_offsets(); ++ up(&compat_ipt_mutex); ++ if (ret) ++ goto free_newinfo; ++ ++ ret = -ELOOP; ++ if (!mark_source_chains(newinfo, valid_hooks)) ++ goto free_newinfo; ++ ++ /* And one copy for every other CPU */ ++ for (i = 1; i < NR_CPUS; i++) { ++ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, ++ newinfo->entries, ++ SMP_ALIGN(newinfo->size)); ++ } ++ ++ *pinfo = newinfo; ++ ipt_table_info_free(info); ++ return 0; ++ ++free_newinfo: ++ ipt_table_info_free(newinfo); ++out: ++ return ret; ++out_unlock: ++ up(&compat_ipt_mutex); ++ goto out; ++} ++ ++static int ++compat_do_replace(void __user *user, unsigned int len) ++{ ++ int ret; ++ struct compat_ipt_replace tmp; ++ struct ipt_table_info *newinfo; ++ ++ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) ++ return -EFAULT; ++ ++ /* Hack: Causes ipchains to give correct error msg --RR */ ++ if (len != sizeof(tmp) + tmp.size) ++ return -ENOPROTOOPT; ++ ++ /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ ++ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) ++ return -ENOMEM; ++ ++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info) ++ + SMP_ALIGN(tmp.size) * NR_CPUS); ++ if (!newinfo) ++ return -ENOMEM; ++ ++ if (copy_from_user(newinfo->entries, user + sizeof(tmp), tmp.size) != 0) { + ret = -EFAULT; +- goto free; ++ goto free_newinfo; + } + +- t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex); +- if (!t) +- goto free; ++ ret = translate_compat_table(tmp.name, tmp.valid_hooks, ++ &newinfo, tmp.size, tmp.num_entries, ++ tmp.hook_entry, tmp.underflow); ++ if (ret != 0) ++ goto free_newinfo; + +- write_lock_bh(&t->lock); +- if (t->private->number != paddc->num_counters) { +- ret = -EINVAL; +- goto unlock_up_free; ++ duprintf("do_compat_replace: Translated table\n"); ++ ++ ret = __do_replace(tmp.name, tmp.valid_hooks, ++ newinfo, tmp.size, tmp.num_counters, ++ compat_ptr(tmp.counters)); ++ if (ret) ++ goto free_newinfo_untrans; ++ return 0; ++ ++ free_newinfo_untrans: ++ IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); ++ free_newinfo: ++ ipt_table_info_free(newinfo); ++ return ret; ++} ++ ++struct compat_ipt_get_entries ++{ ++ char name[IPT_TABLE_MAXNAMELEN]; ++ compat_uint_t size; ++ struct compat_ipt_entry entrytable[0]; ++}; ++ ++static int compat_copy_entries_to_user(unsigned 
int total_size, ++ struct ipt_table *table, void __user *userptr) ++{ ++ unsigned int off, num; ++ struct compat_ipt_entry e; ++ struct ipt_counters *counters; ++ void __user *pos; ++ unsigned int size; ++ int ret = 0; ++ ++ counters = alloc_counters(table); ++ if (IS_ERR(counters)) ++ return PTR_ERR(counters); ++ ++ /* ... then copy entire thing from CPU 0... */ ++ pos = userptr; ++ size = total_size; ++ ret = IPT_ENTRY_ITERATE(table->private->entries, ++ total_size, compat_copy_entry_to_user, &pos, &size); ++ ++ /* ... then go back and fix counters and names */ ++ for (off = 0, num = 0; off < size; off += e.next_offset, num++) { ++ unsigned int i; ++ struct ipt_entry_match m; ++ struct ipt_entry_target t; ++ ++ ret = -EFAULT; ++ if (copy_from_user(&e, userptr + off, ++ sizeof(struct compat_ipt_entry))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + ++ offsetof(struct compat_ipt_entry, counters), ++ &counters[num], sizeof(counters[num]))) ++ goto free_counters; ++ ++ for (i = sizeof(struct compat_ipt_entry); ++ i < e.target_offset; i += m.u.match_size) { ++ if (copy_from_user(&m, userptr + off + i, ++ sizeof(struct ipt_entry_match))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + i + ++ offsetof(struct ipt_entry_match, u.user.name), ++ m.u.kernel.match->name, ++ strlen(m.u.kernel.match->name) + 1)) ++ goto free_counters; ++ } ++ ++ if (copy_from_user(&t, userptr + off + e.target_offset, ++ sizeof(struct ipt_entry_target))) ++ goto free_counters; ++ if (copy_to_user(userptr + off + e.target_offset + ++ offsetof(struct ipt_entry_target, u.user.name), ++ t.u.kernel.target->name, ++ strlen(t.u.kernel.target->name) + 1)) ++ goto free_counters; + } ++ ret = 0; ++free_counters: ++ vfree(counters); ++ return ret; ++} + +- i = 0; +- IPT_ENTRY_ITERATE(t->private->entries, +- t->private->size, +- add_counter_to_entry, +- paddc->counters, +- &i); +- unlock_up_free: +- write_unlock_bh(&t->lock); +- up(&ipt_mutex); +- free: +- vfree(paddc); ++static int ++compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) ++{ ++ int ret; ++ struct compat_ipt_get_entries get; ++ struct ipt_table *t; ++ ++ ++ if (*len < sizeof(get)) { ++ duprintf("compat_get_entries: %u < %u\n", ++ *len, (unsigned int)sizeof(get)); ++ return -EINVAL; ++ } ++ ++ if (copy_from_user(&get, uptr, sizeof(get)) != 0) ++ return -EFAULT; ++ ++ if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { ++ duprintf("compat_get_entries: %u != %u\n", *len, ++ (unsigned int)(sizeof(struct compat_ipt_get_entries) + ++ get.size)); ++ return -EINVAL; ++ } ++ ++ down(&compat_ipt_mutex); ++ t = ipt_find_table_lock(get.name, &ret, &ipt_mutex); ++ if (t) { ++ struct ipt_table_info info; ++ duprintf("t->private->number = %u\n", ++ t->private->number); ++ ret = compat_table_info(t->private, &info); ++ if (!ret && get.size == info.size) { ++ ret = compat_copy_entries_to_user(t->private->size, ++ t, uptr->entrytable); ++ } else if (!ret) { ++ duprintf("compat_get_entries: I've got %u not %u!\n", ++ t->private->size, ++ get.size); ++ ret = -EINVAL; ++ } ++ compat_flush_offsets(); ++ up(&ipt_mutex); ++ } else ++ duprintf("compat_get_entries: Can't find %s!\n", ++ get.name); ++ up(&compat_ipt_mutex); ++ return ret; ++} ++ ++static int ++compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ++{ ++ int ret; + ++ switch (cmd) { ++ case IPT_SO_GET_INFO: ++ ret = get_info(user, len); ++ break; ++ case IPT_SO_GET_ENTRIES: ++ ret = compat_get_entries(user, len); ++ break; ++ default: ++ 
duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); ++ ret = -EINVAL; ++ } + return ret; + } ++#endif + + static int + do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE)) ++ return compat_do_replace(user, len); ++#endif ++ + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(user, len); +@@ -1247,65 +2399,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd, + { + int ret; + +- if (!capable(CAP_NET_ADMIN)) ++ if (!capable(CAP_VE_NET_ADMIN)) + return -EPERM; + +- switch (cmd) { +- case IPT_SO_GET_INFO: { +- char name[IPT_TABLE_MAXNAMELEN]; +- struct ipt_table *t; +- +- if (*len != sizeof(struct ipt_getinfo)) { +- duprintf("length %u != %u\n", *len, +- sizeof(struct ipt_getinfo)); +- ret = -EINVAL; +- break; +- } +- +- if (copy_from_user(name, user, sizeof(name)) != 0) { +- ret = -EFAULT; +- break; +- } +- name[IPT_TABLE_MAXNAMELEN-1] = '\0'; +- t = ipt_find_table_lock(name, &ret, &ipt_mutex); +- if (t) { +- struct ipt_getinfo info; +- +- info.valid_hooks = t->valid_hooks; +- memcpy(info.hook_entry, t->private->hook_entry, +- sizeof(info.hook_entry)); +- memcpy(info.underflow, t->private->underflow, +- sizeof(info.underflow)); +- info.num_entries = t->private->number; +- info.size = t->private->size; +- strcpy(info.name, name); +- +- if (copy_to_user(user, &info, *len) != 0) +- ret = -EFAULT; +- else +- ret = 0; +- +- up(&ipt_mutex); +- } +- } +- break; ++#ifdef CONFIG_COMPAT ++ if (is_current_32bits()) ++ return compat_do_ipt_get_ctl(sk, cmd, user, len); ++#endif + +- case IPT_SO_GET_ENTRIES: { +- struct ipt_get_entries get; ++ switch (cmd) { ++ case IPT_SO_GET_INFO: ++ ret = get_info(user, len); ++ break; + +- if (*len < sizeof(get)) { +- duprintf("get_entries: %u < %u\n", *len, sizeof(get)); +- ret = -EINVAL; +- } else if (copy_from_user(&get, user, sizeof(get)) != 0) { +- ret = -EFAULT; +- } else if (*len != sizeof(struct ipt_get_entries) + get.size) { +- duprintf("get_entries: %u != %u\n", *len, +- sizeof(struct ipt_get_entries) + get.size); +- ret = -EINVAL; +- } else +- ret = get_entries(&get, user); ++ case IPT_SO_GET_ENTRIES: ++ ret = get_entries(user, len); + break; +- } + + default: + duprintf("do_ipt_get_ctl: unknown request %i\n", cmd); +@@ -1325,7 +2434,7 @@ ipt_register_target(struct ipt_target *t + if (ret != 0) + return ret; + +- if (!list_named_insert(&ipt_target, target)) { ++ if (!list_named_insert(&ve_ipt_target, target)) { + duprintf("ipt_register_target: `%s' already in list!\n", + target->name); + ret = -EINVAL; +@@ -1334,12 +2443,60 @@ ipt_register_target(struct ipt_target *t + return ret; + } + ++int ++visible_ipt_register_target(struct ipt_target *target) ++{ ++ int ret; ++ struct module *mod = target->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ipt_target *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ipt_target), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, target, sizeof(struct ipt_target)); ++ target = tmp; ++ } ++ ++ ret = ipt_register_target(target); ++ if (ret) ++ goto out; ++ ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(target); ++nomem: ++ module_put(mod); ++ } ++ return ret; ++} ++ + void + ipt_unregister_target(struct ipt_target *target) + { + down(&ipt_mutex); +- LIST_DELETE(&ipt_target, target); ++ LIST_DELETE(&ve_ipt_target, target); ++ up(&ipt_mutex); ++} 
++ ++void ++visible_ipt_unregister_target(struct ipt_target *target) ++{ ++ down(&ipt_mutex); ++ target = list_named_find(&ve_ipt_target, target->name); + up(&ipt_mutex); ++ if (!target) ++ return; ++ ++ ipt_unregister_target(target); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(target->me); ++ kfree(target); ++ } + } + + int +@@ -1351,13 +2508,43 @@ ipt_register_match(struct ipt_match *mat + if (ret != 0) + return ret; + +- if (!list_named_insert(&ipt_match, match)) { ++ if (!list_named_insert(&ve_ipt_match, match)) { + duprintf("ipt_register_match: `%s' already in list!\n", + match->name); + ret = -EINVAL; + } + up(&ipt_mutex); ++ return ret; ++} ++ ++int ++visible_ipt_register_match(struct ipt_match *match) ++{ ++ int ret; ++ struct module *mod = match->me; ++ ++ if (!ve_is_super(get_exec_env())) { ++ struct ipt_match *tmp; ++ __module_get(mod); ++ ret = -ENOMEM; ++ tmp = kmalloc(sizeof(struct ipt_match), GFP_KERNEL); ++ if (!tmp) ++ goto nomem; ++ memcpy(tmp, match, sizeof(struct ipt_match)); ++ match = tmp; ++ } ++ ++ ret = ipt_register_match(match); ++ if (ret) ++ goto out; + ++ return 0; ++out: ++ if (!ve_is_super(get_exec_env())) { ++ kfree(match); ++nomem: ++ module_put(mod); ++ } + return ret; + } + +@@ -1365,7 +2552,38 @@ void + ipt_unregister_match(struct ipt_match *match) + { + down(&ipt_mutex); +- LIST_DELETE(&ipt_match, match); ++ LIST_DELETE(&ve_ipt_match, match); ++ up(&ipt_mutex); ++} ++ ++void ++visible_ipt_unregister_match(struct ipt_match *match) ++{ ++ down(&ipt_mutex); ++ match = list_named_find(&ve_ipt_match, match->name); ++ up(&ipt_mutex); ++ if (!match) ++ return; ++ ++ ipt_unregister_match(match); ++ ++ if (!ve_is_super(get_exec_env())) { ++ module_put(match->me); ++ kfree(match); ++ } ++} ++ ++void ipt_flush_table(struct ipt_table *table) ++{ ++ if (table == NULL) ++ return; ++ ++ down(&ipt_mutex); ++ IPT_ENTRY_ITERATE(table->private->entries, table->private->size, ++ cleanup_entry, NULL); ++ if (table->private->number > table->private->initial_entries) ++ module_put(table->me); ++ table->private->size = 0; + up(&ipt_mutex); + } + +@@ -1373,13 +2591,12 @@ int ipt_register_table(struct ipt_table + { + int ret; + struct ipt_table_info *newinfo; +- static struct ipt_table_info bootstrap +- = { 0, 0, 0, { 0 }, { 0 }, { } }; + +- newinfo = vmalloc(sizeof(struct ipt_table_info) ++ ret = -ENOMEM; ++ newinfo = ipt_table_info_alloc(sizeof(struct ipt_table_info) + + SMP_ALIGN(table->table->size) * NR_CPUS); + if (!newinfo) +- return -ENOMEM; ++ goto out; + + memcpy(newinfo->entries, table->table->entries, table->table->size); + +@@ -1388,56 +2605,58 @@ int ipt_register_table(struct ipt_table + table->table->num_entries, + table->table->hook_entry, + table->table->underflow); +- if (ret != 0) { +- vfree(newinfo); +- return ret; +- } ++ if (ret != 0) ++ goto out_free; + + ret = down_interruptible(&ipt_mutex); +- if (ret != 0) { +- vfree(newinfo); +- return ret; +- } ++ if (ret != 0) ++ goto out_free; + + /* Don't autoload: we'd eat our tail... */ +- if (list_named_find(&ipt_tables, table->name)) { +- ret = -EEXIST; +- goto free_unlock; +- } ++ ret = -EEXIST; ++ if (list_named_find(&ve_ipt_tables, table->name)) ++ goto out_free_unlock; + +- /* Simplifies replace_table code. 
*/ +- table->private = &bootstrap; +- if (!replace_table(table, 0, newinfo, &ret)) +- goto free_unlock; ++ table->lock = RW_LOCK_UNLOCKED; ++ ret = setup_table(table, newinfo); ++ if (ret) ++ goto out_free_unlock; + + duprintf("table->private->number = %u\n", + table->private->number); +- ++ + /* save number of initial entries */ + table->private->initial_entries = table->private->number; + +- table->lock = RW_LOCK_UNLOCKED; +- list_prepend(&ipt_tables, table); ++ list_prepend(&ve_ipt_tables, table); + +- unlock: + up(&ipt_mutex); +- return ret; ++ return 0; + +- free_unlock: +- vfree(newinfo); +- goto unlock; ++out_free_unlock: ++ up(&ipt_mutex); ++out_free: ++ ipt_table_info_free(newinfo); ++out: ++ return ret; + } + + void ipt_unregister_table(struct ipt_table *table) + { + down(&ipt_mutex); +- LIST_DELETE(&ipt_tables, table); ++ LIST_DELETE(&ve_ipt_tables, table); + up(&ipt_mutex); + ++ /* size to uncharge taken from ipt_register_table */ ++#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_USER_RESOURCE) ++ uncharge_iptables(ipt_table_info_ub(table->private), ++ table->private->number); ++#endif ++ + /* Decrease module usage counts and free resources */ + IPT_ENTRY_ITERATE(table->private->entries, table->private->size, + cleanup_entry, NULL); +- vfree(table->private); ++ ipt_table_info_free(table->private); + } + + /* Returns 1 if the port is matched by the range, 0 otherwise */ +@@ -1604,8 +2823,8 @@ udp_checkentry(const char *tablename, + return 0; + } + if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) { +- duprintf("ipt_udp: matchsize %u != %u\n", +- matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp))); ++ duprintf("ipt_udp: matchsize %u != %u\n", matchinfosize, ++ (unsigned int)IPT_ALIGN(sizeof(struct ipt_udp))); + return 0; + } + if (udpinfo->invflags & ~IPT_UDP_INV_MASK) { +@@ -1677,6 +2896,9 @@ icmp_checkentry(const char *tablename, + /* The built-in targets: standard (NULL) and error. 
*/ + static struct ipt_target ipt_standard_target = { + .name = IPT_STANDARD_TARGET, ++#ifdef CONFIG_COMPAT ++ .compat = &compat_ipt_standard_fn, ++#endif + }; + + static struct ipt_target ipt_error_target = { +@@ -1698,18 +2920,27 @@ static struct ipt_match tcp_matchstruct + .name = "tcp", + .match = &tcp_match, + .checkentry = &tcp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &tcp_compat, ++#endif + }; + + static struct ipt_match udp_matchstruct = { + .name = "udp", + .match = &udp_match, + .checkentry = &udp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &udp_compat, ++#endif + }; + + static struct ipt_match icmp_matchstruct = { + .name = "icmp", + .match = &icmp_match, + .checkentry = &icmp_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &icmp_compat, ++#endif + }; + + #ifdef CONFIG_PROC_FS +@@ -1735,7 +2966,7 @@ static inline int print_target(const str + off_t start_offset, char *buffer, int length, + off_t *pos, unsigned int *count) + { +- if (t == &ipt_standard_target || t == &ipt_error_target) ++ if (t == &ve_ipt_standard_target || t == &ve_ipt_error_target) + return 0; + return print_name((char *)t, start_offset, buffer, length, pos, count); + } +@@ -1745,10 +2976,16 @@ static int ipt_get_tables(char *buffer, + off_t pos = 0; + unsigned int count = 0; + ++#ifdef CONFIG_VE_IPTABLES ++ /* if we don't initialized for current VE exiting */ ++ if (&ve_ipt_standard_target == NULL) ++ return 0; ++#endif ++ + if (down_interruptible(&ipt_mutex) != 0) + return 0; + +- LIST_FIND(&ipt_tables, print_name, void *, ++ LIST_FIND(&ve_ipt_tables, print_name, void *, + offset, buffer, length, &pos, &count); + + up(&ipt_mutex); +@@ -1763,10 +3000,15 @@ static int ipt_get_targets(char *buffer, + off_t pos = 0; + unsigned int count = 0; + ++#ifdef CONFIG_VE_IPTABLES ++ /* if we don't initialized for current VE exiting */ ++ if (&ve_ipt_standard_target == NULL) ++ return 0; ++#endif + if (down_interruptible(&ipt_mutex) != 0) + return 0; + +- LIST_FIND(&ipt_target, print_target, struct ipt_target *, ++ LIST_FIND(&ve_ipt_target, print_target, struct ipt_target *, + offset, buffer, length, &pos, &count); + + up(&ipt_mutex); +@@ -1780,10 +3022,15 @@ static int ipt_get_matches(char *buffer, + off_t pos = 0; + unsigned int count = 0; + ++#ifdef CONFIG_VE_IPTABLES ++ /* if we don't initialized for current VE exiting */ ++ if (&ve_ipt_standard_target == NULL) ++ return 0; ++#endif + if (down_interruptible(&ipt_mutex) != 0) + return 0; + +- LIST_FIND(&ipt_match, print_name, void *, ++ LIST_FIND(&ve_ipt_match, print_name, void *, + offset, buffer, length, &pos, &count); + + up(&ipt_mutex); +@@ -1799,6 +3046,7 @@ static struct { char *name; get_info_t * + { NULL, NULL} }; + #endif /*CONFIG_PROC_FS*/ + ++void fini_iptables(void); + static int __init init(void) + { + int ret; +@@ -1839,11 +3087,132 @@ static int __init init(void) + #endif + + printk("ip_tables: (C) 2000-2002 Netfilter core team\n"); ++ ++#if defined(CONFIG_VE_IPTABLES) ++ /* init ve0 */ ++ ret = init_iptables(); ++ if (ret == 0) { ++ KSYMRESOLVE(init_iptables); ++ KSYMRESOLVE(fini_iptables); ++ KSYMRESOLVE(ipt_flush_table); ++ KSYMMODRESOLVE(ip_tables); ++ } ++#else ++ ret = 0; ++#endif ++ return ret; ++} ++ ++#ifdef CONFIG_VE_IPTABLES ++/* alloc helper */ ++#define ALLOC_ENVF(field,label) \ ++ if ( !(envid->field = kmalloc(sizeof(*(envid->field)), GFP_KERNEL)) ) \ ++ goto label; ++int init_iptables(void) ++{ ++ struct ve_struct *envid; ++ ++ envid = get_exec_env(); ++ ++ if (ve_is_super(envid)) { ++ envid->_ipt_target = &ipt_target; ++ 
envid->_ipt_match = &ipt_match; ++ envid->_ipt_tables = &ipt_tables; ++ ++ envid->_ipt_standard_target = &ipt_standard_target; ++ envid->_ipt_error_target = &ipt_error_target; ++ envid->_tcp_matchstruct = &tcp_matchstruct; ++ envid->_udp_matchstruct = &udp_matchstruct; ++ envid->_icmp_matchstruct = &icmp_matchstruct; ++ } else { ++ /* allocate structures in ve_struct */ ++ ALLOC_ENVF(_ipt_target,nomem0); ++ ALLOC_ENVF(_ipt_match,nomem1); ++ ALLOC_ENVF(_ipt_tables,nomem2); ++ ALLOC_ENVF(_ipt_standard_target,nomem3); ++ ALLOC_ENVF(_ipt_error_target,nomem4); ++ ALLOC_ENVF(_tcp_matchstruct,nomem5); ++ ALLOC_ENVF(_udp_matchstruct,nomem6); ++ ALLOC_ENVF(_icmp_matchstruct,nomem7); ++ ++ /* FIXME: charge ubc */ ++ INIT_LIST_HEAD(envid->_ipt_target); ++ INIT_LIST_HEAD(envid->_ipt_match); ++ INIT_LIST_HEAD(envid->_ipt_tables); ++ ++ memcpy(envid->_ipt_standard_target, &ipt_standard_target, ++ sizeof(ipt_standard_target)); ++ memcpy(envid->_ipt_error_target, &ipt_error_target, ++ sizeof(ipt_error_target)); ++ memcpy(envid->_tcp_matchstruct, &tcp_matchstruct, ++ sizeof(tcp_matchstruct)); ++ memcpy(envid->_udp_matchstruct, &udp_matchstruct, ++ sizeof(udp_matchstruct)); ++ memcpy(envid->_icmp_matchstruct, &icmp_matchstruct, ++ sizeof(icmp_matchstruct)); ++ ++ down(&ipt_mutex); ++ list_append(envid->_ipt_target, envid->_ipt_standard_target); ++ list_append(envid->_ipt_target, envid->_ipt_error_target); ++ list_append(envid->_ipt_match, envid->_tcp_matchstruct); ++ list_append(envid->_ipt_match, envid->_udp_matchstruct); ++ list_append(envid->_ipt_match, envid->_icmp_matchstruct); ++ up(&ipt_mutex); ++ } ++ + return 0; ++ ++nomem7: ++ kfree(envid->_udp_matchstruct); envid->_udp_matchstruct = NULL; ++nomem6: ++ kfree(envid->_tcp_matchstruct); envid->_tcp_matchstruct = NULL; ++nomem5: ++ kfree(envid->_ipt_error_target); envid->_ipt_error_target = NULL; ++nomem4: ++ kfree(envid->_ipt_standard_target); envid->_ipt_standard_target = NULL; ++nomem3: ++ kfree(envid->_ipt_tables); envid->_ipt_tables = NULL; ++nomem2: ++ kfree(envid->_ipt_match); envid->_ipt_match = NULL; ++nomem1: ++ kfree(envid->_ipt_target); envid->_ipt_target = NULL; ++nomem0: ++ return -ENOMEM; ++} ++ ++void fini_iptables(void) ++{ ++ /* some cleanup */ ++ struct ve_struct *envid = get_exec_env(); ++ ++ if (envid->_ipt_tables != NULL && !ve_is_super(envid)) { ++ kfree(envid->_ipt_tables); ++ kfree(envid->_ipt_target); ++ kfree(envid->_ipt_match); ++ kfree(envid->_ipt_standard_target); ++ kfree(envid->_ipt_error_target); ++ kfree(envid->_tcp_matchstruct); ++ kfree(envid->_udp_matchstruct); ++ kfree(envid->_icmp_matchstruct); ++ } ++ ++ envid->_ipt_tables = NULL; ++ envid->_ipt_target = NULL; ++ envid->_ipt_match = NULL; ++ envid->_ipt_standard_target = NULL; ++ envid->_ipt_error_target = NULL; ++ envid->_tcp_matchstruct = NULL; ++ envid->_udp_matchstruct = NULL; ++ envid->_icmp_matchstruct = NULL; + } ++#endif + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ip_tables); ++ KSYMUNRESOLVE(init_iptables); ++ KSYMUNRESOLVE(fini_iptables); ++ KSYMUNRESOLVE(ipt_flush_table); + nf_unregister_sockopt(&ipt_sockopts); + #ifdef CONFIG_PROC_FS + { +@@ -1852,16 +3221,28 @@ static void __exit fini(void) + proc_net_remove(ipt_proc_entry[i].name); + } + #endif ++#ifdef CONFIG_VE_IPTABLES ++ fini_iptables(); ++#endif + } + ++EXPORT_SYMBOL(ipt_flush_table); + EXPORT_SYMBOL(ipt_register_table); + EXPORT_SYMBOL(ipt_unregister_table); + EXPORT_SYMBOL(ipt_register_match); + EXPORT_SYMBOL(ipt_unregister_match); + EXPORT_SYMBOL(ipt_do_table); 
++EXPORT_SYMBOL(visible_ipt_register_match); ++EXPORT_SYMBOL(visible_ipt_unregister_match); + EXPORT_SYMBOL(ipt_register_target); + EXPORT_SYMBOL(ipt_unregister_target); ++EXPORT_SYMBOL(visible_ipt_register_target); ++EXPORT_SYMBOL(visible_ipt_unregister_target); + EXPORT_SYMBOL(ipt_find_target_lock); ++#ifdef CONFIG_COMPAT ++EXPORT_SYMBOL(ipt_match_align_compat); ++EXPORT_SYMBOL(ipt_target_align_compat); ++#endif + +-module_init(init); ++subsys_initcall(init); + module_exit(fini); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_CLASSIFY.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_CLASSIFY.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_CLASSIFY.c 2004-08-14 14:54:46.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_CLASSIFY.c 2006-05-11 13:05:42.000000000 +0400 +@@ -48,7 +48,8 @@ checkentry(const char *tablename, + unsigned int hook_mask) + { + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){ +- printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n", ++ ve_printk(VE_LOG, KERN_ERR ++ "CLASSIFY: invalid size (%u != %Zu).\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_classify_target_info))); + return 0; +@@ -56,13 +57,14 @@ checkentry(const char *tablename, + + if (hook_mask & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING))) { +- printk(KERN_ERR "CLASSIFY: only valid in LOCAL_OUT, FORWARD " ++ ve_printk(VE_LOG, KERN_ERR ++ "CLASSIFY: only valid in LOCAL_OUT, FORWARD " + "and POST_ROUTING.\n"); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { +- printk(KERN_ERR "CLASSIFY: can only be called from " ++ ve_printk(VE_LOG, KERN_ERR "CLASSIFY: can only be called from " + "\"mangle\" table, not \"%s\".\n", + tablename); + return 0; +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_LOG.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_LOG.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_LOG.c 2006-05-11 13:05:49.000000000 +0400 +@@ -18,6 +18,7 @@ + #include <net/udp.h> + #include <net/tcp.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -48,32 +49,32 @@ static void dump_packet(const struct ipt + struct iphdr iph; + + if (skb_copy_bits(skb, iphoff, &iph, sizeof(iph)) < 0) { +- printk("TRUNCATED"); ++ ve_printk(VE_LOG, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. 
*/ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ +- printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", ++ ve_printk(VE_LOG, "SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", + NIPQUAD(iph.saddr), NIPQUAD(iph.daddr)); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ +- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", ++ ve_printk(VE_LOG, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(iph.tot_len), iph.tos & IPTOS_TOS_MASK, + iph.tos & IPTOS_PREC_MASK, iph.ttl, ntohs(iph.id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(iph.frag_off) & IP_CE) +- printk("CE "); ++ ve_printk(VE_LOG, "CE "); + if (ntohs(iph.frag_off) & IP_DF) +- printk("DF "); ++ ve_printk(VE_LOG, "DF "); + if (ntohs(iph.frag_off) & IP_MF) +- printk("MF "); ++ ve_printk(VE_LOG, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(iph.frag_off) & IP_OFFSET) +- printk("FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET); ++ ve_printk(VE_LOG, "FRAG:%u ", ntohs(iph.frag_off) & IP_OFFSET); + + if ((info->logflags & IPT_LOG_IPOPT) + && iph.ihl * 4 > sizeof(struct iphdr)) { +@@ -82,15 +83,15 @@ static void dump_packet(const struct ipt + + optsize = iph.ihl * 4 - sizeof(struct iphdr); + if (skb_copy_bits(skb, iphoff+sizeof(iph), opt, optsize) < 0) { +- printk("TRUNCATED"); ++ ve_printk(VE_LOG, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ +- printk("OPT ("); ++ ve_printk(VE_LOG, "OPT ("); + for (i = 0; i < optsize; i++) +- printk("%02X", opt[i]); +- printk(") "); ++ ve_printk(VE_LOG, "%02X", opt[i]); ++ ve_printk(VE_LOG, ") "); + } + + switch (iph.protocol) { +@@ -98,7 +99,7 @@ static void dump_packet(const struct ipt + struct tcphdr tcph; + + /* Max length: 10 "PROTO=TCP " */ +- printk("PROTO=TCP "); ++ ve_printk(VE_LOG, "PROTO=TCP "); + + if (ntohs(iph.frag_off) & IP_OFFSET) + break; +@@ -106,41 +107,41 @@ static void dump_packet(const struct ipt + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (skb_copy_bits(skb, iphoff+iph.ihl*4, &tcph, sizeof(tcph)) + < 0) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ +- printk("SPT=%u DPT=%u ", ++ ve_printk(VE_LOG, "SPT=%u DPT=%u ", + ntohs(tcph.source), ntohs(tcph.dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (info->logflags & IPT_LOG_TCPSEQ) +- printk("SEQ=%u ACK=%u ", ++ ve_printk(VE_LOG, "SEQ=%u ACK=%u ", + ntohl(tcph.seq), ntohl(tcph.ack_seq)); + /* Max length: 13 "WINDOW=65535 " */ +- printk("WINDOW=%u ", ntohs(tcph.window)); ++ ve_printk(VE_LOG, "WINDOW=%u ", ntohs(tcph.window)); + /* Max length: 9 "RES=0x3F " */ +- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22)); ++ ve_printk(VE_LOG, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(&tcph) & TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (tcph.cwr) +- printk("CWR "); ++ ve_printk(VE_LOG, "CWR "); + if (tcph.ece) +- printk("ECE "); ++ ve_printk(VE_LOG, "ECE "); + if (tcph.urg) +- printk("URG "); ++ ve_printk(VE_LOG, "URG "); + if (tcph.ack) +- printk("ACK "); ++ ve_printk(VE_LOG, "ACK "); + if (tcph.psh) +- printk("PSH "); ++ ve_printk(VE_LOG, "PSH "); + if (tcph.rst) +- printk("RST "); ++ ve_printk(VE_LOG, "RST "); + if (tcph.syn) +- printk("SYN "); ++ ve_printk(VE_LOG, "SYN "); + if (tcph.fin) +- printk("FIN "); ++ ve_printk(VE_LOG, "FIN "); + /* Max length: 11 "URGP=65535 " */ +- printk("URGP=%u ", ntohs(tcph.urg_ptr)); ++ 
ve_printk(VE_LOG, "URGP=%u ", ntohs(tcph.urg_ptr)); + + if ((info->logflags & IPT_LOG_TCPOPT) + && tcph.doff * 4 > sizeof(struct tcphdr)) { +@@ -150,15 +151,15 @@ static void dump_packet(const struct ipt + optsize = tcph.doff * 4 - sizeof(struct tcphdr); + if (skb_copy_bits(skb, iphoff+iph.ihl*4 + sizeof(tcph), + opt, optsize) < 0) { +- printk("TRUNCATED"); ++ ve_printk(VE_LOG, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ +- printk("OPT ("); ++ ve_printk(VE_LOG, "OPT ("); + for (i = 0; i < optsize; i++) +- printk("%02X", opt[i]); +- printk(") "); ++ ve_printk(VE_LOG, "%02X", opt[i]); ++ ve_printk(VE_LOG, ") "); + } + break; + } +@@ -166,7 +167,7 @@ static void dump_packet(const struct ipt + struct udphdr udph; + + /* Max length: 10 "PROTO=UDP " */ +- printk("PROTO=UDP "); ++ ve_printk(VE_LOG, "PROTO=UDP "); + + if (ntohs(iph.frag_off) & IP_OFFSET) + break; +@@ -174,13 +175,13 @@ static void dump_packet(const struct ipt + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (skb_copy_bits(skb, iphoff+iph.ihl*4, &udph, sizeof(udph)) + < 0) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ +- printk("SPT=%u DPT=%u LEN=%u ", ++ ve_printk(VE_LOG, "SPT=%u DPT=%u LEN=%u ", + ntohs(udph.source), ntohs(udph.dest), + ntohs(udph.len)); + break; +@@ -206,7 +207,7 @@ static void dump_packet(const struct ipt + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ +- printk("PROTO=ICMP "); ++ ve_printk(VE_LOG, "PROTO=ICMP "); + + if (ntohs(iph.frag_off) & IP_OFFSET) + break; +@@ -214,19 +215,19 @@ static void dump_packet(const struct ipt + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (skb_copy_bits(skb, iphoff+iph.ihl*4, &icmph, sizeof(icmph)) + < 0) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ +- printk("TYPE=%u CODE=%u ", icmph.type, icmph.code); ++ ve_printk(VE_LOG, "TYPE=%u CODE=%u ", icmph.type, icmph.code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (icmph.type <= NR_ICMP_TYPES + && required_len[icmph.type] + && skb->len-iphoff-iph.ihl*4 < required_len[icmph.type]) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } +@@ -235,19 +236,19 @@ static void dump_packet(const struct ipt + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ +- printk("ID=%u SEQ=%u ", ++ ve_printk(VE_LOG, "ID=%u SEQ=%u ", + ntohs(icmph.un.echo.id), + ntohs(icmph.un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ +- printk("PARAMETER=%u ", ++ ve_printk(VE_LOG, "PARAMETER=%u ", + ntohl(icmph.un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ +- printk("GATEWAY=%u.%u.%u.%u ", ++ ve_printk(VE_LOG, "GATEWAY=%u.%u.%u.%u ", + NIPQUAD(icmph.un.gateway)); + /* Fall through */ + case ICMP_DEST_UNREACH: +@@ -255,16 +256,16 @@ static void dump_packet(const struct ipt + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. 
*/ +- printk("["); ++ ve_printk(VE_LOG, "["); + dump_packet(info, skb, + iphoff + iph.ihl*4+sizeof(icmph)); +- printk("] "); ++ ve_printk(VE_LOG, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (icmph.type == ICMP_DEST_UNREACH + && icmph.code == ICMP_FRAG_NEEDED) +- printk("MTU=%u ", ntohs(icmph.un.frag.mtu)); ++ ve_printk(VE_LOG, "MTU=%u ", ntohs(icmph.un.frag.mtu)); + } + break; + } +@@ -276,24 +277,24 @@ static void dump_packet(const struct ipt + break; + + /* Max length: 9 "PROTO=AH " */ +- printk("PROTO=AH "); ++ ve_printk(VE_LOG, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (skb_copy_bits(skb, iphoff+iph.ihl*4, &ah, sizeof(ah)) < 0) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ +- printk("SPI=0x%x ", ntohl(ah.spi)); ++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(ah.spi)); + break; + } + case IPPROTO_ESP: { + struct ip_esp_hdr esph; + + /* Max length: 10 "PROTO=ESP " */ +- printk("PROTO=ESP "); ++ ve_printk(VE_LOG, "PROTO=ESP "); + + if (ntohs(iph.frag_off) & IP_OFFSET) + break; +@@ -301,18 +302,18 @@ static void dump_packet(const struct ipt + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (skb_copy_bits(skb, iphoff+iph.ihl*4, &esph, sizeof(esph)) + < 0) { +- printk("INCOMPLETE [%u bytes] ", ++ ve_printk(VE_LOG, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - iph.ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ +- printk("SPI=0x%x ", ntohl(esph.spi)); ++ ve_printk(VE_LOG, "SPI=0x%x ", ntohl(esph.spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: +- printk("PROTO=%u ", iph.protocol); ++ ve_printk(VE_LOG, "PROTO=%u ", iph.protocol); + } + + /* Proto Max log string length */ +@@ -339,8 +340,8 @@ ipt_log_packet(unsigned int hooknum, + const char *prefix) + { + spin_lock_bh(&log_lock); +- printk(level_string); +- printk("%sIN=%s OUT=%s ", ++ ve_printk(VE_LOG, level_string); ++ ve_printk(VE_LOG, "%sIN=%s OUT=%s ", + prefix == NULL ? loginfo->prefix : prefix, + in ? in->name : "", + out ? out->name : ""); +@@ -350,29 +351,29 @@ ipt_log_packet(unsigned int hooknum, + struct net_device *physoutdev = skb->nf_bridge->physoutdev; + + if (physindev && in != physindev) +- printk("PHYSIN=%s ", physindev->name); ++ ve_printk(VE_LOG, "PHYSIN=%s ", physindev->name); + if (physoutdev && out != physoutdev) +- printk("PHYSOUT=%s ", physoutdev->name); ++ ve_printk(VE_LOG, "PHYSOUT=%s ", physoutdev->name); + } + #endif + + if (in && !out) { + /* MAC logging for input chain only. */ +- printk("MAC="); ++ ve_printk(VE_LOG, "MAC="); + if (skb->dev && skb->dev->hard_header_len + && skb->mac.raw != (void*)skb->nh.iph) { + int i; + unsigned char *p = skb->mac.raw; + for (i = 0; i < skb->dev->hard_header_len; i++,p++) +- printk("%02x%c", *p, ++ ve_printk(VE_LOG, "%02x%c", *p, + i==skb->dev->hard_header_len - 1 + ? 
' ':':'); + } else +- printk(" "); ++ ve_printk(VE_LOG, " "); + } + + dump_packet(loginfo, skb, 0); +- printk("\n"); ++ ve_printk(VE_LOG, "\n"); + spin_unlock_bh(&log_lock); + } + +@@ -437,28 +438,62 @@ static int ipt_log_checkentry(const char + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_log_compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_log_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_log_reg = { + .name = "LOG", + .target = ipt_log_target, + .checkentry = ipt_log_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_log_compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_LOG(void) ++{ ++ return visible_ipt_register_target(&ipt_log_reg); ++} ++ ++void fini_iptable_LOG(void) ++{ ++ visible_ipt_unregister_target(&ipt_log_reg); ++} ++ + static int __init init(void) + { +- if (ipt_register_target(&ipt_log_reg)) +- return -EINVAL; ++ int err; ++ ++ err = init_iptable_LOG(); ++ if (err < 0) ++ return err; + if (nflog) + nf_log_register(PF_INET, &ipt_logfn); +- ++ ++ KSYMRESOLVE(init_iptable_LOG); ++ KSYMRESOLVE(fini_iptable_LOG); ++ KSYMMODRESOLVE(ipt_LOG); + return 0; + } + + static void __exit fini(void) + { ++ KSYMMODUNRESOLVE(ipt_LOG); ++ KSYMUNRESOLVE(init_iptable_LOG); ++ KSYMUNRESOLVE(fini_iptable_LOG); + if (nflog) + nf_log_unregister(PF_INET, &ipt_logfn); +- ipt_unregister_target(&ipt_log_reg); ++ fini_iptable_LOG(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MARK.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_MARK.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MARK.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_MARK.c 2006-05-11 13:05:42.000000000 +0400 +@@ -44,14 +44,15 @@ checkentry(const char *tablename, + unsigned int hook_mask) + { + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { +- printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", ++ ve_printk(VE_LOG, KERN_WARNING "MARK: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_mark_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { +- printk(KERN_WARNING "MARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename); ++ ve_printk(VE_LOG, KERN_WARNING "MARK: can only be called from " ++ "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_MASQUERADE.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2004-08-14 14:55:34.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-05-11 13:05:42.000000000 +0400 +@@ -140,6 +140,7 @@ masquerade_target(struct sk_buff **pskb, + return ip_nat_setup_info(ct, &newrange, hooknum); + } + ++#if 0 + static inline int + device_cmp(const struct ip_conntrack *i, void *_ina) + { +@@ -173,6 +174,7 @@ static int masq_inet_event(struct notifi + static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, + }; ++#endif + + static struct ipt_target masquerade = { + .name = "MASQUERADE", +@@ -187,9 +189,13 @@ static int __init init(void) + + ret = ipt_register_target(&masquerade); + ++#if 0 ++/* This notifier is unnecessary and may ++ lead to oops in virtual environments */ + if (ret == 0) + /* Register IP address change 
reports */ + register_inetaddr_notifier(&masq_inet_notifier); ++#endif + + return ret; + } +@@ -197,7 +203,7 @@ static int __init init(void) + static void __exit fini(void) + { + ipt_unregister_target(&masquerade); +- unregister_inetaddr_notifier(&masq_inet_notifier); ++/* unregister_inetaddr_notifier(&masq_inet_notifier); */ + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_REDIRECT.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REDIRECT.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_REDIRECT.c 2006-05-11 13:05:42.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/inetdevice.h> + #include <net/protocol.h> + #include <net/checksum.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4.h> + #include <linux/netfilter_ipv4/ip_nat_rule.h> + +@@ -25,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <core + MODULE_DESCRIPTION("iptables REDIRECT target module"); + + #if 0 +-#define DEBUGP printk ++#define DEBUGP ve_printk + #else + #define DEBUGP(format, args...) + #endif +@@ -115,14 +116,36 @@ static struct ipt_target redirect_reg = + .me = THIS_MODULE, + }; + ++int init_iptable_REDIRECT(void) ++{ ++ return visible_ipt_register_target(&redirect_reg); ++} ++ ++void fini_iptable_REDIRECT(void) ++{ ++ visible_ipt_unregister_target(&redirect_reg); ++} ++ + static int __init init(void) + { +- return ipt_register_target(&redirect_reg); ++ int err; ++ ++ err = init_iptable_REDIRECT(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_REDIRECT); ++ KSYMRESOLVE(fini_iptable_REDIRECT); ++ KSYMMODRESOLVE(ipt_REDIRECT); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_target(&redirect_reg); ++ KSYMMODUNRESOLVE(ipt_REDIRECT); ++ KSYMUNRESOLVE(init_iptable_REDIRECT); ++ KSYMUNRESOLVE(fini_iptable_REDIRECT); ++ fini_iptable_REDIRECT(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_REJECT.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_REJECT.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_REJECT.c 2006-05-11 13:05:49.000000000 +0400 +@@ -22,6 +22,7 @@ + #include <net/ip.h> + #include <net/tcp.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_REJECT.h> + #ifdef CONFIG_BRIDGE_NETFILTER +@@ -440,7 +441,7 @@ static int check(const char *tablename, + } + + if (rejinfo->with == IPT_ICMP_ECHOREPLY) { +- printk("REJECT: ECHOREPLY no longer supported.\n"); ++ ve_printk(VE_LOG, "REJECT: ECHOREPLY no longer supported.\n"); + return 0; + } else if (rejinfo->with == IPT_TCP_RESET) { + /* Must specify that it's a TCP packet */ +@@ -454,21 +455,58 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_reject_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_reject_reg = { + .name = "REJECT", + .target = reject, + .checkentry = check, ++#ifdef CONFIG_COMPAT ++ .compat = compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_REJECT(void) ++{ ++ return visible_ipt_register_target(&ipt_reject_reg); ++} ++ ++void 
fini_iptable_REJECT(void) ++{ ++ visible_ipt_unregister_target(&ipt_reject_reg); ++} ++ + static int __init init(void) + { +- return ipt_register_target(&ipt_reject_reg); ++ int err; ++ ++ err = init_iptable_REJECT(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_REJECT); ++ KSYMRESOLVE(fini_iptable_REJECT); ++ KSYMMODRESOLVE(ipt_REJECT); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_target(&ipt_reject_reg); ++ KSYMMODUNRESOLVE(ipt_REJECT); ++ KSYMUNRESOLVE(init_iptable_REJECT); ++ KSYMUNRESOLVE(fini_iptable_REJECT); ++ fini_iptable_REJECT(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_TCPMSS.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TCPMSS.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_TCPMSS.c 2006-05-11 13:05:49.000000000 +0400 +@@ -13,6 +13,7 @@ + + #include <linux/ip.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_TCPMSS.h> +@@ -228,7 +229,8 @@ ipt_tcpmss_checkentry(const char *tablen + ((hook_mask & ~((1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) != 0)) { +- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); ++ ve_printk(VE_LOG, "TCPMSS: path-MTU clamping only supported in " ++ "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return 0; + } + +@@ -237,25 +239,62 @@ ipt_tcpmss_checkentry(const char *tablen + && IPT_MATCH_ITERATE(e, find_syn_match)) + return 1; + +- printk("TCPMSS: Only works on TCP SYN packets\n"); ++ ve_printk(VE_LOG, "TCPMSS: Only works on TCP SYN packets\n"); + return 0; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_tcpmss_compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_tcpmss_reg = { + .name = "TCPMSS", + .target = ipt_tcpmss_target, + .checkentry = ipt_tcpmss_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_tcpmss_compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_TCPMSS(void) ++{ ++ return visible_ipt_register_target(&ipt_tcpmss_reg); ++} ++ ++void fini_iptable_TCPMSS(void) ++{ ++ visible_ipt_unregister_target(&ipt_tcpmss_reg); ++} ++ + static int __init init(void) + { +- return ipt_register_target(&ipt_tcpmss_reg); ++ int err; ++ ++ err = init_iptable_TCPMSS(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_TCPMSS); ++ KSYMRESOLVE(fini_iptable_TCPMSS); ++ KSYMMODRESOLVE(ipt_TCPMSS); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_target(&ipt_tcpmss_reg); ++ KSYMMODUNRESOLVE(ipt_TCPMSS); ++ KSYMUNRESOLVE(init_iptable_TCPMSS); ++ KSYMUNRESOLVE(fini_iptable_TCPMSS); ++ fini_iptable_TCPMSS(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_TOS.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_TOS.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_TOS.c 2006-05-11 13:05:49.000000000 +0400 +@@ -15,6 +15,7 @@ + + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_TOS.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter 
Core Team <coreteam@netfilter.org>"); +@@ -61,14 +62,15 @@ checkentry(const char *tablename, + const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) { +- printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n", ++ ve_printk(VE_LOG, KERN_WARNING "TOS: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_tos_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { +- printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename); ++ ve_printk(VE_LOG, KERN_WARNING "TOS: can only be called from " ++ "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + +@@ -77,28 +79,65 @@ checkentry(const char *tablename, + && tos != IPTOS_RELIABILITY + && tos != IPTOS_MINCOST + && tos != IPTOS_NORMALSVC) { +- printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); ++ ve_printk(VE_LOG, KERN_WARNING "TOS: bad tos value %#x\n", tos); + return 0; + } + + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *target, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info)); ++ return ipt_target_align_compat(target, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_target ipt_tos_reg = { + .name = "TOS", + .target = target, + .checkentry = checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_TOS(void) ++{ ++ return visible_ipt_register_target(&ipt_tos_reg); ++} ++ ++void fini_iptable_TOS(void) ++{ ++ visible_ipt_unregister_target(&ipt_tos_reg); ++} ++ + static int __init init(void) + { +- return ipt_register_target(&ipt_tos_reg); ++ int err; ++ ++ err = init_iptable_TOS(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_TOS); ++ KSYMRESOLVE(fini_iptable_TOS); ++ KSYMMODRESOLVE(ipt_TOS); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_target(&ipt_tos_reg); ++ KSYMMODUNRESOLVE(ipt_TOS); ++ KSYMUNRESOLVE(init_iptable_TOS); ++ KSYMUNRESOLVE(fini_iptable_TOS); ++ fini_iptable_TOS(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_ULOG.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ULOG.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_ULOG.c 2006-05-11 13:05:42.000000000 +0400 +@@ -129,6 +129,9 @@ static void ulog_send(unsigned int nlgro + /* timer function to flush queue in ULOG_FLUSH_INTERVAL time */ + static void ulog_timer(unsigned long data) + { ++#ifdef CONFIG_VE ++#error timer context should be evaluated ++#endif + DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n"); + + /* lock to protect against somebody modifying our structure +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_conntrack.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_conntrack.c 2004-08-14 14:56:15.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_conntrack.c 2006-05-11 13:05:49.000000000 +0400 +@@ -13,6 +13,7 @@ + #include <linux/netfilter_ipv4/ip_conntrack.h> + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_conntrack.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); +@@ -114,22 +115,146 @@ static int check(const char *tablename, + 
return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct ipt_conntrack_info *pinfo; ++ struct compat_ipt_conntrack_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct ipt_conntrack_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_ipt_conntrack_info)); ++ info.statemask = pinfo->statemask; ++ info.statusmask = pinfo->statusmask; ++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX * ++ sizeof(struct ip_conntrack_tuple)); ++ memcpy(info.sipmsk, pinfo->sipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ memcpy(info.dipmsk, pinfo->dipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ info.expires_min = pinfo->expires_min; ++ info.expires_max = pinfo->expires_max; ++ info.flags = pinfo->flags; ++ info.invflags = pinfo->invflags; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_ipt_conntrack_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_ipt_conntrack_info *pinfo; ++ struct ipt_conntrack_info info; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_ipt_conntrack_info *)pm->data; ++ memset(&info, 0, sizeof(struct ipt_conntrack_info)); ++ info.statemask = pinfo->statemask; ++ info.statusmask = pinfo->statusmask; ++ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX * ++ sizeof(struct ip_conntrack_tuple)); ++ memcpy(info.sipmsk, pinfo->sipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ memcpy(info.dipmsk, pinfo->dipmsk, ++ IP_CT_DIR_MAX * sizeof(struct in_addr)); ++ info.expires_min = pinfo->expires_min; ++ info.expires_max = pinfo->expires_max; ++ info.flags = pinfo->flags; ++ info.invflags = pinfo->invflags; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct ipt_conntrack_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_conntrack_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_conntrack_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct ipt_match conntrack_match = { + .name = "conntrack", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_conntrack_match(void) ++{ ++ return visible_ipt_register_match(&conntrack_match); ++} ++ ++void fini_iptable_conntrack_match(void) ++{ ++ visible_ipt_unregister_match(&conntrack_match); ++} ++ + static int __init init(void) + { ++ 
int err; ++ + need_ip_conntrack(); +- return ipt_register_match(&conntrack_match); ++ err = init_iptable_conntrack_match(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_conntrack_match); ++ KSYMRESOLVE(fini_iptable_conntrack_match); ++ KSYMMODRESOLVE(ipt_conntrack); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&conntrack_match); ++ KSYMMODUNRESOLVE(ipt_conntrack); ++ KSYMUNRESOLVE(init_iptable_conntrack_match); ++ KSYMUNRESOLVE(fini_iptable_conntrack_match); ++ fini_iptable_conntrack_match(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_helper.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_helper.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_helper.c 2004-08-14 14:56:26.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_helper.c 2006-05-11 13:05:49.000000000 +0400 +@@ -18,6 +18,7 @@ + #include <linux/netfilter_ipv4/ip_conntrack_helper.h> + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_helper.h> ++#include <linux/nfcalls.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>"); +@@ -98,21 +99,125 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct ipt_helper_info *pinfo; ++ struct compat_ipt_helper_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct ipt_helper_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_ipt_helper_info)); ++ info.invert = pinfo->invert; ++ memcpy(info.name, pinfo->name, 30); ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_ipt_helper_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_ipt_helper_info *pinfo; ++ struct ipt_helper_info info; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_ipt_helper_info *)pm->data; ++ memset(&info, 0, sizeof(struct ipt_helper_info)); ++ info.invert = pinfo->invert; ++ memcpy(info.name, pinfo->name, 30); ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct ipt_helper_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_helper_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_helper_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct ipt_match helper_match = { + .name = "helper", + 
.match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_helper(void) ++{ ++ return visible_ipt_register_match(&helper_match); ++} ++ ++void fini_iptable_helper(void) ++{ ++ visible_ipt_unregister_match(&helper_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&helper_match); ++ int err; ++ ++ err = init_iptable_helper(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_helper); ++ KSYMRESOLVE(fini_iptable_helper); ++ KSYMMODRESOLVE(ipt_helper); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&helper_match); ++ KSYMMODUNRESOLVE(ipt_helper); ++ KSYMUNRESOLVE(init_iptable_helper); ++ KSYMUNRESOLVE(fini_iptable_helper); ++ fini_iptable_helper(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_length.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_length.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_length.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_length.c 2006-05-11 13:05:49.000000000 +0400 +@@ -8,6 +8,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_length.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -43,21 +44,58 @@ checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_length_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_length_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match length_match = { + .name = "length", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_length(void) ++{ ++ return visible_ipt_register_match(&length_match); ++} ++ ++void fini_iptable_length(void) ++{ ++ visible_ipt_unregister_match(&length_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&length_match); ++ int err; ++ ++ err = init_iptable_length(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_length); ++ KSYMRESOLVE(fini_iptable_length); ++ KSYMMODRESOLVE(ipt_length); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&length_match); ++ KSYMMODUNRESOLVE(ipt_length); ++ KSYMUNRESOLVE(init_iptable_length); ++ KSYMUNRESOLVE(fini_iptable_length); ++ fini_iptable_length(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_limit.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_limit.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_limit.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_limit.c 2006-05-11 13:05:49.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/skbuff.h> + #include <linux/spinlock.h> + #include <linux/interrupt.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_limit.h> +@@ -25,6 +26,13 @@ MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); + MODULE_DESCRIPTION("iptables rate limit match"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_ipt_limit_reg (*(get_exec_env()->_ipt_limit_reg)) ++#else ++#define ve_ipt_limit_reg ipt_limit_reg ++#endif ++ + /* The algorithm used is the Simple Token 
Bucket Filter (TBF) + * see net/sched/sch_tbf.c in the linux source tree + */ +@@ -116,7 +124,7 @@ ipt_limit_checkentry(const char *tablena + /* Check for overflow. */ + if (r->burst == 0 + || user2credits(r->avg * r->burst) < user2credits(r->avg)) { +- printk("Overflow in ipt_limit, try lower: %u/%u\n", ++ ve_printk(VE_LOG, "Overflow in ipt_limit, try lower: %u/%u\n", + r->avg, r->burst); + return 0; + } +@@ -134,23 +142,128 @@ ipt_limit_checkentry(const char *tablena + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int ipt_limit_compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct ipt_rateinfo *pinfo; ++ struct compat_ipt_rateinfo rinfo; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct ipt_rateinfo *)pm->data; ++ memset(&rinfo, 0, sizeof(struct compat_ipt_rateinfo)); ++ rinfo.avg = pinfo->avg; ++ rinfo.burst = pinfo->burst; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &rinfo, sizeof(struct compat_ipt_rateinfo))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int ipt_limit_compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_ipt_rateinfo *pinfo; ++ struct ipt_rateinfo rinfo; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_ipt_rateinfo *)pm->data; ++ memset(&rinfo, 0, sizeof(struct ipt_rateinfo)); ++ rinfo.avg = pinfo->avg; ++ rinfo.burst = pinfo->burst; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &rinfo, sizeof(struct ipt_rateinfo)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int ipt_limit_compat(void *match, void **dstptr, ++ int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_rateinfo)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_rateinfo)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = ipt_limit_compat_to_user(match, ++ dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = ipt_limit_compat_from_user(match, ++ dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct ipt_match ipt_limit_reg = { + .name = "limit", + .match = ipt_limit_match, + .checkentry = ipt_limit_checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = ipt_limit_compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_limit(void) ++{ ++ return visible_ipt_register_match(&ipt_limit_reg); ++} ++ ++void fini_iptable_limit(void) ++{ ++ visible_ipt_unregister_match(&ipt_limit_reg); ++} ++ + static int __init init(void) + { +- if (ipt_register_match(&ipt_limit_reg)) +- return -EINVAL; ++ int err; ++ ++ err = init_iptable_limit(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_limit); ++ KSYMRESOLVE(fini_iptable_limit); ++ KSYMMODRESOLVE(ipt_limit); + return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&ipt_limit_reg); ++ KSYMMODUNRESOLVE(ipt_limit); ++ 
KSYMUNRESOLVE(init_iptable_limit); ++ KSYMUNRESOLVE(fini_iptable_limit); ++ fini_iptable_limit(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_mac.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_mac.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_mac.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_mac.c 2006-05-11 13:05:42.000000000 +0400 +@@ -48,7 +48,8 @@ ipt_mac_checkentry(const char *tablename + if (hook_mask + & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD))) { +- printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n"); ++ ve_printk(VE_LOG, "ipt_mac: only valid for PRE_ROUTING, " ++ "LOCAL_IN or FORWARD.\n"); + return 0; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_multiport.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_multiport.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_multiport.c 2006-05-11 13:05:49.000000000 +0400 +@@ -13,6 +13,7 @@ + #include <linux/types.h> + #include <linux/udp.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_multiport.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("iptables multiple port match module"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_multiport_match (*(get_exec_env()->_multiport_match)) ++#else ++#define ve_multiport_match multiport_match ++#endif ++ + #if 0 + #define duprintf(format, args...) printk(format , ## args) + #else +@@ -100,21 +108,58 @@ checkentry(const char *tablename, + && multiinfo->count <= IPT_MULTI_PORTS; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_multiport)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match multiport_match = { + .name = "multiport", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_multiport(void) ++{ ++ return visible_ipt_register_match(&multiport_match); ++} ++ ++void fini_iptable_multiport(void) ++{ ++ visible_ipt_unregister_match(&multiport_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&multiport_match); ++ int err; ++ ++ err = init_iptable_multiport(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_multiport); ++ KSYMRESOLVE(fini_iptable_multiport); ++ KSYMMODRESOLVE(ipt_multiport); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&multiport_match); ++ KSYMMODUNRESOLVE(ipt_multiport); ++ KSYMUNRESOLVE(init_iptable_multiport); ++ KSYMUNRESOLVE(fini_iptable_multiport); ++ fini_iptable_multiport(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_owner.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_owner.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_owner.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_owner.c 2006-05-11 13:05:42.000000000 +0400 +@@ -23,12 +23,13 @@ MODULE_DESCRIPTION("iptables owner match + static int + match_comm(const struct sk_buff *skb, const 
char *comm) + { ++#ifndef CONFIG_VE + struct task_struct *g, *p; + struct files_struct *files; + int i; + + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + if(strncmp(p->comm, comm, sizeof(p->comm))) + continue; + +@@ -48,20 +49,22 @@ match_comm(const struct sk_buff *skb, co + spin_unlock(&files->file_lock); + } + task_unlock(p); +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + read_unlock(&tasklist_lock); ++#endif + return 0; + } + + static int + match_pid(const struct sk_buff *skb, pid_t pid) + { ++#ifndef CONFIG_VE + struct task_struct *p; + struct files_struct *files; + int i; + + read_lock(&tasklist_lock); +- p = find_task_by_pid(pid); ++ p = find_task_by_pid_ve(pid); + if (!p) + goto out; + task_lock(p); +@@ -82,18 +85,20 @@ match_pid(const struct sk_buff *skb, pid + task_unlock(p); + out: + read_unlock(&tasklist_lock); ++#endif + return 0; + } + + static int + match_sid(const struct sk_buff *skb, pid_t sid) + { ++#ifndef CONFIG_VE + struct task_struct *g, *p; + struct file *file = skb->sk->sk_socket->file; + int i, found=0; + + read_lock(&tasklist_lock); +- do_each_thread(g, p) { ++ do_each_thread_ve(g, p) { + struct files_struct *files; + if (p->signal->session != sid) + continue; +@@ -113,11 +118,14 @@ match_sid(const struct sk_buff *skb, pid + task_unlock(p); + if (found) + goto out; +- } while_each_thread(g, p); ++ } while_each_thread_ve(g, p); + out: + read_unlock(&tasklist_lock); + + return found; ++#else ++ return 0; ++#endif + } + + static int +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_recent.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_recent.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_recent.c 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_recent.c 2006-05-11 13:05:34.000000000 +0400 +@@ -222,7 +222,7 @@ static int ip_recent_ctrl(struct file *f + curr_table->table[count].last_seen = 0; + curr_table->table[count].addr = 0; + curr_table->table[count].ttl = 0; +- memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); ++ memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); + curr_table->table[count].oldest_pkt = 0; + curr_table->table[count].time_pos = 0; + curr_table->time_info[count].position = count; +@@ -501,7 +501,7 @@ match(const struct sk_buff *skb, + location = time_info[curr_table->time_pos].position; + hash_table[r_list[location].hash_entry] = -1; + hash_table[hash_result] = location; +- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); ++ memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); + r_list[location].time_pos = curr_table->time_pos; + r_list[location].addr = addr; + r_list[location].ttl = ttl; +@@ -630,7 +630,7 @@ match(const struct sk_buff *skb, + r_list[location].last_seen = 0; + r_list[location].addr = 0; + r_list[location].ttl = 0; +- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); ++ memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); + r_list[location].oldest_pkt = 0; + ans = !info->invert; + } +@@ -733,10 +733,10 @@ checkentry(const char *tablename, + memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); + #ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", +- sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); ++ sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); + #endif + +- hold = 
vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); ++ hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); + #ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); + #endif +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_state.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_state.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_state.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_state.c 2006-05-11 13:05:49.000000000 +0400 +@@ -10,6 +10,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4/ip_conntrack.h> + #include <linux/netfilter_ipv4/ip_tables.h> + #include <linux/netfilter_ipv4/ipt_state.h> +@@ -52,22 +53,124 @@ static int check(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat_to_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct ipt_entry_match *pm; ++ struct ipt_state_info *pinfo; ++ struct compat_ipt_state_info info; ++ u_int16_t msize; ++ ++ pm = (struct ipt_entry_match *)match; ++ msize = pm->u.user.match_size; ++ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match))) ++ return -EFAULT; ++ pinfo = (struct ipt_state_info *)pm->data; ++ memset(&info, 0, sizeof(struct compat_ipt_state_info)); ++ info.statemask = pinfo->statemask; ++ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match), ++ &info, sizeof(struct compat_ipt_state_info))) ++ return -EFAULT; ++ msize -= off; ++ if (put_user(msize, (u_int16_t *)*dstptr)) ++ return -EFAULT; ++ *size -= off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat_from_user(void *match, void **dstptr, ++ int *size, int off) ++{ ++ struct compat_ipt_entry_match *pm; ++ struct ipt_entry_match *dstpm; ++ struct compat_ipt_state_info *pinfo; ++ struct ipt_state_info info; ++ u_int16_t msize; ++ ++ pm = (struct compat_ipt_entry_match *)match; ++ dstpm = (struct ipt_entry_match *)*dstptr; ++ msize = pm->u.user.match_size; ++ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match)); ++ pinfo = (struct compat_ipt_state_info *)pm->data; ++ memset(&info, 0, sizeof(struct ipt_state_info)); ++ info.statemask = pinfo->statemask; ++ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match), ++ &info, sizeof(struct ipt_state_info)); ++ msize += off; ++ dstpm->u.user.match_size = msize; ++ *size += off; ++ *dstptr += msize; ++ return 0; ++} ++ ++static int compat(void *match, void **dstptr, int *size, int convert) ++{ ++ int ret, off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_state_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct compat_ipt_state_info)); ++ switch (convert) { ++ case COMPAT_TO_USER: ++ ret = compat_to_user(match, dstptr, size, off); ++ break; ++ case COMPAT_FROM_USER: ++ ret = compat_from_user(match, dstptr, size, off); ++ break; ++ case COMPAT_CALC_SIZE: ++ *size += off; ++ ret = 0; ++ break; ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ return ret; ++} ++#endif ++ + static struct ipt_match state_match = { + .name = "state", + .match = &match, + .checkentry = &check, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_state(void) ++{ ++ return visible_ipt_register_match(&state_match); ++} ++ ++void fini_iptable_state(void) ++{ ++ visible_ipt_unregister_match(&state_match); ++} ++ + static int __init init(void) + { ++ int err; ++ + need_ip_conntrack(); +- return ipt_register_match(&state_match); ++ err = init_iptable_state(); ++ if 
(err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_state); ++ KSYMRESOLVE(fini_iptable_state); ++ KSYMMODRESOLVE(ipt_state); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&state_match); ++ KSYMMODUNRESOLVE(ipt_state); ++ KSYMUNRESOLVE(init_iptable_state); ++ KSYMUNRESOLVE(fini_iptable_state); ++ fini_iptable_state(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tcpmss.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_tcpmss.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tcpmss.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_tcpmss.c 2006-05-11 13:05:49.000000000 +0400 +@@ -10,6 +10,7 @@ + #include <linux/module.h> + #include <linux/skbuff.h> + #include <net/tcp.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_tcpmss.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -103,28 +104,65 @@ checkentry(const char *tablename, + + /* Must specify -p tcp */ + if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) { +- printk("tcpmss: Only works on TCP packets\n"); ++ ve_printk(VE_LOG, "tcpmss: Only works on TCP packets\n"); + return 0; + } + + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_match_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match tcpmss_match = { + .name = "tcpmss", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_tcpmss(void) ++{ ++ return visible_ipt_register_match(&tcpmss_match); ++} ++ ++void fini_iptable_tcpmss(void) ++{ ++ visible_ipt_unregister_match(&tcpmss_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&tcpmss_match); ++ int err; ++ ++ err = init_iptable_tcpmss(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_tcpmss); ++ KSYMRESOLVE(fini_iptable_tcpmss); ++ KSYMMODRESOLVE(ipt_tcpmss); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&tcpmss_match); ++ KSYMMODUNRESOLVE(ipt_tcpmss); ++ KSYMUNRESOLVE(init_iptable_tcpmss); ++ KSYMUNRESOLVE(fini_iptable_tcpmss); ++ fini_iptable_tcpmss(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_tos.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_tos.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_tos.c 2006-05-11 13:05:49.000000000 +0400 +@@ -10,6 +10,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_tos.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -17,6 +18,13 @@ + MODULE_LICENSE("GPL"); + MODULE_DESCRIPTION("iptables TOS match module"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_tos_match (*(get_exec_env()->_tos_match)) ++#else ++#define ve_tos_match tos_match ++#endif ++ + static int + match(const struct sk_buff *skb, + const struct net_device *in, +@@ -43,21 +51,58 @@ checkentry(const char *tablename, + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_tos_info)) - ++ 
COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match tos_match = { + .name = "tos", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_tos(void) ++{ ++ return visible_ipt_register_match(&tos_match); ++} ++ ++void fini_iptable_tos(void) ++{ ++ visible_ipt_unregister_match(&tos_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&tos_match); ++ int err; ++ ++ err = init_iptable_tos(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_tos); ++ KSYMRESOLVE(fini_iptable_tos); ++ KSYMMODRESOLVE(ipt_tos); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&tos_match); ++ KSYMMODUNRESOLVE(ipt_tos); ++ KSYMUNRESOLVE(init_iptable_tos); ++ KSYMUNRESOLVE(fini_iptable_tos); ++ fini_iptable_tos(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_ttl.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/ipt_ttl.c 2004-08-14 14:56:24.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/ipt_ttl.c 2006-05-11 13:05:49.000000000 +0400 +@@ -11,6 +11,7 @@ + + #include <linux/module.h> + #include <linux/skbuff.h> ++#include <linux/nfcalls.h> + + #include <linux/netfilter_ipv4/ipt_ttl.h> + #include <linux/netfilter_ipv4/ip_tables.h> +@@ -57,22 +58,58 @@ static int checkentry(const char *tablen + return 1; + } + ++#ifdef CONFIG_COMPAT ++static int compat(void *match, ++ void **dstptr, int *size, int convert) ++{ ++ int off; ++ ++ off = IPT_ALIGN(sizeof(struct ipt_ttl_info)) - ++ COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info)); ++ return ipt_match_align_compat(match, dstptr, size, off, convert); ++} ++#endif ++ + static struct ipt_match ttl_match = { + .name = "ttl", + .match = &match, + .checkentry = &checkentry, ++#ifdef CONFIG_COMPAT ++ .compat = &compat, ++#endif + .me = THIS_MODULE, + }; + ++int init_iptable_ttl(void) ++{ ++ return visible_ipt_register_match(&ttl_match); ++} ++ ++void fini_iptable_ttl(void) ++{ ++ visible_ipt_unregister_match(&ttl_match); ++} ++ + static int __init init(void) + { +- return ipt_register_match(&ttl_match); ++ int err; ++ ++ err = init_iptable_ttl(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_ttl); ++ KSYMRESOLVE(fini_iptable_ttl); ++ KSYMMODRESOLVE(ipt_ttl); ++ return 0; + } + + static void __exit fini(void) + { +- ipt_unregister_match(&ttl_match); +- ++ KSYMMODUNRESOLVE(ipt_ttl); ++ KSYMUNRESOLVE(init_iptable_ttl); ++ KSYMUNRESOLVE(fini_iptable_ttl); ++ fini_iptable_ttl(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/iptable_filter.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_filter.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/iptable_filter.c 2006-05-11 13:05:42.000000000 +0400 +@@ -11,12 +11,23 @@ + */ + + #include <linux/module.h> ++#include <linux/nfcalls.h> + #include <linux/netfilter_ipv4/ip_tables.h> ++#include <ub/ub_mem.h> + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); + MODULE_DESCRIPTION("iptables filter table"); + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_filter (*(get_exec_env()->_ve_ipt_filter_pf)) ++#define ve_ipt_ops (get_exec_env()->_ve_ipt_filter_io) ++#else 
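/*
 * Illustrative sketch only (not part of the patch): a minimal userspace
 * model of the ve_packet_filter / ve_ipt_ops indirection this hunk adds,
 * where a macro resolves every access through the current execution
 * environment instead of one global table.  get_exec_env() is stubbed
 * here, and ve_struct, ve_table, current_env and filter_table are
 * invented stand-ins for the per-VE fields, not kernel symbols.
 */
#include <stdio.h>

struct ve_table { int forward_verdict; };

struct ve_struct {
	struct ve_table *filter_table;	/* this environment's private copy */
};

static struct ve_table ve0_table = { .forward_verdict = 1 };
static struct ve_struct ve0 = { .filter_table = &ve0_table };
static struct ve_struct *current_env = &ve0;

static struct ve_struct *get_exec_env(void) { return current_env; }

/* Same shape as the patch's macro: users of ve_filter_table silently
 * follow the caller's environment rather than a shared global. */
#define ve_filter_table (*(get_exec_env()->filter_table))

int main(void)
{
	ve_filter_table.forward_verdict = 0;	/* touches only this VE's copy */
	printf("ve0 forward_verdict = %d\n", ve0_table.forward_verdict);
	return 0;
}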
++#define ve_packet_filter packet_filter ++#define ve_ipt_ops ipt_ops ++#endif ++ + #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) + + /* Standard entry. */ +@@ -38,12 +49,12 @@ struct ipt_error + struct ipt_error_target target; + }; + +-static struct ++static struct ipt_filter_initial_table + { + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +-} initial_table __initdata ++} initial_table + = { { "filter", FILTER_VALID_HOOKS, 4, + sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + { [NF_IP_LOCAL_IN] = 0, +@@ -108,7 +119,7 @@ ipt_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL); + } + + static unsigned int +@@ -126,7 +137,7 @@ ipt_local_out_hook(unsigned int hook, + return NF_ACCEPT; + } + +- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); ++ return ipt_do_table(pskb, hook, in, out, &ve_packet_filter, NULL); + } + + static struct nf_hook_ops ipt_ops[] = { +@@ -157,56 +168,161 @@ static struct nf_hook_ops ipt_ops[] = { + static int forward = NF_ACCEPT; + MODULE_PARM(forward, "i"); + +-static int __init init(void) ++#ifdef CONFIG_VE_IPTABLES ++static void init_ve0_iptable_filter(struct ve_struct *envid) ++{ ++ envid->_ipt_filter_initial_table = &initial_table; ++ envid->_ve_ipt_filter_pf = &packet_filter; ++ envid->_ve_ipt_filter_io = ipt_ops; ++} ++#endif ++ ++int init_iptable_filter(void) + { + int ret; ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; + +- if (forward < 0 || forward > NF_MAX_VERDICT) { +- printk("iptables forward must be 0 or 1\n"); +- return -EINVAL; +- } ++ envid = get_exec_env(); + +- /* Entry 1 is the FORWARD hook */ +- initial_table.entries[1].target.verdict = -forward - 1; ++ if (ve_is_super(envid)) { ++ init_ve0_iptable_filter(envid); ++ } else { ++ __module_get(THIS_MODULE); ++ ret = -ENOMEM; ++ envid->_ipt_filter_initial_table = ++ ub_kmalloc(sizeof(initial_table), GFP_KERNEL); ++ if (!envid->_ipt_filter_initial_table) ++ goto nomem_1; ++ envid->_ve_ipt_filter_pf = ++ ub_kmalloc(sizeof(packet_filter), GFP_KERNEL); ++ if (!envid->_ve_ipt_filter_pf) ++ goto nomem_2; ++ envid->_ve_ipt_filter_io = ++ ub_kmalloc(sizeof(ipt_ops), GFP_KERNEL); ++ if (!envid->_ve_ipt_filter_io) ++ goto nomem_3; ++ ++ /* ++ * Note: in general, it isn't safe to copy the static table ++ * used for VE0, since that table is already registered ++ * and now has some run-time information. ++ * However, inspection of ip_tables.c shows that the only ++ * dynamically changed fields `list' and `private' are ++ * given new values in ipt_register_table() without looking ++ * at the old values. 
2004/06/01 SAW ++ */ ++ memcpy(envid->_ipt_filter_initial_table, &initial_table, ++ sizeof(initial_table)); ++ memcpy(envid->_ve_ipt_filter_pf, &packet_filter, ++ sizeof(packet_filter)); ++ memcpy(envid->_ve_ipt_filter_io, &ipt_ops[0], sizeof(ipt_ops)); ++ ++ envid->_ve_ipt_filter_pf->table = ++ &envid->_ipt_filter_initial_table->repl; ++ } ++#endif + + /* Register table */ +- ret = ipt_register_table(&packet_filter); ++ ret = ipt_register_table(&ve_packet_filter); + if (ret < 0) +- return ret; ++ goto nomem_4; + + /* Register hooks */ +- ret = nf_register_hook(&ipt_ops[0]); ++ ret = nf_register_hook(&ve_ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + +- ret = nf_register_hook(&ipt_ops[1]); ++ ret = nf_register_hook(&ve_ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + +- ret = nf_register_hook(&ipt_ops[2]); ++ ret = nf_register_hook(&ve_ipt_ops[2]); + if (ret < 0) + goto cleanup_hook1; + + return ret; + + cleanup_hook1: +- nf_unregister_hook(&ipt_ops[1]); ++ nf_unregister_hook(&ve_ipt_ops[1]); + cleanup_hook0: +- nf_unregister_hook(&ipt_ops[0]); ++ nf_unregister_hook(&ve_ipt_ops[0]); + cleanup_table: +- ipt_unregister_table(&packet_filter); +- ++ ipt_unregister_table(&ve_packet_filter); ++ nomem_4: ++#ifdef CONFIG_VE_IPTABLES ++ if (!ve_is_super(envid)) ++ kfree(envid->_ve_ipt_filter_io); ++ envid->_ve_ipt_filter_io = NULL; ++ nomem_3: ++ if (!ve_is_super(envid)) ++ kfree(envid->_ve_ipt_filter_pf); ++ envid->_ve_ipt_filter_pf = NULL; ++ nomem_2: ++ if (!ve_is_super(envid)) ++ kfree(envid->_ipt_filter_initial_table); ++ envid->_ipt_filter_initial_table = NULL; ++ nomem_1: ++ if (!ve_is_super(envid)) ++ module_put(THIS_MODULE); ++#endif + return ret; + } + +-static void __exit fini(void) ++void fini_iptable_filter(void) + { + unsigned int i; ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; ++#endif + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) +- nf_unregister_hook(&ipt_ops[i]); ++ nf_unregister_hook(&ve_ipt_ops[i]); ++ ++ ipt_unregister_table(&ve_packet_filter); ++ ++#ifdef CONFIG_VE_IPTABLES ++ envid = get_exec_env(); ++ if (envid->_ipt_filter_initial_table != NULL && !ve_is_super(envid)) { ++ kfree(envid->_ipt_filter_initial_table); ++ kfree(envid->_ve_ipt_filter_pf); ++ kfree(envid->_ve_ipt_filter_io); ++ module_put(THIS_MODULE); ++ } ++ envid->_ipt_filter_initial_table = NULL; ++ envid->_ve_ipt_filter_pf = NULL; ++ envid->_ve_ipt_filter_io = NULL; ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; + +- ipt_unregister_table(&packet_filter); ++ if (forward < 0 || forward > NF_MAX_VERDICT) { ++ printk("iptables forward must be 0 or 1\n"); ++ return -EINVAL; ++ } ++ ++ /* Entry 1 is the FORWARD hook */ ++ initial_table.entries[1].target.verdict = -forward - 1; ++ ++ err = init_iptable_filter(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_filter); ++ KSYMRESOLVE(fini_iptable_filter); ++ KSYMMODRESOLVE(iptable_filter); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_filter); ++ KSYMUNRESOLVE(init_iptable_filter); ++ KSYMUNRESOLVE(fini_iptable_filter); ++ fini_iptable_filter(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/iptable_mangle.c +--- linux-2.6.8.1.orig/net/ipv4/netfilter/iptable_mangle.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/netfilter/iptable_mangle.c 2006-05-11 13:05:42.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/skbuff.h> + #include 
<net/sock.h> + #include <net/route.h> ++#include <linux/nfcalls.h> + #include <linux/ip.h> + + MODULE_LICENSE("GPL"); +@@ -54,7 +55,7 @@ static struct + struct ipt_replace repl; + struct ipt_standard entries[5]; + struct ipt_error term; +-} initial_table __initdata ++} initial_table + = { { "mangle", MANGLE_VALID_HOOKS, 6, + sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] = 0, +@@ -131,6 +132,13 @@ static struct ipt_table packet_mangler = + .me = THIS_MODULE, + }; + ++#ifdef CONFIG_VE_IPTABLES ++#include <linux/sched.h> ++#define ve_packet_mangler (*(get_exec_env()->_ipt_mangle_table)) ++#else ++#define ve_packet_mangler packet_mangler ++#endif ++ + /* The work comes in here from netfilter.c. */ + static unsigned int + ipt_route_hook(unsigned int hook, +@@ -139,7 +147,7 @@ ipt_route_hook(unsigned int hook, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ return ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL); + } + + static unsigned int +@@ -168,7 +176,8 @@ ipt_local_hook(unsigned int hook, + daddr = (*pskb)->nh.iph->daddr; + tos = (*pskb)->nh.iph->tos; + +- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); ++ ret = ipt_do_table(pskb, hook, in, out, &ve_packet_mangler, NULL); ++ + /* Reroute for ANY change. */ + if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE + && ((*pskb)->nh.iph->saddr != saddr +@@ -220,12 +229,12 @@ static struct nf_hook_ops ipt_ops[] = { + }, + }; + +-static int __init init(void) ++static int mangle_init(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[]) + { + int ret; + + /* Register table */ +- ret = ipt_register_table(&packet_mangler); ++ ret = ipt_register_table(packet_mangler); + if (ret < 0) + return ret; + +@@ -261,19 +270,117 @@ static int __init init(void) + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: +- ipt_unregister_table(&packet_mangler); ++ ipt_unregister_table(packet_mangler); + + return ret; + } + +-static void __exit fini(void) ++static void mangle_fini(struct ipt_table *packet_mangler, struct nf_hook_ops ipt_ops[]) + { + unsigned int i; + +- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) ++ for (i = 0; i < 5; i++) + nf_unregister_hook(&ipt_ops[i]); + +- ipt_unregister_table(&packet_mangler); ++ ipt_unregister_table(packet_mangler); ++} ++ ++int init_iptable_mangle(void) ++{ ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; ++ struct ipt_table *table; ++ struct nf_hook_ops *hooks; ++ int err; ++ ++ envid = get_exec_env(); ++ if (ve_is_super(envid)) { ++ table = &packet_mangler; ++ hooks = ipt_ops; ++ } else { ++ __module_get(THIS_MODULE); ++ err = -ENOMEM; ++ table = kmalloc(sizeof(packet_mangler), GFP_KERNEL); ++ if (table == NULL) ++ goto nomem_1; ++ hooks = kmalloc(sizeof(ipt_ops), GFP_KERNEL); ++ if (hooks == NULL) ++ goto nomem_2; ++ ++ memcpy(table, &packet_mangler, sizeof(packet_mangler)); ++ memcpy(hooks, ipt_ops, sizeof(ipt_ops)); ++ } ++ envid->_ipt_mangle_hooks = hooks; ++ envid->_ipt_mangle_table = table; ++ ++ err = mangle_init(table, hooks); ++ if (err) ++ goto err_minit; ++ ++ return 0; ++ ++err_minit: ++ envid->_ipt_mangle_table = NULL; ++ envid->_ipt_mangle_hooks = NULL; ++ if (!ve_is_super(envid)) ++ kfree(hooks); ++nomem_2: ++ if (!ve_is_super(envid)) { ++ kfree(table); ++nomem_1: ++ module_put(THIS_MODULE); ++ } ++ return err; ++#else ++ return mangle_init(&packet_mangler, ipt_ops); ++#endif ++} ++ ++void 
fini_iptable_mangle(void) ++{ ++#ifdef CONFIG_VE_IPTABLES ++ struct ve_struct *envid; ++ struct ipt_table *table; ++ struct nf_hook_ops *hooks; ++ ++ envid = get_exec_env(); ++ table = envid->_ipt_mangle_table; ++ hooks = envid->_ipt_mangle_hooks; ++ if (table == NULL) ++ return; ++ mangle_fini(table, hooks); ++ envid->_ipt_mangle_table = NULL; ++ envid->_ipt_mangle_hooks = NULL; ++ if (!ve_is_super(envid)) { ++ kfree(hooks); ++ kfree(table); ++ module_put(THIS_MODULE); ++ } ++#else ++ mangle_fini(&packet_mangler, ipt_ops); ++#endif ++} ++ ++static int __init init(void) ++{ ++ int err; ++ ++ err = init_iptable_mangle(); ++ if (err < 0) ++ return err; ++ ++ KSYMRESOLVE(init_iptable_mangle); ++ KSYMRESOLVE(fini_iptable_mangle); ++ KSYMMODRESOLVE(iptable_mangle); ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ KSYMMODUNRESOLVE(iptable_mangle); ++ KSYMUNRESOLVE(init_iptable_mangle); ++ KSYMUNRESOLVE(fini_iptable_mangle); ++ fini_iptable_mangle(); + } + + module_init(init); +diff -uprN linux-2.6.8.1.orig/net/ipv4/proc.c linux-2.6.8.1-ve022stab078/net/ipv4/proc.c +--- linux-2.6.8.1.orig/net/ipv4/proc.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/proc.c 2006-05-11 13:05:42.000000000 +0400 +@@ -262,11 +262,12 @@ static int snmp_seq_show(struct seq_file + seq_printf(seq, " %s", snmp4_ipstats_list[i].name); + + seq_printf(seq, "\nIp: %d %d", +- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl); ++ ve_ipv4_devconf.forwarding ? 1 : 2, ++ sysctl_ip_default_ttl); + + for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) ip_statistics, ++ fold_field((void **) ve_ip_statistics, + snmp4_ipstats_list[i].entry)); + + seq_puts(seq, "\nIcmp:"); +@@ -276,7 +277,7 @@ static int snmp_seq_show(struct seq_file + seq_puts(seq, "\nIcmp:"); + for (i = 0; snmp4_icmp_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) icmp_statistics, ++ fold_field((void **) ve_icmp_statistics, + snmp4_icmp_list[i].entry)); + + seq_puts(seq, "\nTcp:"); +@@ -288,11 +289,11 @@ static int snmp_seq_show(struct seq_file + /* MaxConn field is signed, RFC 2012 */ + if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) + seq_printf(seq, " %ld", +- fold_field((void **) tcp_statistics, ++ fold_field((void **) ve_tcp_statistics, + snmp4_tcp_list[i].entry)); + else + seq_printf(seq, " %lu", +- fold_field((void **) tcp_statistics, ++ fold_field((void **) ve_tcp_statistics, + snmp4_tcp_list[i].entry)); + } + +@@ -303,7 +304,7 @@ static int snmp_seq_show(struct seq_file + seq_puts(seq, "\nUdp:"); + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) udp_statistics, ++ fold_field((void **) ve_udp_statistics, + snmp4_udp_list[i].entry)); + + seq_putc(seq, '\n'); +@@ -337,7 +338,7 @@ static int netstat_seq_show(struct seq_f + seq_puts(seq, "\nTcpExt:"); + for (i = 0; snmp4_net_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +- fold_field((void **) net_statistics, ++ fold_field((void **) ve_net_statistics, + snmp4_net_list[i].entry)); + + seq_putc(seq, '\n'); +diff -uprN linux-2.6.8.1.orig/net/ipv4/raw.c linux-2.6.8.1-ve022stab078/net/ipv4/raw.c +--- linux-2.6.8.1.orig/net/ipv4/raw.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/raw.c 2006-05-11 13:05:42.000000000 +0400 +@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock + if (inet->num == num && + !(inet->daddr && inet->daddr != raddr) && + !(inet->rcv_saddr && inet->rcv_saddr != laddr) && +- 
!(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) ++ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) && ++ ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env())) + goto found; /* gotcha */ + } + sk = NULL; +@@ -689,8 +690,12 @@ static struct sock *raw_get_first(struct + struct hlist_node *node; + + sk_for_each(sk, node, &raw_v4_htable[state->bucket]) +- if (sk->sk_family == PF_INET) ++ if (sk->sk_family == PF_INET) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ++ get_exec_env())) ++ continue; + goto found; ++ } + } + sk = NULL; + found: +@@ -704,8 +709,14 @@ static struct sock *raw_get_next(struct + do { + sk = sk_next(sk); + try_again: +- ; +- } while (sk && sk->sk_family != PF_INET); ++ if (!sk) ++ break; ++ if (sk->sk_family != PF_INET) ++ continue; ++ if (ve_accessible(VE_OWNER_SK(sk), ++ get_exec_env())) ++ break; ++ } while (1); + + if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { + sk = sk_head(&raw_v4_htable[state->bucket]); +diff -uprN linux-2.6.8.1.orig/net/ipv4/route.c linux-2.6.8.1-ve022stab078/net/ipv4/route.c +--- linux-2.6.8.1.orig/net/ipv4/route.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/route.c 2006-05-11 13:05:42.000000000 +0400 +@@ -108,6 +108,8 @@ + + #define RT_GC_TIMEOUT (300*HZ) + ++int ip_rt_src_check = 1; ++ + int ip_rt_min_delay = 2 * HZ; + int ip_rt_max_delay = 10 * HZ; + int ip_rt_max_size; +@@ -215,11 +217,28 @@ static unsigned int rt_hash_code(u32 dad + & rt_hash_mask); + } + ++void prepare_rt_cache(void) ++{ ++#ifdef CONFIG_VE ++ struct rtable *r; ++ int i; ++ ++ for (i = rt_hash_mask; i >= 0; i--) { ++ spin_lock_bh(&rt_hash_table[i].lock); ++ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) { ++ r->fl.owner_env = get_ve0(); ++ } ++ spin_unlock_bh(&rt_hash_table[i].lock); ++ } ++#endif ++} ++ + #ifdef CONFIG_PROC_FS + struct rt_cache_iter_state { + int bucket; + }; + ++static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r); + static struct rtable *rt_cache_get_first(struct seq_file *seq) + { + struct rtable *r = NULL; +@@ -232,6 +251,8 @@ static struct rtable *rt_cache_get_first + break; + rcu_read_unlock(); + } ++ if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env())) ++ r = rt_cache_get_next(seq, r); + return r; + } + +@@ -239,15 +260,20 @@ static struct rtable *rt_cache_get_next( + { + struct rt_cache_iter_state *st = seq->private; + ++start: + smp_read_barrier_depends(); +- r = r->u.rt_next; ++ do { ++ r = r->u.rt_next; ++ } while (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env())); + while (!r) { + rcu_read_unlock(); + if (--st->bucket < 0) +- break; ++ goto out; + rcu_read_lock(); + r = rt_hash_table[st->bucket].chain; + } ++ goto start; ++out: + return r; + } + +@@ -549,26 +575,106 @@ static void rt_check_expire(unsigned lon + mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval); + } + ++typedef unsigned long rt_flush_gen_t; ++ ++#ifdef CONFIG_VE ++ ++static rt_flush_gen_t rt_flush_gen; ++ ++/* called under rt_flush_lock */ ++static void set_rt_flush_required(struct ve_struct *env) ++{ ++ /* ++ * If the global generation rt_flush_gen is equal to G, then ++ * the pass considering entries labelled by G is yet to come. 
++ */ ++ env->rt_flush_required = rt_flush_gen; ++} ++ ++static spinlock_t rt_flush_lock; ++static rt_flush_gen_t reset_rt_flush_required(void) ++{ ++ rt_flush_gen_t g; ++ ++ spin_lock_bh(&rt_flush_lock); ++ g = rt_flush_gen++; ++ spin_unlock_bh(&rt_flush_lock); ++ return g; ++} ++ ++static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen) ++{ ++ /* can be checked without the lock */ ++ return env->rt_flush_required >= gen; ++} ++ ++#else ++ ++static void set_rt_flush_required(struct ve_struct *env) ++{ ++} ++ ++static rt_flush_gen_t reset_rt_flush_required(void) ++{ ++ return 0; ++} ++ ++#endif ++ + /* This can run from both BH and non-BH contexts, the latter + * in the case of a forced flush event. + */ + static void rt_run_flush(unsigned long dummy) + { + int i; +- struct rtable *rth, *next; ++ struct rtable * rth, * next; ++ struct rtable * tail; ++ rt_flush_gen_t gen; + + rt_deadline = 0; + + get_random_bytes(&rt_hash_rnd, 4); + ++ gen = reset_rt_flush_required(); ++ + for (i = rt_hash_mask; i >= 0; i--) { ++#ifdef CONFIG_VE ++ struct rtable ** prev, * p; ++ ++ spin_lock_bh(&rt_hash_table[i].lock); ++ rth = rt_hash_table[i].chain; ++ ++ /* defer releasing the head of the list after spin_unlock */ ++ for (tail = rth; tail; tail = tail->u.rt_next) ++ if (!check_rt_flush_required(tail->fl.owner_env, gen)) ++ break; ++ if (rth != tail) ++ rt_hash_table[i].chain = tail; ++ ++ /* call rt_free on entries after the tail requiring flush */ ++ prev = &rt_hash_table[i].chain; ++ for (p = *prev; p; p = next) { ++ next = p->u.rt_next; ++ if (!check_rt_flush_required(p->fl.owner_env, gen)) { ++ prev = &p->u.rt_next; ++ } else { ++ *prev = next; ++ rt_free(p); ++ } ++ } ++ ++#else + spin_lock_bh(&rt_hash_table[i].lock); + rth = rt_hash_table[i].chain; ++ + if (rth) + rt_hash_table[i].chain = NULL; ++ tail = NULL; ++ ++#endif + spin_unlock_bh(&rt_hash_table[i].lock); + +- for (; rth; rth = next) { ++ for (; rth != tail; rth = next) { + next = rth->u.rt_next; + rt_free(rth); + } +@@ -604,6 +710,8 @@ void rt_cache_flush(int delay) + delay = tmo; + } + ++ set_rt_flush_required(get_exec_env()); ++ + if (delay <= 0) { + spin_unlock_bh(&rt_flush_lock); + rt_run_flush(0); +@@ -619,9 +727,30 @@ void rt_cache_flush(int delay) + + static void rt_secret_rebuild(unsigned long dummy) + { ++ int i; ++ struct rtable *rth, *next; + unsigned long now = jiffies; + +- rt_cache_flush(0); ++ spin_lock_bh(&rt_flush_lock); ++ del_timer(&rt_flush_timer); ++ spin_unlock_bh(&rt_flush_lock); ++ ++ rt_deadline = 0; ++ get_random_bytes(&rt_hash_rnd, 4); ++ ++ for (i = rt_hash_mask; i >= 0; i--) { ++ spin_lock_bh(&rt_hash_table[i].lock); ++ rth = rt_hash_table[i].chain; ++ if (rth) ++ rt_hash_table[i].chain = NULL; ++ spin_unlock_bh(&rt_hash_table[i].lock); ++ ++ for (; rth; rth = next) { ++ next = rth->u.rt_next; ++ rt_free(rth); ++ } ++ } ++ + mod_timer(&rt_secret_timer, now + ip_rt_secret_interval); + } + +@@ -763,7 +892,8 @@ static inline int compare_keys(struct fl + { + return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && + fl1->oif == fl2->oif && +- fl1->iif == fl2->iif; ++ fl1->iif == fl2->iif && ++ ve_accessible_strict(fl1->owner_env, fl2->owner_env); + } + + static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) +@@ -975,7 +1105,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + struct rtable *rth, **rthp; + u32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + tos &= IPTOS_RT_MASK; + 
+ if (!in_dev) +@@ -1012,6 +1144,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + rth->fl.fl4_src != skeys[i] || + rth->fl.fl4_tos != tos || + rth->fl.oif != ikeys[k] || ++#ifdef CONFIG_VE ++ !ve_accessible_strict(rth->fl.owner_env, ++ ve) || ++#endif + rth->fl.iif != 0) { + rthp = &rth->u.rt_next; + continue; +@@ -1050,6 +1186,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd + rt->u.dst.neighbour = NULL; + rt->u.dst.hh = NULL; + rt->u.dst.xfrm = NULL; ++#ifdef CONFIG_VE ++ rt->fl.owner_env = ve; ++#endif + + rt->rt_flags |= RTCF_REDIRECTED; + +@@ -1495,6 +1634,9 @@ static int ip_route_input_mc(struct sk_b + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + #ifdef CONFIG_IP_ROUTE_NAT +@@ -1506,7 +1648,7 @@ static int ip_route_input_mc(struct sk_b + #endif + rth->rt_iif = + rth->fl.iif = dev->ifindex; +- rth->u.dst.dev = &loopback_dev; ++ rth->u.dst.dev = &visible_loopback_dev; + dev_hold(rth->u.dst.dev); + rth->idev = in_dev_get(rth->u.dst.dev); + rth->fl.oif = 0; +@@ -1641,7 +1783,7 @@ static int ip_route_input_slow(struct sk + if (res.type == RTN_LOCAL) { + int result; + result = fib_validate_source(saddr, daddr, tos, +- loopback_dev.ifindex, ++ visible_loopback_dev.ifindex, + dev, &spec_dst, &itag); + if (result < 0) + goto martian_source; +@@ -1705,6 +1847,9 @@ static int ip_route_input_slow(struct sk + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + rth->rt_gateway = daddr; +@@ -1774,6 +1919,9 @@ local_input: + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= skb->nfmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->fl.fl4_src = saddr; + rth->rt_src = saddr; + #ifdef CONFIG_IP_ROUTE_NAT +@@ -1785,7 +1933,7 @@ local_input: + #endif + rth->rt_iif = + rth->fl.iif = dev->ifindex; +- rth->u.dst.dev = &loopback_dev; ++ rth->u.dst.dev = &visible_loopback_dev; + dev_hold(rth->u.dst.dev); + rth->idev = in_dev_get(rth->u.dst.dev); + rth->rt_gateway = daddr; +@@ -1873,6 +2021,9 @@ int ip_route_input(struct sk_buff *skb, + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark == skb->nfmark && + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env == get_exec_env() && ++#endif + rth->fl.fl4_tos == tos) { + rth->u.dst.lastuse = jiffies; + dst_hold(&rth->u.dst); +@@ -1938,7 +2089,7 @@ static int ip_route_output_slow(struct r + .fwmark = oldflp->fl4_fwmark + #endif + } }, +- .iif = loopback_dev.ifindex, ++ .iif = visible_loopback_dev.ifindex, + .oif = oldflp->oif }; + struct fib_result res; + unsigned flags = 0; +@@ -1961,10 +2112,13 @@ static int ip_route_output_slow(struct r + ZERONET(oldflp->fl4_src)) + goto out; + +- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ +- dev_out = ip_dev_find(oldflp->fl4_src); +- if (dev_out == NULL) +- goto out; ++ if (ip_rt_src_check) { ++ /* It is equivalent to ++ inet_addr_type(saddr) == RTN_LOCAL */ ++ dev_out = ip_dev_find(oldflp->fl4_src); ++ if (dev_out == NULL) ++ goto out; ++ } + + /* I removed check for oif == dev_out->oif here. + It was wrong for two reasons: +@@ -1991,6 +2145,12 @@ static int ip_route_output_slow(struct r + Luckily, this hack is good workaround. 
+ */ + ++ if (dev_out == NULL) { ++ dev_out = ip_dev_find(oldflp->fl4_src); ++ if (dev_out == NULL) ++ goto out; ++ } ++ + fl.oif = dev_out->ifindex; + goto make_route; + } +@@ -2030,9 +2190,9 @@ static int ip_route_output_slow(struct r + fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &visible_loopback_dev; + dev_hold(dev_out); +- fl.oif = loopback_dev.ifindex; ++ fl.oif = visible_loopback_dev.ifindex; + res.type = RTN_LOCAL; + flags |= RTCF_LOCAL; + goto make_route; +@@ -2080,7 +2240,7 @@ static int ip_route_output_slow(struct r + fl.fl4_src = fl.fl4_dst; + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &visible_loopback_dev; + dev_hold(dev_out); + fl.oif = dev_out->ifindex; + if (res.fi) +@@ -2162,6 +2322,9 @@ make_route: + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark= oldflp->fl4_fwmark; + #endif ++#ifdef CONFIG_VE ++ rth->fl.owner_env = get_exec_env(); ++#endif + rth->rt_dst = fl.fl4_dst; + rth->rt_src = fl.fl4_src; + #ifdef CONFIG_IP_ROUTE_NAT +@@ -2241,6 +2404,7 @@ int __ip_route_output_key(struct rtable + #ifdef CONFIG_IP_ROUTE_FWMARK + rth->fl.fl4_fwmark == flp->fl4_fwmark && + #endif ++ ve_accessible_strict(rth->fl.owner_env, get_exec_env()) && + !((rth->fl.fl4_tos ^ flp->fl4_tos) & + (IPTOS_RT_MASK | RTO_ONLINK))) { + rth->u.dst.lastuse = jiffies; +@@ -2345,7 +2509,7 @@ static int rt_fill_info(struct sk_buff * + u32 dst = rt->rt_dst; + + if (MULTICAST(dst) && !LOCAL_MCAST(dst) && +- ipv4_devconf.mc_forwarding) { ++ ve_ipv4_devconf.mc_forwarding) { + int err = ipmr_get_route(skb, r, nowait); + if (err <= 0) { + if (!nowait) { +@@ -2390,7 +2554,10 @@ int inet_rtm_getroute(struct sk_buff *in + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. + */ +- skb->mac.raw = skb->data; ++ skb->mac.raw = skb->nh.raw = skb->data; ++ ++ /* Bugfix: need to give ip_route_input enough of an IP header to not gag. 
*/ ++ skb->nh.iph->protocol = IPPROTO_ICMP; + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + + if (rta[RTA_SRC - 1]) +@@ -2496,6 +2663,11 @@ void ip_rt_multicast_event(struct in_dev + #ifdef CONFIG_SYSCTL + static int flush_delay; + ++void *get_flush_delay_addr(void) ++{ ++ return &flush_delay; ++} ++ + static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos) +@@ -2509,6 +2681,13 @@ static int ipv4_sysctl_rtcache_flush(ctl + return -EINVAL; + } + ++int visible_ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, ++ struct file *filp, void __user *buffer, ++ size_t *lenp, loff_t *ppos) ++{ ++ return ipv4_sysctl_rtcache_flush(ctl, write, filp, buffer, lenp, ppos); ++} ++ + static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, + int __user *name, + int nlen, +@@ -2527,6 +2706,19 @@ static int ipv4_sysctl_rtcache_flush_str + return 0; + } + ++int visible_ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, ++ int __user *name, ++ int nlen, ++ void __user *oldval, ++ size_t __user *oldlenp, ++ void __user *newval, ++ size_t newlen, ++ void **context) ++{ ++ return ipv4_sysctl_rtcache_flush_strategy(table, name, nlen, oldval, ++ oldlenp, newval, newlen, context); ++} ++ + ctl_table ipv4_route_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, +@@ -2838,7 +3030,7 @@ int __init ip_rt_init(void) + } + + #ifdef CONFIG_NET_CLS_ROUTE +- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); ++ create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL); + #endif + #endif + #ifdef CONFIG_XFRM +diff -uprN linux-2.6.8.1.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.8.1-ve022stab078/net/ipv4/sysctl_net_ipv4.c +--- linux-2.6.8.1.orig/net/ipv4/sysctl_net_ipv4.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/sysctl_net_ipv4.c 2006-05-11 13:05:42.000000000 +0400 +@@ -48,6 +48,8 @@ extern int inet_peer_maxttl; + extern int inet_peer_gc_mintime; + extern int inet_peer_gc_maxtime; + ++int sysctl_tcp_use_sg = 1; ++ + #ifdef CONFIG_SYSCTL + static int tcp_retr1_max = 255; + static int ip_local_port_range_min[] = { 1, 1 }; +@@ -64,17 +66,23 @@ static + int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { +- int val = ipv4_devconf.forwarding; ++ int val = ve_ipv4_devconf.forwarding; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + +- if (write && ipv4_devconf.forwarding != val) ++ if (write && ve_ipv4_devconf.forwarding != val) + inet_forward_change(); + + return ret; + } + ++int visible_ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ return ipv4_sysctl_forward(ctl, write, filp, buffer, lenp, ppos); ++} ++ + static int ipv4_sysctl_forward_strategy(ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, +@@ -117,6 +125,16 @@ static int ipv4_sysctl_forward_strategy( + return 1; + } + ++int visible_ipv4_sysctl_forward_strategy(ctl_table *table, ++ int __user *name, int nlen, ++ void __user *oldval, size_t __user *oldlenp, ++ void __user *newval, size_t newlen, ++ void **context) ++{ ++ return ipv4_sysctl_forward_strategy(table, name, nlen, ++ oldval, oldlenp, newval, newlen, context); ++} ++ + ctl_table ipv4_table[] = { + { + .ctl_name = NET_IPV4_TCP_TIMESTAMPS, +@@ -682,6 +700,14 @@ ctl_table ipv4_table[] = { + .mode = 0644, + .proc_handler = &proc_dointvec, 
+ }, ++ { ++ .ctl_name = NET_TCP_USE_SG, ++ .procname = "tcp_use_sg", ++ .data = &sysctl_tcp_use_sg, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ }, + { .ctl_name = 0 } + }; + +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp.c +--- linux-2.6.8.1.orig/net/ipv4/tcp.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp.c 2006-05-11 13:05:44.000000000 +0400 +@@ -248,6 +248,7 @@ + */ + + #include <linux/config.h> ++#include <linux/kmem_cache.h> + #include <linux/module.h> + #include <linux/types.h> + #include <linux/fcntl.h> +@@ -262,6 +263,9 @@ + #include <net/xfrm.h> + #include <net/ip.h> + ++#include <ub/ub_orphan.h> ++#include <ub/ub_net.h> ++#include <ub/ub_tcp.h> + + #include <asm/uaccess.h> + #include <asm/ioctls.h> +@@ -333,6 +337,7 @@ unsigned int tcp_poll(struct file *file, + unsigned int mask; + struct sock *sk = sock->sk; + struct tcp_opt *tp = tcp_sk(sk); ++ int check_send_space; + + poll_wait(file, sk->sk_sleep, wait); + if (sk->sk_state == TCP_LISTEN) +@@ -347,6 +352,21 @@ unsigned int tcp_poll(struct file *file, + if (sk->sk_err) + mask = POLLERR; + ++ check_send_space = 1; ++#ifdef CONFIG_USER_RESOURCE ++ if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) { ++ unsigned long size; ++ size = MAX_TCP_HEADER + tp->mss_cache; ++ if (size > SOCK_MIN_UBCSPACE) ++ size = SOCK_MIN_UBCSPACE; ++ size = skb_charge_size(size); ++ if (ub_sock_makewres_tcp(sk, size)) { ++ check_send_space = 0; ++ ub_sock_sndqueueadd_tcp(sk, size); ++ } ++ } ++#endif ++ + /* + * POLLHUP is certainly not done right. But poll() doesn't + * have a notion of HUP in just one direction, and for a +@@ -390,7 +410,7 @@ unsigned int tcp_poll(struct file *file, + sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + mask |= POLLIN | POLLRDNORM; + +- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + mask |= POLLOUT | POLLWRNORM; + } else { /* send SIGIO later */ +@@ -566,7 +586,7 @@ static void tcp_listen_stop (struct sock + + sock_orphan(child); + +- atomic_inc(&tcp_orphan_count); ++ tcp_inc_orphan_count(child); + + tcp_destroy_sock(child); + +@@ -659,16 +679,23 @@ static ssize_t do_tcp_sendpages(struct s + int copy, i; + int offset = poffset % PAGE_SIZE; + int size = min_t(size_t, psize, PAGE_SIZE - offset); ++ unsigned long chargesize = 0; + + if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { + new_segment: ++ chargesize = 0; + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; + ++ chargesize = skb_charge_size(MAX_TCP_HEADER + ++ tp->mss_cache); ++ if (ub_sock_getwres_tcp(sk, chargesize) < 0) ++ goto wait_for_ubspace; + skb = sk_stream_alloc_pskb(sk, 0, tp->mss_cache, + sk->sk_allocation); + if (!skb) + goto wait_for_memory; ++ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF); + + skb_entail(sk, tp, skb); + copy = mss_now; +@@ -715,10 +742,14 @@ new_segment: + wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + wait_for_memory: ++ ub_sock_retwres_tcp(sk, chargesize, ++ skb_charge_size(MAX_TCP_HEADER + tp->mss_cache)); ++ chargesize = 0; ++wait_for_ubspace: + if (copied) + tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + +- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) ++ if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); +@@ -758,9 +789,6 @@ ssize_t 
tcp_sendpage(struct socket *sock + return res; + } + +-#define TCP_PAGE(sk) (sk->sk_sndmsg_page) +-#define TCP_OFF(sk) (sk->sk_sndmsg_off) +- + static inline int select_size(struct sock *sk, struct tcp_opt *tp) + { + int tmp = tp->mss_cache_std; +@@ -814,6 +842,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru + while (--iovlen >= 0) { + int seglen = iov->iov_len; + unsigned char __user *from = iov->iov_base; ++ unsigned long chargesize = 0; + + iov++; + +@@ -824,18 +853,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru + + if (!sk->sk_send_head || + (copy = mss_now - skb->len) <= 0) { ++ unsigned long size; + + new_segment: + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ ++ chargesize = 0; + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; +- +- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp), +- 0, sk->sk_allocation); ++ size = select_size(sk, tp); ++ chargesize = skb_charge_size(MAX_TCP_HEADER + ++ size); ++ if (ub_sock_getwres_tcp(sk, chargesize) < 0) ++ goto wait_for_ubspace; ++ skb = sk_stream_alloc_pskb(sk, size, 0, ++ sk->sk_allocation); + if (!skb) + goto wait_for_memory; ++ ub_skb_set_charge(skb, sk, chargesize, ++ UB_TCPSNDBUF); + + /* + * Check whether we can use HW checksum. +@@ -888,11 +925,15 @@ new_segment: + ~(L1_CACHE_BYTES - 1); + if (off == PAGE_SIZE) { + put_page(page); ++ ub_sock_tcp_detachpage(sk); + TCP_PAGE(sk) = page = NULL; + } + } + + if (!page) { ++ chargesize = PAGE_SIZE; ++ if (ub_sock_tcp_chargepage(sk) < 0) ++ goto wait_for_ubspace; + /* Allocate new cache page. */ + if (!(page = sk_stream_alloc_page(sk))) + goto wait_for_memory; +@@ -928,7 +969,8 @@ new_segment: + } else if (off + copy < PAGE_SIZE) { + get_page(page); + TCP_PAGE(sk) = page; +- } ++ } else ++ ub_sock_tcp_detachpage(sk); + } + + TCP_OFF(sk) = off + copy; +@@ -958,10 +1000,15 @@ new_segment: + wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + wait_for_memory: ++ ub_sock_retwres_tcp(sk, chargesize, ++ skb_charge_size(MAX_TCP_HEADER+tp->mss_cache)); ++ chargesize = 0; ++wait_for_ubspace: + if (copied) + tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + +- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) ++ if ((err = sk_stream_wait_memory(sk, &timeo, ++ chargesize)) != 0) + goto do_error; + + mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); +@@ -1058,7 +1105,18 @@ static void cleanup_rbuf(struct sock *sk + #if TCP_DEBUG + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +- BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); ++ if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) { ++ printk("KERNEL: assertion: skb==NULL || " ++ "before(tp->copied_seq, skb->end_seq)\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied, ++ tp->copied_seq, tp->rcv_nxt); ++ printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n", ++ skb->len, TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq); ++ } + #endif + + if (tcp_ack_scheduled(tp)) { +@@ -1281,7 +1339,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru + goto found_ok_skb; + if (skb->h.th->fin) + goto found_fin_ok; +- BUG_TRAP(flags & MSG_PEEK); ++ if (!(flags & MSG_PEEK)) { ++ printk("KERNEL: assertion: flags&MSG_PEEK\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? 
++ VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("flags=0x%x, len=%d, copied_seq=%d, " ++ "rcv_nxt=%d\n", flags, len, ++ tp->copied_seq, tp->rcv_nxt); ++ printk("skb->len=%d, *seq=%d, skb->seq=%d, " ++ "skb->end_seq=%d, offset=%d\n", ++ skb->len, *seq, ++ TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq, ++ offset); ++ } + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + +@@ -1344,8 +1417,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru + + tp->ucopy.len = len; + +- BUG_TRAP(tp->copied_seq == tp->rcv_nxt || +- (flags & (MSG_PEEK | MSG_TRUNC))); ++ if (!(tp->copied_seq == tp->rcv_nxt || ++ (flags&(MSG_PEEK|MSG_TRUNC)))) { ++ printk("KERNEL: assertion: tp->copied_seq == " ++ "tp->rcv_nxt || ...\n"); ++ printk("VE%u pid %d comm %.16s\n", ++ (get_exec_env() ? ++ VEID(get_exec_env()) : 0), ++ current->pid, current->comm); ++ printk("flags=0x%x, len=%d, copied_seq=%d, " ++ "rcv_nxt=%d\n", flags, len, ++ tp->copied_seq, tp->rcv_nxt); ++ } + + /* Ugly... If prequeue is not empty, we have to + * process it before releasing socket, otherwise +@@ -1614,7 +1697,7 @@ void tcp_destroy_sock(struct sock *sk) + } + #endif + +- atomic_dec(&tcp_orphan_count); ++ tcp_dec_orphan_count(sk); + sock_put(sk); + } + +@@ -1738,7 +1821,7 @@ adjudge_to_death: + if (tmo > TCP_TIMEWAIT_LEN) { + tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); + } else { +- atomic_inc(&tcp_orphan_count); ++ tcp_inc_orphan_count(sk); + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto out; + } +@@ -1746,9 +1829,7 @@ adjudge_to_death: + } + if (sk->sk_state != TCP_CLOSE) { + sk_stream_mem_reclaim(sk); +- if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { ++ if (tcp_too_many_orphans(sk, tcp_get_orphan_count(sk))) { + if (net_ratelimit()) + printk(KERN_INFO "TCP: too many of orphaned " + "sockets\n"); +@@ -1757,7 +1838,7 @@ adjudge_to_death: + NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); + } + } +- atomic_inc(&tcp_orphan_count); ++ tcp_inc_orphan_count(sk); + + if (sk->sk_state == TCP_CLOSE) + tcp_destroy_sock(sk); +@@ -1823,12 +1904,13 @@ int tcp_disconnect(struct sock *sk, int + tp->packets_out = 0; + tp->snd_ssthresh = 0x7fffffff; + tp->snd_cwnd_cnt = 0; ++ tp->advmss = 65535; + tcp_set_ca_state(tp, TCP_CA_Open); + tcp_clear_retrans(tp); + tcp_delack_init(tp); + sk->sk_send_head = NULL; +- tp->saw_tstamp = 0; +- tcp_sack_reset(tp); ++ tp->rx_opt.saw_tstamp = 0; ++ tcp_sack_reset(&tp->rx_opt); + __sk_dst_reset(sk); + + BUG_TRAP(!inet->num || tp->bind_hash); +@@ -1967,7 +2049,7 @@ int tcp_setsockopt(struct sock *sk, int + err = -EINVAL; + break; + } +- tp->user_mss = val; ++ tp->rx_opt.user_mss = val; + break; + + case TCP_NODELAY: +@@ -2125,7 +2207,7 @@ int tcp_getsockopt(struct sock *sk, int + case TCP_MAXSEG: + val = tp->mss_cache_std; + if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) +- val = tp->user_mss; ++ val = tp->rx_opt.user_mss; + break; + case TCP_NODELAY: + val = !!(tp->nonagle&TCP_NAGLE_OFF); +@@ -2189,6 +2271,7 @@ int tcp_getsockopt(struct sock *sk, int + + extern void __skb_cb_too_small_for_tcp(int, int); + extern void tcpdiag_init(void); ++extern unsigned int nr_free_lowpages(void); + + static __initdata unsigned long thash_entries; + static int __init set_thash_entries(char *str) +@@ -2212,24 +2295,26 @@ void __init tcp_init(void) + + tcp_openreq_cachep = kmem_cache_create("tcp_open_request", + sizeof(struct open_request), +- 0, 
SLAB_HWCACHE_ALIGN, ++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + if (!tcp_openreq_cachep) + panic("tcp_init: Cannot alloc open_request cache."); + + tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", + sizeof(struct tcp_bind_bucket), +- 0, SLAB_HWCACHE_ALIGN, ++ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + if (!tcp_bucket_cachep) + panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + + tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", + sizeof(struct tcp_tw_bucket), +- 0, SLAB_HWCACHE_ALIGN, ++ 0, ++ SLAB_HWCACHE_ALIGN | SLAB_UBC, + NULL, NULL); + if (!tcp_timewait_cachep) + panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); ++ tcp_timewait_cachep->flags |= CFLGS_ENVIDS; + + /* Size and allocate the main established and bind bucket + * hash tables. +@@ -2295,10 +2380,19 @@ void __init tcp_init(void) + } + tcp_port_rover = sysctl_local_port_range[0] - 1; + ++ goal = nr_free_lowpages() / 6; ++ while (order >= 3 && (1536<<order) > goal) ++ order--; ++ + sysctl_tcp_mem[0] = 768 << order; + sysctl_tcp_mem[1] = 1024 << order; + sysctl_tcp_mem[2] = 1536 << order; + ++ if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 4096) ++ sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 4096; ++ if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 4096) ++ sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 4096; ++ + if (order < 3) { + sysctl_tcp_wmem[2] = 64 * 1024; + sysctl_tcp_rmem[0] = PAGE_SIZE; +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_diag.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_diag.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_diag.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_diag.c 2006-05-11 13:05:42.000000000 +0400 +@@ -55,14 +55,14 @@ void tcp_get_info(struct sock *sk, struc + info->tcpi_probes = tp->probes_out; + info->tcpi_backoff = tp->backoff; + +- if (tp->tstamp_ok) ++ if (tp->rx_opt.tstamp_ok) + info->tcpi_options |= TCPI_OPT_TIMESTAMPS; +- if (tp->sack_ok) ++ if (tp->rx_opt.sack_ok) + info->tcpi_options |= TCPI_OPT_SACK; +- if (tp->wscale_ok) { ++ if (tp->rx_opt.wscale_ok) { + info->tcpi_options |= TCPI_OPT_WSCALE; +- info->tcpi_snd_wscale = tp->snd_wscale; +- info->tcpi_rcv_wscale = tp->rcv_wscale; ++ info->tcpi_snd_wscale = tp->rx_opt.snd_wscale; ++ info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; + } + + if (tp->ecn_flags&TCP_ECN_OK) +@@ -253,7 +253,7 @@ static int tcpdiag_get_exact(struct sk_b + return -EINVAL; + } + +- if (sk == NULL) ++ if (sk == NULL || !ve_accessible(VE_OWNER_SK(sk), get_exec_env())) + return -ENOENT; + + err = -ESTALE; +@@ -465,6 +465,9 @@ static int tcpdiag_dump(struct sk_buff * + int s_i, s_num; + struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct rtattr *bc = NULL; ++ struct ve_struct *ve; ++ ++ ve = get_exec_env(); + + if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq))) + bc = (struct rtattr*)(r+1); +@@ -486,6 +489,9 @@ static int tcpdiag_dump(struct sk_buff * + num = 0; + sk_for_each(sk, node, &tcp_listening_hash[i]) { + struct inet_opt *inet = inet_sk(sk); ++ ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (num < s_num) + continue; + if (!(r->tcpdiag_states&TCPF_LISTEN) || +@@ -528,6 +534,8 @@ skip_listen_ht: + sk_for_each(sk, node, &head->chain) { + struct inet_opt *inet = inet_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (num < s_num) + continue; + if (!(r->tcpdiag_states & (1 << sk->sk_state))) +@@ -552,10 +560,14 @@ skip_listen_ht: + sk_for_each(sk, node, + &tcp_ehash[i + tcp_ehash_size].chain) { + struct inet_opt *inet = inet_sk(sk); ++ struct tcp_tw_bucket *tw; 
+ ++ tw = (struct tcp_tw_bucket*)sk; ++ if (!ve_accessible_veid(TW_VEID(tw), VEID(ve))) ++ continue; + if (num < s_num) + continue; +- if (!(r->tcpdiag_states & (1 << sk->sk_zapped))) ++ if (!(r->tcpdiag_states & (1 << tw->tw_substate))) + continue; + if (r->id.tcpdiag_sport != inet->sport && + r->id.tcpdiag_sport) +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_input.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_input.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_input.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_input.c 2006-05-11 13:05:39.000000000 +0400 +@@ -72,6 +72,8 @@ + #include <net/inet_common.h> + #include <linux/ipsec.h> + ++#include <ub/ub_tcp.h> ++ + int sysctl_tcp_timestamps = 1; + int sysctl_tcp_window_scaling = 1; + int sysctl_tcp_sack = 1; +@@ -118,9 +120,9 @@ int sysctl_tcp_bic_low_window = 14; + #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) + #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) + +-#define IsReno(tp) ((tp)->sack_ok == 0) +-#define IsFack(tp) ((tp)->sack_ok & 2) +-#define IsDSack(tp) ((tp)->sack_ok & 4) ++#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) ++#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) ++#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) + + #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) + +@@ -203,7 +205,7 @@ static __inline__ int tcp_in_quickack_mo + + static void tcp_fixup_sndbuf(struct sock *sk) + { +- int sndmem = tcp_sk(sk)->mss_clamp + MAX_TCP_HEADER + 16 + ++ int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + + sizeof(struct sk_buff); + + if (sk->sk_sndbuf < 3 * sndmem) +@@ -259,7 +261,7 @@ tcp_grow_window(struct sock *sk, struct + /* Check #1 */ + if (tp->rcv_ssthresh < tp->window_clamp && + (int)tp->rcv_ssthresh < tcp_space(sk) && +- !tcp_memory_pressure) { ++ ub_tcp_rmem_allows_expand(sk)) { + int incr; + + /* Check #2. 
Increase window, if skb with such overhead +@@ -328,6 +330,8 @@ static void tcp_init_buffer_space(struct + + tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); + tp->snd_cwnd_stamp = tcp_time_stamp; ++ ++ ub_tcp_update_maxadvmss(sk); + } + + static void init_bictcp(struct tcp_opt *tp) +@@ -358,7 +362,7 @@ static void tcp_clamp_window(struct sock + if (ofo_win) { + if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && +- !tcp_memory_pressure && ++ !ub_tcp_memory_pressure(sk) && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) + sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), + sysctl_tcp_rmem[2]); +@@ -438,10 +442,10 @@ new_measure: + + static inline void tcp_rcv_rtt_measure_ts(struct tcp_opt *tp, struct sk_buff *skb) + { +- if (tp->rcv_tsecr && ++ if (tp->rx_opt.rcv_tsecr && + (TCP_SKB_CB(skb)->end_seq - + TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) +- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_tsecr, 0); ++ tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); + } + + /* +@@ -828,7 +832,7 @@ static void tcp_init_metrics(struct sock + } + if (dst_metric(dst, RTAX_REORDERING) && + tp->reordering != dst_metric(dst, RTAX_REORDERING)) { +- tp->sack_ok &= ~2; ++ tp->rx_opt.sack_ok &= ~2; + tp->reordering = dst_metric(dst, RTAX_REORDERING); + } + +@@ -860,7 +864,7 @@ static void tcp_init_metrics(struct sock + } + tcp_set_rto(tp); + tcp_bound_rto(tp); +- if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp) ++ if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) + goto reset; + tp->snd_cwnd = tcp_init_cwnd(tp, dst); + tp->snd_cwnd_stamp = tcp_time_stamp; +@@ -871,7 +875,7 @@ reset: + * supported, TCP will fail to recalculate correct + * rtt, if initial rto is too small. FORGET ALL AND RESET! + */ +- if (!tp->saw_tstamp && tp->srtt) { ++ if (!tp->rx_opt.saw_tstamp && tp->srtt) { + tp->srtt = 0; + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; + tp->rto = TCP_TIMEOUT_INIT; +@@ -894,12 +898,12 @@ static void tcp_update_reordering(struct + NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); + #if FASTRETRANS_DEBUG > 1 + printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", +- tp->sack_ok, tp->ca_state, ++ tp->rx_opt.sack_ok, tp->ca_state, + tp->reordering, tp->fackets_out, tp->sacked_out, + tp->undo_marker ? tp->undo_retrans : 0); + #endif + /* Disable FACK yet. */ +- tp->sack_ok &= ~2; ++ tp->rx_opt.sack_ok &= ~2; + } + } + +@@ -989,13 +993,13 @@ tcp_sacktag_write_queue(struct sock *sk, + + if (before(start_seq, ack)) { + dup_sack = 1; +- tp->sack_ok |= 4; ++ tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1 && + !after(end_seq, ntohl(sp[1].end_seq)) && + !before(start_seq, ntohl(sp[1].start_seq))) { + dup_sack = 1; +- tp->sack_ok |= 4; ++ tp->rx_opt.sack_ok |= 4; + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + +@@ -1617,8 +1621,8 @@ static void tcp_cwnd_down(struct tcp_opt + static __inline__ int tcp_packet_delayed(struct tcp_opt *tp) + { + return !tp->retrans_stamp || +- (tp->saw_tstamp && tp->rcv_tsecr && +- (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0); ++ (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && ++ (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0); + } + + /* Undo procedures. */ +@@ -1966,7 +1970,7 @@ static void tcp_ack_saw_tstamp(struct tc + * answer arrives rto becomes 120 seconds! If at least one of segments + * in window is lost... Voila. 
--ANK (010210) + */ +- seq_rtt = tcp_time_stamp - tp->rcv_tsecr; ++ seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + tcp_rtt_estimator(tp, seq_rtt); + tcp_set_rto(tp); + tp->backoff = 0; +@@ -1997,7 +2001,7 @@ static __inline__ void + tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) + { + /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ +- if (tp->saw_tstamp && tp->rcv_tsecr) ++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + tcp_ack_saw_tstamp(tp, flag); + else if (seq_rtt >= 0) + tcp_ack_no_tstamp(tp, seq_rtt, flag); +@@ -2401,7 +2405,7 @@ static int tcp_clean_rtx_queue(struct so + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); + BUG_TRAP((int)tp->retrans_out >= 0); +- if (!tp->packets_out && tp->sack_ok) { ++ if (!tp->packets_out && tp->rx_opt.sack_ok) { + if (tp->lost_out) { + printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, + tp->ca_state); +@@ -2477,7 +2481,7 @@ static int tcp_ack_update_window(struct + u32 nwin = ntohs(skb->h.th->window); + + if (likely(!skb->h.th->syn)) +- nwin <<= tp->snd_wscale; ++ nwin <<= tp->rx_opt.snd_wscale; + + if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { + flag |= FLAG_WIN_UPDATE; +@@ -2888,14 +2892,15 @@ uninteresting_ack: + * But, this can also be called on packets in the established flow when + * the fast version below fails. + */ +-void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab) ++void tcp_parse_options(struct sk_buff *skb, ++ struct tcp_options_received *opt_rx, int estab) + { + unsigned char *ptr; + struct tcphdr *th = skb->h.th; + int length=(th->doff*4)-sizeof(struct tcphdr); + + ptr = (unsigned char *)(th + 1); +- tp->saw_tstamp = 0; ++ opt_rx->saw_tstamp = 0; + + while(length>0) { + int opcode=*ptr++; +@@ -2918,41 +2923,41 @@ void tcp_parse_options(struct sk_buff *s + if(opsize==TCPOLEN_MSS && th->syn && !estab) { + u16 in_mss = ntohs(*(__u16 *)ptr); + if (in_mss) { +- if (tp->user_mss && tp->user_mss < in_mss) +- in_mss = tp->user_mss; +- tp->mss_clamp = in_mss; ++ if (opt_rx->user_mss && opt_rx->user_mss < in_mss) ++ in_mss = opt_rx->user_mss; ++ opt_rx->mss_clamp = in_mss; + } + } + break; + case TCPOPT_WINDOW: + if(opsize==TCPOLEN_WINDOW && th->syn && !estab) + if (sysctl_tcp_window_scaling) { +- tp->wscale_ok = 1; +- tp->snd_wscale = *(__u8 *)ptr; +- if(tp->snd_wscale > 14) { ++ opt_rx->wscale_ok = 1; ++ opt_rx->snd_wscale = *(__u8 *)ptr; ++ if(opt_rx->snd_wscale > 14) { + if(net_ratelimit()) + printk("tcp_parse_options: Illegal window " + "scaling value %d >14 received.", +- tp->snd_wscale); +- tp->snd_wscale = 14; ++ opt_rx->snd_wscale); ++ opt_rx->snd_wscale = 14; + } + } + break; + case TCPOPT_TIMESTAMP: + if(opsize==TCPOLEN_TIMESTAMP) { +- if ((estab && tp->tstamp_ok) || ++ if ((estab && opt_rx->tstamp_ok) || + (!estab && sysctl_tcp_timestamps)) { +- tp->saw_tstamp = 1; +- tp->rcv_tsval = ntohl(*(__u32 *)ptr); +- tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); ++ opt_rx->saw_tstamp = 1; ++ opt_rx->rcv_tsval = ntohl(*(__u32 *)ptr); ++ opt_rx->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); + } + } + break; + case TCPOPT_SACK_PERM: + if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { + if (sysctl_tcp_sack) { +- tp->sack_ok = 1; +- tcp_sack_reset(tp); ++ opt_rx->sack_ok = 1; ++ tcp_sack_reset(opt_rx); + } + } + break; +@@ -2960,7 +2965,7 @@ void tcp_parse_options(struct sk_buff *s + case TCPOPT_SACK: + if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && + !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && +- tp->sack_ok) { ++ 
opt_rx->sack_ok) { + TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; + } + }; +@@ -2976,36 +2981,36 @@ void tcp_parse_options(struct sk_buff *s + static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp) + { + if (th->doff == sizeof(struct tcphdr)>>2) { +- tp->saw_tstamp = 0; ++ tp->rx_opt.saw_tstamp = 0; + return 0; +- } else if (tp->tstamp_ok && ++ } else if (tp->rx_opt.tstamp_ok && + th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + __u32 *ptr = (__u32 *)(th + 1); + if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { +- tp->saw_tstamp = 1; ++ tp->rx_opt.saw_tstamp = 1; + ++ptr; +- tp->rcv_tsval = ntohl(*ptr); ++ tp->rx_opt.rcv_tsval = ntohl(*ptr); + ++ptr; +- tp->rcv_tsecr = ntohl(*ptr); ++ tp->rx_opt.rcv_tsecr = ntohl(*ptr); + return 1; + } + } +- tcp_parse_options(skb, tp, 1); ++ tcp_parse_options(skb, &tp->rx_opt, 1); + return 1; + } + + static __inline__ void + tcp_store_ts_recent(struct tcp_opt *tp) + { +- tp->ts_recent = tp->rcv_tsval; +- tp->ts_recent_stamp = xtime.tv_sec; ++ tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; ++ tp->rx_opt.ts_recent_stamp = xtime.tv_sec; + } + + static __inline__ void + tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq) + { +- if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) { ++ if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM +@@ -3013,8 +3018,8 @@ tcp_replace_ts_recent(struct tcp_opt *tp + * Not only, also it occurs for expired timestamps. + */ + +- if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 || +- xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) ++ if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || ++ xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) + tcp_store_ts_recent(tp); + } + } +@@ -3055,16 +3060,16 @@ static int tcp_disordered_ack(struct tcp + ack == tp->snd_una && + + /* 3. ... and does not update window. */ +- !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) && ++ !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->rx_opt.snd_wscale) && + + /* 4. ... and sits in replay window. */ +- (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ); ++ (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); + } + + static __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb) + { +- return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW && +- xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS && ++ return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && ++ xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && + !tcp_disordered_ack(tp, skb)); + } + +@@ -3177,8 +3182,8 @@ static void tcp_fin(struct sk_buff *skb, + * Probably, we should reset in this case. For now drop them. 
+ */ + __skb_queue_purge(&tp->out_of_order_queue); +- if (tp->sack_ok) +- tcp_sack_reset(tp); ++ if (tp->rx_opt.sack_ok) ++ tcp_sack_reset(&tp->rx_opt); + sk_stream_mem_reclaim(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { +@@ -3208,22 +3213,22 @@ tcp_sack_extend(struct tcp_sack_block *s + + static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq) + { +- if (tp->sack_ok && sysctl_tcp_dsack) { ++ if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (before(seq, tp->rcv_nxt)) + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); + else + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); + +- tp->dsack = 1; ++ tp->rx_opt.dsack = 1; + tp->duplicate_sack[0].start_seq = seq; + tp->duplicate_sack[0].end_seq = end_seq; +- tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok); ++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks+1, 4-tp->rx_opt.tstamp_ok); + } + } + + static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq) + { +- if (!tp->dsack) ++ if (!tp->rx_opt.dsack) + tcp_dsack_set(tp, seq, end_seq); + else + tcp_sack_extend(tp->duplicate_sack, seq, end_seq); +@@ -3238,7 +3243,7 @@ static void tcp_send_dupack(struct sock + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); + tcp_enter_quickack_mode(tp); + +- if (tp->sack_ok && sysctl_tcp_dsack) { ++ if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) +@@ -3262,16 +3267,16 @@ static void tcp_sack_maybe_coalesce(stru + /* See if the recent change to the first SACK eats into + * or hits the sequence space of other SACK blocks, if so coalesce. + */ +- for (this_sack = 1; this_sack < tp->num_sacks; ) { ++ for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) { + if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) { + int i; + + /* Zap SWALK, by moving every further SACK up by one slot. + * Decrease num_sacks. + */ +- tp->num_sacks--; +- tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); +- for(i=this_sack; i < tp->num_sacks; i++) ++ tp->rx_opt.num_sacks--; ++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); ++ for(i=this_sack; i < tp->rx_opt.num_sacks; i++) + sp[i] = sp[i+1]; + continue; + } +@@ -3296,7 +3301,7 @@ static void tcp_sack_new_ofo_skb(struct + { + struct tcp_opt *tp = tcp_sk(sk); + struct tcp_sack_block *sp = &tp->selective_acks[0]; +- int cur_sacks = tp->num_sacks; ++ int cur_sacks = tp->rx_opt.num_sacks; + int this_sack; + + if (!cur_sacks) +@@ -3321,7 +3326,7 @@ static void tcp_sack_new_ofo_skb(struct + */ + if (this_sack >= 4) { + this_sack--; +- tp->num_sacks--; ++ tp->rx_opt.num_sacks--; + sp--; + } + for(; this_sack > 0; this_sack--, sp--) +@@ -3331,8 +3336,8 @@ new_sack: + /* Build the new head SACK, and we're done. */ + sp->start_seq = seq; + sp->end_seq = end_seq; +- tp->num_sacks++; +- tp->eff_sacks = min(tp->num_sacks + tp->dsack, 4 - tp->tstamp_ok); ++ tp->rx_opt.num_sacks++; ++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); + } + + /* RCV.NXT advances, some SACKs should be eaten. */ +@@ -3340,13 +3345,13 @@ new_sack: + static void tcp_sack_remove(struct tcp_opt *tp) + { + struct tcp_sack_block *sp = &tp->selective_acks[0]; +- int num_sacks = tp->num_sacks; ++ int num_sacks = tp->rx_opt.num_sacks; + int this_sack; + + /* Empty ofo queue, hence, all the SACKs are eaten. Clear. 
*/ + if (skb_queue_len(&tp->out_of_order_queue) == 0) { +- tp->num_sacks = 0; +- tp->eff_sacks = tp->dsack; ++ tp->rx_opt.num_sacks = 0; ++ tp->rx_opt.eff_sacks = tp->rx_opt.dsack; + return; + } + +@@ -3367,9 +3372,9 @@ static void tcp_sack_remove(struct tcp_o + this_sack++; + sp++; + } +- if (num_sacks != tp->num_sacks) { +- tp->num_sacks = num_sacks; +- tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok); ++ if (num_sacks != tp->rx_opt.num_sacks) { ++ tp->rx_opt.num_sacks = num_sacks; ++ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); + } + } + +@@ -3427,10 +3432,10 @@ static void tcp_data_queue(struct sock * + + TCP_ECN_accept_cwr(tp, skb); + +- if (tp->dsack) { +- tp->dsack = 0; +- tp->eff_sacks = min_t(unsigned int, tp->num_sacks, +- 4 - tp->tstamp_ok); ++ if (tp->rx_opt.dsack) { ++ tp->rx_opt.dsack = 0; ++ tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, ++ 4 - tp->rx_opt.tstamp_ok); + } + + /* Queue data for delivery to the user. +@@ -3467,7 +3472,7 @@ queue_and_out: + !sk_stream_rmem_schedule(sk, skb))) { + if (tcp_prune_queue(sk) < 0 || + !sk_stream_rmem_schedule(sk, skb)) +- goto drop; ++ goto drop_part; + } + sk_stream_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); +@@ -3488,7 +3493,7 @@ queue_and_out: + tp->ack.pingpong = 0; + } + +- if (tp->num_sacks) ++ if (tp->rx_opt.num_sacks) + tcp_sack_remove(tp); + + tcp_fast_path_check(sk, tp); +@@ -3511,6 +3516,12 @@ out_of_window: + drop: + __kfree_skb(skb); + return; ++ ++drop_part: ++ if (after(tp->copied_seq, tp->rcv_nxt)) ++ tp->rcv_nxt = tp->copied_seq; ++ __kfree_skb(skb); ++ return; + } + + /* Out of window. F.e. zero window probe. */ +@@ -3555,10 +3566,10 @@ drop: + + if (!skb_peek(&tp->out_of_order_queue)) { + /* Initial out of order segment, build 1 SACK. */ +- if (tp->sack_ok) { +- tp->num_sacks = 1; +- tp->dsack = 0; +- tp->eff_sacks = 1; ++ if (tp->rx_opt.sack_ok) { ++ tp->rx_opt.num_sacks = 1; ++ tp->rx_opt.dsack = 0; ++ tp->rx_opt.eff_sacks = 1; + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; + tp->selective_acks[0].end_seq = + TCP_SKB_CB(skb)->end_seq; +@@ -3572,7 +3583,7 @@ drop: + if (seq == TCP_SKB_CB(skb1)->end_seq) { + __skb_append(skb1, skb); + +- if (!tp->num_sacks || ++ if (!tp->rx_opt.num_sacks || + tp->selective_acks[0].end_seq != seq) + goto add_sack; + +@@ -3620,7 +3631,7 @@ drop: + } + + add_sack: +- if (tp->sack_ok) ++ if (tp->rx_opt.sack_ok) + tcp_sack_new_ofo_skb(sk, seq, end_seq); + } + } +@@ -3682,6 +3693,10 @@ tcp_collapse(struct sock *sk, struct sk_ + nskb = alloc_skb(copy+header, GFP_ATOMIC); + if (!nskb) + return; ++ if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) { ++ kfree_skb(nskb); ++ return; ++ } + skb_reserve(nskb, header); + memcpy(nskb->head, skb->head, header); + nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head); +@@ -3777,7 +3792,7 @@ static int tcp_prune_queue(struct sock * + + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + tcp_clamp_window(sk, tp); +- else if (tcp_memory_pressure) ++ else if (ub_tcp_memory_pressure(sk)) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + + tcp_collapse_ofo_queue(sk); +@@ -3803,8 +3818,8 @@ static int tcp_prune_queue(struct sock * + * is in a sad state like this, we care only about integrity + * of the connection not performance. 
+ */ +- if (tp->sack_ok) +- tcp_sack_reset(tp); ++ if (tp->rx_opt.sack_ok) ++ tcp_sack_reset(&tp->rx_opt); + sk_stream_mem_reclaim(sk); + } + +@@ -3859,7 +3874,7 @@ static void tcp_new_space(struct sock *s + !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && + !tcp_memory_pressure && + atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { +- int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) + ++ int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), + demanded = max_t(unsigned int, tp->snd_cwnd, + tp->reordering + 1); +@@ -4126,7 +4141,7 @@ int tcp_rcv_established(struct sock *sk, + * We do checksum and copy also but from device to kernel. + */ + +- tp->saw_tstamp = 0; ++ tp->rx_opt.saw_tstamp = 0; + + /* pred_flags is 0xS?10 << 16 + snd_wnd + * if header_predition is to be made +@@ -4155,14 +4170,14 @@ int tcp_rcv_established(struct sock *sk, + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) + goto slow_path; + +- tp->saw_tstamp = 1; ++ tp->rx_opt.saw_tstamp = 1; + ++ptr; +- tp->rcv_tsval = ntohl(*ptr); ++ tp->rx_opt.rcv_tsval = ntohl(*ptr); + ++ptr; +- tp->rcv_tsecr = ntohl(*ptr); ++ tp->rx_opt.rcv_tsecr = ntohl(*ptr); + + /* If PAWS failed, check it more carefully in slow path */ +- if ((s32)(tp->rcv_tsval - tp->ts_recent) < 0) ++ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) + goto slow_path; + + /* DO NOT update ts_recent here, if checksum fails +@@ -4242,6 +4257,10 @@ int tcp_rcv_established(struct sock *sk, + + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; ++ /* This is OK not to try to free memory here. ++ * Do this below on slow path. Den */ ++ if (ub_tcprcvbuf_charge(sk, skb) < 0) ++ goto step5; + + NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); + +@@ -4288,7 +4307,7 @@ slow_path: + /* + * RFC1323: H1. Apply PAWS check first. 
+ */ +- if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && ++ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + tcp_paws_discard(tp, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); +@@ -4360,9 +4379,9 @@ static int tcp_rcv_synsent_state_process + struct tcphdr *th, unsigned len) + { + struct tcp_opt *tp = tcp_sk(sk); +- int saved_clamp = tp->mss_clamp; ++ int saved_clamp = tp->rx_opt.mss_clamp; + +- tcp_parse_options(skb, tp, 0); ++ tcp_parse_options(skb, &tp->rx_opt, 0); + + if (th->ack) { + /* rfc793: +@@ -4379,8 +4398,8 @@ static int tcp_rcv_synsent_state_process + if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) + goto reset_and_undo; + +- if (tp->saw_tstamp && tp->rcv_tsecr && +- !between(tp->rcv_tsecr, tp->retrans_stamp, ++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && ++ !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, + tcp_time_stamp)) { + NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED); + goto reset_and_undo; +@@ -4435,13 +4454,13 @@ static int tcp_rcv_synsent_state_process + tp->snd_wnd = ntohs(th->window); + tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); + +- if (!tp->wscale_ok) { +- tp->snd_wscale = tp->rcv_wscale = 0; ++ if (!tp->rx_opt.wscale_ok) { ++ tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; + tp->window_clamp = min(tp->window_clamp, 65535U); + } + +- if (tp->saw_tstamp) { +- tp->tstamp_ok = 1; ++ if (tp->rx_opt.saw_tstamp) { ++ tp->rx_opt.tstamp_ok = 1; + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; +@@ -4450,8 +4469,8 @@ static int tcp_rcv_synsent_state_process + tp->tcp_header_len = sizeof(struct tcphdr); + } + +- if (tp->sack_ok && sysctl_tcp_fack) +- tp->sack_ok |= 2; ++ if (tp->rx_opt.sack_ok && sysctl_tcp_fack) ++ tp->rx_opt.sack_ok |= 2; + + tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_initialize_rcv_mss(sk); +@@ -4478,7 +4497,7 @@ static int tcp_rcv_synsent_state_process + if (sock_flag(sk, SOCK_KEEPOPEN)) + tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); + +- if (!tp->snd_wscale) ++ if (!tp->rx_opt.snd_wscale) + __tcp_fast_path_on(tp, tp->snd_wnd); + else + tp->pred_flags = 0; +@@ -4525,7 +4544,7 @@ discard: + } + + /* PAWS check. 
*/ +- if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0)) ++ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0)) + goto discard_and_undo; + + if (th->syn) { +@@ -4535,8 +4554,8 @@ discard: + */ + tcp_set_state(sk, TCP_SYN_RECV); + +- if (tp->saw_tstamp) { +- tp->tstamp_ok = 1; ++ if (tp->rx_opt.saw_tstamp) { ++ tp->rx_opt.tstamp_ok = 1; + tcp_store_ts_recent(tp); + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; +@@ -4583,13 +4602,13 @@ discard: + */ + + discard_and_undo: +- tcp_clear_options(tp); +- tp->mss_clamp = saved_clamp; ++ tcp_clear_options(&tp->rx_opt); ++ tp->rx_opt.mss_clamp = saved_clamp; + goto discard; + + reset_and_undo: +- tcp_clear_options(tp); +- tp->mss_clamp = saved_clamp; ++ tcp_clear_options(&tp->rx_opt); ++ tp->rx_opt.mss_clamp = saved_clamp; + return 1; + } + +@@ -4607,7 +4626,7 @@ int tcp_rcv_state_process(struct sock *s + struct tcp_opt *tp = tcp_sk(sk); + int queued = 0; + +- tp->saw_tstamp = 0; ++ tp->rx_opt.saw_tstamp = 0; + + switch (sk->sk_state) { + case TCP_CLOSE: +@@ -4662,7 +4681,7 @@ int tcp_rcv_state_process(struct sock *s + return 0; + } + +- if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp && ++ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + tcp_paws_discard(tp, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); +@@ -4722,7 +4741,7 @@ int tcp_rcv_state_process(struct sock *s + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << +- tp->snd_wscale; ++ tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, + TCP_SKB_CB(skb)->seq); + +@@ -4730,11 +4749,11 @@ int tcp_rcv_state_process(struct sock *s + * and does not calculate rtt. + * Fix it at least with timestamps. 
+ */ +- if (tp->saw_tstamp && tp->rcv_tsecr && ++ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + !tp->srtt) + tcp_ack_saw_tstamp(tp, 0); + +- if (tp->tstamp_ok) ++ if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + /* Make sure socket is routed, for +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_ipv4.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_ipv4.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_ipv4.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_ipv4.c 2006-05-11 13:05:44.000000000 +0400 +@@ -69,12 +69,16 @@ + #include <net/inet_common.h> + #include <net/xfrm.h> + ++#include <ub/ub_tcp.h> ++ + #include <linux/inet.h> + #include <linux/ipv6.h> + #include <linux/stddef.h> + #include <linux/proc_fs.h> + #include <linux/seq_file.h> + ++#include <linux/ve_owner.h> ++ + extern int sysctl_ip_dynaddr; + int sysctl_tcp_tw_reuse; + int sysctl_tcp_low_latency; +@@ -105,9 +109,10 @@ int sysctl_local_port_range[2] = { 1024, + int tcp_port_rover = 1024 - 1; + + static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, +- __u32 faddr, __u16 fport) ++ __u32 faddr, __u16 fport, ++ envid_t veid) + { +- int h = (laddr ^ lport) ^ (faddr ^ fport); ++ int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16)); + h ^= h >> 16; + h ^= h >> 8; + return h & (tcp_ehash_size - 1); +@@ -120,15 +125,20 @@ static __inline__ int tcp_sk_hashfn(stru + __u16 lport = inet->num; + __u32 faddr = inet->daddr; + __u16 fport = inet->dport; ++ envid_t veid = VEID(VE_OWNER_SK(sk)); + +- return tcp_hashfn(laddr, lport, faddr, fport); ++ return tcp_hashfn(laddr, lport, faddr, fport, veid); + } + ++DCL_VE_OWNER(TB, GENERIC, struct tcp_bind_bucket, owner_env, ++ inline, (always_inline)) ++ + /* Allocate and initialize a new TCP local port bind bucket. + * The bindhash mutex for snum's hash chain must be held here. + */ + struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, +- unsigned short snum) ++ unsigned short snum, ++ struct ve_struct *env) + { + struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, + SLAB_ATOMIC); +@@ -136,6 +146,7 @@ struct tcp_bind_bucket *tcp_bucket_creat + tb->port = snum; + tb->fastreuse = 0; + INIT_HLIST_HEAD(&tb->owners); ++ SET_VE_OWNER_TB(tb, env); + hlist_add_head(&tb->node, &head->chain); + } + return tb; +@@ -153,10 +164,11 @@ void tcp_bucket_destroy(struct tcp_bind_ + /* Caller must disable local BH processing. 
*/ + static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) + { +- struct tcp_bind_hashbucket *head = +- &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; ++ struct tcp_bind_hashbucket *head; + struct tcp_bind_bucket *tb; + ++ head = &tcp_bhash[tcp_bhashfn(inet_sk(child)->num, ++ VEID(VE_OWNER_SK(child)))]; + spin_lock(&head->lock); + tb = tcp_sk(sk)->bind_hash; + sk_add_bind_node(child, &tb->owners); +@@ -212,8 +224,10 @@ static int tcp_v4_get_port(struct sock * + struct tcp_bind_hashbucket *head; + struct hlist_node *node; + struct tcp_bind_bucket *tb; ++ struct ve_struct *env; + int ret; + ++ env = VE_OWNER_SK(sk); + local_bh_disable(); + if (!snum) { + int low = sysctl_local_port_range[0]; +@@ -227,10 +241,11 @@ static int tcp_v4_get_port(struct sock * + rover++; + if (rover < low || rover > high) + rover = low; +- head = &tcp_bhash[tcp_bhashfn(rover)]; ++ head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))]; + spin_lock(&head->lock); + tb_for_each(tb, node, &head->chain) +- if (tb->port == rover) ++ if (tb->port == rover && ++ ve_accessible_strict(VE_OWNER_TB(tb), env)) + goto next; + break; + next: +@@ -249,10 +264,11 @@ static int tcp_v4_get_port(struct sock * + */ + snum = rover; + } else { +- head = &tcp_bhash[tcp_bhashfn(snum)]; ++ head = &tcp_bhash[tcp_bhashfn(snum, VEID(env))]; + spin_lock(&head->lock); + tb_for_each(tb, node, &head->chain) +- if (tb->port == snum) ++ if (tb->port == snum && ++ ve_accessible_strict(VE_OWNER_TB(tb), env)) + goto tb_found; + } + tb = NULL; +@@ -272,7 +288,7 @@ tb_found: + } + tb_not_found: + ret = 1; +- if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) ++ if (!tb && (tb = tcp_bucket_create(head, snum, env)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) +@@ -301,9 +317,10 @@ fail: + static void __tcp_put_port(struct sock *sk) + { + struct inet_opt *inet = inet_sk(sk); +- struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; ++ struct tcp_bind_hashbucket *head; + struct tcp_bind_bucket *tb; + ++ head = &tcp_bhash[tcp_bhashfn(inet->num, VEID(VE_OWNER_SK(sk)))]; + spin_lock(&head->lock); + tb = tcp_sk(sk)->bind_hash; + __sk_del_bind_node(sk); +@@ -412,7 +429,8 @@ void tcp_unhash(struct sock *sk) + * during the search since they can never be otherwise. + */ + static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, +- unsigned short hnum, int dif) ++ unsigned short hnum, int dif, ++ struct ve_struct *env) + { + struct sock *result = NULL, *sk; + struct hlist_node *node; +@@ -422,7 +440,9 @@ static struct sock *__tcp_v4_lookup_list + sk_for_each(sk, node, head) { + struct inet_opt *inet = inet_sk(sk); + +- if (inet->num == hnum && !ipv6_only_sock(sk)) { ++ if (inet->num == hnum && ++ ve_accessible_strict(VE_OWNER_SK(sk), env) && ++ !ipv6_only_sock(sk)) { + __u32 rcv_saddr = inet->rcv_saddr; + + score = (sk->sk_family == PF_INET ? 
1 : 0); +@@ -453,18 +473,21 @@ inline struct sock *tcp_v4_lookup_listen + { + struct sock *sk = NULL; + struct hlist_head *head; ++ struct ve_struct *env; + ++ env = get_exec_env(); + read_lock(&tcp_lhash_lock); +- head = &tcp_listening_hash[tcp_lhashfn(hnum)]; ++ head = &tcp_listening_hash[tcp_lhashfn(hnum, VEID(env))]; + if (!hlist_empty(head)) { + struct inet_opt *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && ++ ve_accessible_strict(VE_OWNER_SK(sk), env) && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; +- sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); ++ sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif, env); + } + if (sk) { + sherry_cache: +@@ -492,17 +515,22 @@ static inline struct sock *__tcp_v4_look + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ +- int hash = tcp_hashfn(daddr, hnum, saddr, sport); ++ int hash; ++ struct ve_struct *env; ++ ++ env = get_exec_env(); ++ hash = tcp_hashfn(daddr, hnum, saddr, sport, VEID(env)); + head = &tcp_ehash[hash]; + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { +- if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) ++ if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif, env)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { +- if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) ++ if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ++ ports, dif, env)) + goto hit; + } + sk = NULL; +@@ -553,11 +581,16 @@ static int __tcp_v4_check_established(st + int dif = sk->sk_bound_dev_if; + TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) + __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); +- int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); +- struct tcp_ehash_bucket *head = &tcp_ehash[hash]; ++ int hash; ++ struct tcp_ehash_bucket *head; + struct sock *sk2; + struct hlist_node *node; + struct tcp_tw_bucket *tw; ++ struct ve_struct *env; ++ ++ env = VE_OWNER_SK(sk); ++ hash = tcp_hashfn(daddr, lport, saddr, inet->dport, VEID(env)); ++ head = &tcp_ehash[hash]; + + write_lock(&head->lock); + +@@ -565,7 +598,8 @@ static int __tcp_v4_check_established(st + sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { + tw = (struct tcp_tw_bucket *)sk2; + +- if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { ++ if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ++ ports, dif, env)) { + struct tcp_opt *tp = tcp_sk(sk); + + /* With PAWS, it is safe from the viewpoint +@@ -589,8 +623,8 @@ static int __tcp_v4_check_established(st + if ((tp->write_seq = + tw->tw_snd_nxt + 65535 + 2) == 0) + tp->write_seq = 1; +- tp->ts_recent = tw->tw_ts_recent; +- tp->ts_recent_stamp = tw->tw_ts_recent_stamp; ++ tp->rx_opt.ts_recent = tw->tw_ts_recent; ++ tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + sock_hold(sk2); + goto unique; + } else +@@ -601,7 +635,7 @@ static int __tcp_v4_check_established(st + + /* And established part... 
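/*
 * Illustrative sketch of the per-VE port namespace implied by the
 * bind-bucket hunks above: a bucket only matches when both the port and
 * the owning VE agree, so two containers may bind the same local port
 * without colliding.  The list layout below is a stand-in, not the
 * kernel's tcp_bind_bucket.
 */
#include <stdio.h>

struct bind_bucket_sketch {
        unsigned short port;
        unsigned int veid;
        struct bind_bucket_sketch *next;
};

static struct bind_bucket_sketch *
find_bucket_sketch(struct bind_bucket_sketch *head,
                   unsigned short port, unsigned int veid)
{
        for (; head; head = head->next)
                if (head->port == port && head->veid == veid)
                        return head;    /* same port *and* same VE only */
        return NULL;
}

int main(void)
{
        struct bind_bucket_sketch b1 = { 8080, 101, NULL };
        struct bind_bucket_sketch b0 = { 8080, 0, &b1 };

        printf("port 8080, VE 0:   %s\n",
               find_bucket_sketch(&b0, 8080, 0) ? "in use" : "free");
        printf("port 8080, VE 202: %s\n",
               find_bucket_sketch(&b0, 8080, 202) ? "in use" : "free");
        return 0;
}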
*/ + sk_for_each(sk2, node, &head->chain) { +- if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) ++ if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif, env)) + goto not_unique; + } + +@@ -643,7 +677,9 @@ static int tcp_v4_hash_connect(struct so + struct tcp_bind_hashbucket *head; + struct tcp_bind_bucket *tb; + int ret; ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + if (!snum) { + int rover; + int low = sysctl_local_port_range[0]; +@@ -674,7 +710,7 @@ static int tcp_v4_hash_connect(struct so + rover++; + if ((rover < low) || (rover > high)) + rover = low; +- head = &tcp_bhash[tcp_bhashfn(rover)]; ++ head = &tcp_bhash[tcp_bhashfn(rover, VEID(env))]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, +@@ -682,7 +718,9 @@ static int tcp_v4_hash_connect(struct so + * unique enough. + */ + tb_for_each(tb, node, &head->chain) { +- if (tb->port == rover) { ++ if (tb->port == rover && ++ ve_accessible_strict(VE_OWNER_TB(tb), env)) ++ { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; +@@ -694,7 +732,7 @@ static int tcp_v4_hash_connect(struct so + } + } + +- tb = tcp_bucket_create(head, rover); ++ tb = tcp_bucket_create(head, rover, env); + if (!tb) { + spin_unlock(&head->lock); + break; +@@ -733,7 +771,7 @@ ok: + goto out; + } + +- head = &tcp_bhash[tcp_bhashfn(snum)]; ++ head = &tcp_bhash[tcp_bhashfn(snum, VEID(env))]; + tb = tcp_sk(sk)->bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { +@@ -793,25 +831,25 @@ int tcp_v4_connect(struct sock *sk, stru + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; + +- if (tp->ts_recent_stamp && inet->daddr != daddr) { ++ if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { + /* Reset inherited state */ +- tp->ts_recent = 0; +- tp->ts_recent_stamp = 0; +- tp->write_seq = 0; ++ tp->rx_opt.ts_recent = 0; ++ tp->rx_opt.ts_recent_stamp = 0; ++ tp->write_seq = 0; + } + + if (sysctl_tcp_tw_recycle && +- !tp->ts_recent_stamp && rt->rt_dst == daddr) { ++ !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { + struct inet_peer *peer = rt_get_peer(rt); + + /* VJ's idea. We save last timestamp seen from + * the destination in peer table, when entering state TIME-WAIT +- * and initialize ts_recent from it, when trying new connection. ++ * and initialize rx_opt.ts_recent from it, when trying new connection. + */ + + if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { +- tp->ts_recent_stamp = peer->tcp_ts_stamp; +- tp->ts_recent = peer->tcp_ts; ++ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; ++ tp->rx_opt.ts_recent = peer->tcp_ts; + } + } + +@@ -822,7 +860,7 @@ int tcp_v4_connect(struct sock *sk, stru + if (inet->opt) + tp->ext_header_len = inet->opt->optlen; + +- tp->mss_clamp = 536; ++ tp->rx_opt.mss_clamp = 536; + + /* Socket identity is still unknown (sport may be zero). + * However we set state to SYN-SENT and not releasing socket +@@ -1033,11 +1071,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 + + switch (type) { + case ICMP_SOURCE_QUENCH: +- /* This is deprecated, but if someone generated it, +- * we have no reasons to ignore it. +- */ +- if (!sock_owned_by_user(sk)) +- tcp_enter_cwr(tp); ++ /* Just silently ignore these. 
*/ + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; +@@ -1261,9 +1295,8 @@ static void tcp_v4_timewait_ack(struct s + struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + + tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, +- tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); +- +- tcp_tw_put(tw); ++ tw->tw_rcv_wnd >> (tw->tw_rcv_wscale & TW_WSCALE_MASK), ++ tw->tw_ts_recent); + } + + static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) +@@ -1407,7 +1440,7 @@ struct or_calltable or_ipv4 = { + + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) + { +- struct tcp_opt tp; ++ struct tcp_options_received tmp_opt; + struct open_request *req; + __u32 saddr = skb->nh.iph->saddr; + __u32 daddr = skb->nh.iph->daddr; +@@ -1449,29 +1482,29 @@ int tcp_v4_conn_request(struct sock *sk, + if (!req) + goto drop; + +- tcp_clear_options(&tp); +- tp.mss_clamp = 536; +- tp.user_mss = tcp_sk(sk)->user_mss; ++ tcp_clear_options(&tmp_opt); ++ tmp_opt.mss_clamp = 536; ++ tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; + +- tcp_parse_options(skb, &tp, 0); ++ tcp_parse_options(skb, &tmp_opt, 0); + + if (want_cookie) { +- tcp_clear_options(&tp); +- tp.saw_tstamp = 0; ++ tcp_clear_options(&tmp_opt); ++ tmp_opt.saw_tstamp = 0; + } + +- if (tp.saw_tstamp && !tp.rcv_tsval) { ++ if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { + /* Some OSes (unknown ones, but I see them on web server, which + * contains information interesting only for windows' + * users) do not send their stamp in SYN. It is easy case. + * We simply do not advertise TS support. + */ +- tp.saw_tstamp = 0; +- tp.tstamp_ok = 0; ++ tmp_opt.saw_tstamp = 0; ++ tmp_opt.tstamp_ok = 0; + } +- tp.tstamp_ok = tp.saw_tstamp; ++ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + +- tcp_openreq_init(req, &tp, skb); ++ tcp_openreq_init(req, &tmp_opt, skb); + + req->af.v4_req.loc_addr = daddr; + req->af.v4_req.rmt_addr = saddr; +@@ -1497,7 +1530,7 @@ int tcp_v4_conn_request(struct sock *sk, + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ +- if (tp.saw_tstamp && ++ if (tmp_opt.saw_tstamp && + sysctl_tcp_tw_recycle && + (dst = tcp_v4_route_req(sk, req)) != NULL && + (peer = rt_get_peer((struct rtable *)dst)) != NULL && +@@ -1684,12 +1717,15 @@ static int tcp_v4_checksum_init(struct s + */ + int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) + { ++ struct user_beancounter *ub; ++ ++ ub = set_sk_exec_ub(sk); + if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + TCP_CHECK_TIMER(sk); + if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); +- return 0; ++ goto restore_context; + } + + if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb)) +@@ -1703,7 +1739,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc + if (nsk != sk) { + if (tcp_child_process(sk, nsk, skb)) + goto reset; +- return 0; ++ goto restore_context; + } + } + +@@ -1711,6 +1747,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc + if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + goto reset; + TCP_CHECK_TIMER(sk); ++ ++restore_context: ++ (void)set_exec_ub(ub); + return 0; + + reset: +@@ -1722,7 +1761,7 @@ discard: + * might be destroyed here. This current version compiles correctly, + * but you have been warned. 
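/*
 * Illustrative sketch of the accounting-context dance added to
 * tcp_v4_do_rcv above: processing switches to the socket's user
 * beancounter on entry, and every exit path funnels through a single
 * label that restores the previous context.  The ub type and setter
 * below are stand-ins, not the real UBC interfaces.
 */
struct ub_ctx_sketch { int id; };

static struct ub_ctx_sketch *current_ub_sketch;

static struct ub_ctx_sketch *set_ub_sketch(struct ub_ctx_sketch *ub)
{
        struct ub_ctx_sketch *old = current_ub_sketch;

        current_ub_sketch = ub;
        return old;
}

static int do_rcv_sketch(struct ub_ctx_sketch *sock_ub, int fast_path_ok)
{
        struct ub_ctx_sketch *old = set_ub_sketch(sock_ub);
        int ret = 0;

        if (fast_path_ok)
                goto restore_context;   /* was a plain "return 0" before */

        /* ... slow path would also run charged to sock_ub ... */
        ret = 1;

restore_context:
        (void)set_ub_sketch(old);       /* restore on every way out */
        return ret;
}

int main(void)
{
        struct ub_ctx_sketch ve_ub = { 101 };

        return do_rcv_sketch(&ve_ub, 1);
}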
+ */ +- return 0; ++ goto restore_context; + + csum_err: + TCP_INC_STATS_BH(TCP_MIB_INERRS); +@@ -1835,13 +1874,17 @@ do_time_wait: + tcp_tw_put((struct tcp_tw_bucket *) sk); + goto discard_it; + } ++ spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock); + switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_SYN: { +- struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, ++ struct sock *sk2; ++ ++ sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr, + ntohs(th->dest), + tcp_v4_iif(skb)); + if (sk2) { ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); + tcp_tw_deschedule((struct tcp_tw_bucket *)sk); + tcp_tw_put((struct tcp_tw_bucket *)sk); + sk = sk2; +@@ -1853,9 +1896,13 @@ do_time_wait: + tcp_v4_timewait_ack(sk, skb); + break; + case TCP_TW_RST: ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); ++ tcp_tw_put((struct tcp_tw_bucket *)sk); + goto no_tcp_socket; + case TCP_TW_SUCCESS:; + } ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); ++ tcp_tw_put((struct tcp_tw_bucket *)sk); + goto discard_it; + } + +@@ -2001,11 +2048,11 @@ int tcp_v4_remember_stamp(struct sock *s + } + + if (peer) { +- if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 || ++ if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || + (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && +- peer->tcp_ts_stamp <= tp->ts_recent_stamp)) { +- peer->tcp_ts_stamp = tp->ts_recent_stamp; +- peer->tcp_ts = tp->ts_recent; ++ peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { ++ peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; ++ peer->tcp_ts = tp->rx_opt.ts_recent; + } + if (release_it) + inet_putpeer(peer); +@@ -2077,6 +2124,8 @@ static int tcp_v4_init_sock(struct sock + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = 536; + ++ tp->advmss = 65535; /* max value */ ++ + tp->reordering = sysctl_tcp_reordering; + + sk->sk_state = TCP_CLOSE; +@@ -2117,6 +2166,8 @@ int tcp_v4_destroy_sock(struct sock *sk) + * If sendmsg cached page exists, toss it. + */ + if (sk->sk_sndmsg_page) { ++ /* queue is empty, uncharge */ ++ ub_sock_tcp_detachpage(sk); + __free_page(sk->sk_sndmsg_page); + sk->sk_sndmsg_page = NULL; + } +@@ -2131,16 +2182,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); + #ifdef CONFIG_PROC_FS + /* Proc filesystem TCP sock list dumping. */ + +-static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) ++static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head, ++ envid_t veid) + { +- return hlist_empty(head) ? NULL : +- list_entry(head->first, struct tcp_tw_bucket, tw_node); ++ struct tcp_tw_bucket *tw; ++ struct hlist_node *pos; ++ ++ if (hlist_empty(head)) ++ return NULL; ++ hlist_for_each_entry(tw, pos, head, tw_node) { ++ if (!ve_accessible_veid(TW_VEID(tw), veid)) ++ continue; ++ return tw; ++ } ++ return NULL; + } + +-static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) ++static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw, ++ envid_t veid) + { +- return tw->tw_node.next ? 
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; ++ while (1) { ++ if (tw->tw_node.next == NULL) ++ return NULL; ++ tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node); ++ if (!ve_accessible_veid(TW_VEID(tw), veid)) ++ continue; ++ return tw; ++ } ++ return NULL; /* make compiler happy */ + } + + static void *listening_get_next(struct seq_file *seq, void *cur) +@@ -2149,7 +2218,9 @@ static void *listening_get_next(struct s + struct hlist_node *node; + struct sock *sk = cur; + struct tcp_iter_state* st = seq->private; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + if (!sk) { + st->bucket = 0; + sk = sk_head(&tcp_listening_hash[0]); +@@ -2183,6 +2254,8 @@ get_req: + sk = sk_next(sk); + get_sk: + sk_for_each_from(sk, node) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family == st->family) { + cur = sk; + goto out; +@@ -2222,7 +2295,9 @@ static void *established_get_first(struc + { + struct tcp_iter_state* st = seq->private; + void *rc = NULL; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { + struct sock *sk; + struct hlist_node *node; +@@ -2230,6 +2305,8 @@ static void *established_get_first(struc + + read_lock(&tcp_ehash[st->bucket].lock); + sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family != st->family) { + continue; + } +@@ -2239,6 +2316,8 @@ static void *established_get_first(struc + st->state = TCP_SEQ_STATE_TIME_WAIT; + tw_for_each(tw, node, + &tcp_ehash[st->bucket + tcp_ehash_size].chain) { ++ if (!ve_accessible_veid(TW_VEID(tw), VEID(ve))) ++ continue; + if (tw->tw_family != st->family) { + continue; + } +@@ -2258,16 +2337,17 @@ static void *established_get_next(struct + struct tcp_tw_bucket *tw; + struct hlist_node *node; + struct tcp_iter_state* st = seq->private; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + ++st->num; + + if (st->state == TCP_SEQ_STATE_TIME_WAIT) { + tw = cur; +- tw = tw_next(tw); ++ tw = tw_next(tw, VEID(ve)); + get_tw: +- while (tw && tw->tw_family != st->family) { +- tw = tw_next(tw); +- } ++ while (tw && tw->tw_family != st->family) ++ tw = tw_next(tw, VEID(ve)); + if (tw) { + cur = tw; + goto out; +@@ -2285,12 +2365,14 @@ get_tw: + sk = sk_next(sk); + + sk_for_each_from(sk, node) { ++ if (!ve_accessible(VE_OWNER_SK(sk), ve)) ++ continue; + if (sk->sk_family == st->family) + goto found; + } + + st->state = TCP_SEQ_STATE_TIME_WAIT; +- tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); ++ tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain, VEID(ve)); + goto get_tw; + found: + cur = sk; +@@ -2636,6 +2718,85 @@ void __init tcp_v4_init(struct net_proto + tcp_socket->sk->sk_prot->unhash(tcp_socket->sk); + } + ++#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE) ++static void tcp_kill_ve_onesk(struct sock *sk) ++{ ++ struct tcp_opt *tp = tcp_sk(sk); ++ ++ /* Check the assumed state of the socket. */ ++ if (!sock_flag(sk, SOCK_DEAD)) { ++ static int printed; ++invalid: ++ if (!printed) ++ printk(KERN_DEBUG "Killing sk: dead %d, state %d, " ++ "wrseq %u unseq %u, wrqu %d.\n", ++ sock_flag(sk, SOCK_DEAD), sk->sk_state, ++ tp->write_seq, tp->snd_una, ++ !skb_queue_empty(&sk->sk_write_queue)); ++ printed = 1; ++ return; ++ } ++ ++ tcp_send_active_reset(sk, GFP_ATOMIC); ++ switch (sk->sk_state) { ++ case TCP_FIN_WAIT1: ++ case TCP_CLOSING: ++ /* In these 2 states the peer may want us to retransmit ++ * some data and/or FIN. 
Entering "resetting mode" ++ * instead. ++ */ ++ tcp_time_wait(sk, TCP_CLOSE, 0); ++ break; ++ case TCP_FIN_WAIT2: ++ /* By some reason the socket may stay in this state ++ * without turning into a TW bucket. Fix it. ++ */ ++ tcp_time_wait(sk, TCP_FIN_WAIT2, 0); ++ break; ++ case TCP_LAST_ACK: ++ /* Just jump into CLOSED state. */ ++ tcp_done(sk); ++ break; ++ default: ++ /* The socket must be already close()d. */ ++ goto invalid; ++ } ++} ++ ++void tcp_v4_kill_ve_sockets(struct ve_struct *envid) ++{ ++ struct tcp_ehash_bucket *head; ++ int i; ++ ++ /* alive */ ++ local_bh_disable(); ++ head = tcp_ehash; ++ for (i = 0; i < tcp_ehash_size; i++) { ++ struct sock *sk; ++ struct hlist_node *node; ++more_work: ++ write_lock(&head[i].lock); ++ sk_for_each(sk, node, &head[i].chain) { ++ if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) { ++ sock_hold(sk); ++ write_unlock(&head[i].lock); ++ ++ bh_lock_sock(sk); ++ /* sk might have disappeared from the hash before ++ * we got the lock */ ++ if (sk->sk_state != TCP_CLOSE) ++ tcp_kill_ve_onesk(sk); ++ bh_unlock_sock(sk); ++ sock_put(sk); ++ goto more_work; ++ } ++ } ++ write_unlock(&head[i].lock); ++ } ++ local_bh_enable(); ++} ++#endif ++ + EXPORT_SYMBOL(ipv4_specific); + EXPORT_SYMBOL(tcp_bind_hash); + EXPORT_SYMBOL(tcp_bucket_create); +@@ -2654,6 +2815,7 @@ EXPORT_SYMBOL(tcp_v4_rebuild_header); + EXPORT_SYMBOL(tcp_v4_remember_stamp); + EXPORT_SYMBOL(tcp_v4_send_check); + EXPORT_SYMBOL(tcp_v4_syn_recv_sock); ++EXPORT_SYMBOL(tcp_v4_kill_ve_sockets); + + #ifdef CONFIG_PROC_FS + EXPORT_SYMBOL(tcp_proc_register); +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_minisocks.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_minisocks.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_minisocks.c 2004-08-14 14:55:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_minisocks.c 2006-05-11 13:05:44.000000000 +0400 +@@ -29,6 +29,8 @@ + #include <net/inet_common.h> + #include <net/xfrm.h> + ++#include <ub/ub_net.h> ++ + #ifdef CONFIG_SYSCTL + #define SYNC_INIT 0 /* let the user enable it */ + #else +@@ -74,7 +76,7 @@ static void tcp_timewait_kill(struct tcp + write_unlock(&ehead->lock); + + /* Disassociate with bind bucket. */ +- bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; ++ bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num, TW_VEID(tw))]; + spin_lock(&bhead->lock); + tb = tw->tw_tb; + __hlist_del(&tw->tw_bind_node); +@@ -123,17 +125,17 @@ enum tcp_tw_status + tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, + struct tcphdr *th, unsigned len) + { +- struct tcp_opt tp; ++ struct tcp_options_received tmp_opt; + int paws_reject = 0; + +- tp.saw_tstamp = 0; ++ tmp_opt.saw_tstamp = 0; + if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { +- tcp_parse_options(skb, &tp, 0); ++ tcp_parse_options(skb, &tmp_opt, 0); + +- if (tp.saw_tstamp) { +- tp.ts_recent = tw->tw_ts_recent; +- tp.ts_recent_stamp = tw->tw_ts_recent_stamp; +- paws_reject = tcp_paws_check(&tp, th->rst); ++ if (tmp_opt.saw_tstamp) { ++ tmp_opt.ts_recent = tw->tw_ts_recent; ++ tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; ++ paws_reject = tcp_paws_check(&tmp_opt, th->rst); + } + } + +@@ -150,33 +152,28 @@ tcp_timewait_state_process(struct tcp_tw + if (th->rst) + goto kill; + +- if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) +- goto kill_with_rst; ++ if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) { ++ tw->tw_substate = TCP_CLOSE; ++ tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); ++ return TCP_TW_RST; ++ } + + /* Dup ACK? 
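/*
 * Illustrative sketch of the scan-restart pattern in
 * tcp_v4_kill_ve_sockets above: the bucket lock has to be dropped to act
 * on a matching socket, so after handling one socket the walk restarts
 * from the bucket head instead of trusting a possibly stale iterator.
 * The list, the "closed" flag and the locking comments are stand-ins.
 */
#include <stdio.h>

struct sk_sketch {
        int veid;
        int closed;                     /* stands in for TCP_CLOSE */
        struct sk_sketch *next;
};

static void kill_ve_sketch(struct sk_sketch *bucket_head, int veid)
{
        struct sk_sketch *sk;

restart:
        /* write_lock(bucket) */
        for (sk = bucket_head; sk; sk = sk->next) {
                if (sk->veid != veid || sk->closed)
                        continue;
                /* write_unlock(bucket); work on sk with the lock dropped */
                sk->closed = 1;
                printf("reset one socket in VE %d\n", veid);
                goto restart;           /* chain may have changed meanwhile */
        }
        /* write_unlock(bucket) */
}

int main(void)
{
        struct sk_sketch c = { 101, 0, NULL };
        struct sk_sketch b = { 0, 0, &c };
        struct sk_sketch a = { 101, 0, &b };

        kill_ve_sketch(&a, 101);
        return 0;
}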
*/ + if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || +- TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { +- tcp_tw_put(tw); ++ TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) + return TCP_TW_SUCCESS; +- } + +- /* New data or FIN. If new data arrive after half-duplex close, +- * reset. +- */ +- if (!th->fin || +- TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { +-kill_with_rst: +- tcp_tw_deschedule(tw); +- tcp_tw_put(tw); +- return TCP_TW_RST; +- } +- +- /* FIN arrived, enter true time-wait state. */ +- tw->tw_substate = TCP_TIME_WAIT; +- tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; +- if (tp.saw_tstamp) { ++ /* New data or FIN. */ ++ if (th->fin && TCP_SKB_CB(skb)->end_seq == tw->tw_rcv_nxt + 1) { ++ /* FIN arrived, enter true time-wait state. */ ++ tw->tw_substate = TCP_TIME_WAIT; ++ tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; ++ } else ++ /* If new data arrive after half-duplex close, reset. */ ++ tw->tw_substate = TCP_CLOSE; ++ if (tmp_opt.saw_tstamp) { + tw->tw_ts_recent_stamp = xtime.tv_sec; +- tw->tw_ts_recent = tp.rcv_tsval; ++ tw->tw_ts_recent = tmp_opt.rcv_tsval; + } + + /* I am shamed, but failed to make it more elegant. +@@ -190,7 +187,9 @@ kill_with_rst: + tcp_tw_schedule(tw, tw->tw_timeout); + else + tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); +- return TCP_TW_ACK; ++ ++ return (tw->tw_substate == TCP_TIME_WAIT) ? ++ TCP_TW_ACK : TCP_TW_RST; + } + + /* +@@ -223,18 +222,16 @@ kill_with_rst: + if (sysctl_tcp_rfc1337 == 0) { + kill: + tcp_tw_deschedule(tw); +- tcp_tw_put(tw); + return TCP_TW_SUCCESS; + } + } + tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + +- if (tp.saw_tstamp) { +- tw->tw_ts_recent = tp.rcv_tsval; ++ if (tmp_opt.saw_tstamp) { ++ tw->tw_ts_recent = tmp_opt.rcv_tsval; + tw->tw_ts_recent_stamp = xtime.tv_sec; + } + +- tcp_tw_put(tw); + return TCP_TW_SUCCESS; + } + +@@ -257,7 +254,7 @@ kill: + + if (th->syn && !th->rst && !th->ack && !paws_reject && + (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || +- (tp.saw_tstamp && (s32)(tw->tw_ts_recent - tp.rcv_tsval) < 0))) { ++ (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { + u32 isn = tw->tw_snd_nxt + 65535 + 2; + if (isn == 0) + isn++; +@@ -268,7 +265,7 @@ kill: + if (paws_reject) + NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); + +- if(!th->rst) { ++ if (!th->rst) { + /* In this case we must reset the TIMEWAIT timer. + * + * If it is ACKless SYN it may be both old duplicate +@@ -278,12 +275,9 @@ kill: + if (paws_reject || th->ack) + tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); + +- /* Send ACK. Note, we do not put the bucket, +- * it will be released by caller. +- */ +- return TCP_TW_ACK; ++ return (tw->tw_substate == TCP_TIME_WAIT) ? ++ TCP_TW_ACK : TCP_TW_RST; + } +- tcp_tw_put(tw); + return TCP_TW_SUCCESS; + } + +@@ -301,7 +295,8 @@ static void __tcp_tw_hashdance(struct so + Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + binding cache, even if it is closed. 
+ */ +- bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; ++ bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num, ++ VEID(VE_OWNER_SK(sk)))]; + spin_lock(&bhead->lock); + tw->tw_tb = tcp_sk(sk)->bind_hash; + BUG_TRAP(tcp_sk(sk)->bind_hash); +@@ -329,12 +324,15 @@ void tcp_time_wait(struct sock *sk, int + struct tcp_tw_bucket *tw = NULL; + struct tcp_opt *tp = tcp_sk(sk); + int recycle_ok = 0; ++ struct user_beancounter *ub; + +- if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp) ++ if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) + recycle_ok = tp->af_specific->remember_stamp(sk); + ++ ub = set_sk_exec_ub(sk); + if (tcp_tw_count < sysctl_tcp_max_tw_buckets) + tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); ++ (void)set_exec_ub(ub); + + if(tw != NULL) { + struct inet_opt *inet = inet_sk(sk); +@@ -351,16 +349,19 @@ void tcp_time_wait(struct sock *sk, int + tw->tw_dport = inet->dport; + tw->tw_family = sk->sk_family; + tw->tw_reuse = sk->sk_reuse; +- tw->tw_rcv_wscale = tp->rcv_wscale; ++ tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; ++ if (sk->sk_user_data != NULL) ++ tw->tw_rcv_wscale |= TW_WSCALE_SPEC; + atomic_set(&tw->tw_refcnt, 1); + + tw->tw_hashent = sk->sk_hashent; + tw->tw_rcv_nxt = tp->rcv_nxt; + tw->tw_snd_nxt = tp->snd_nxt; + tw->tw_rcv_wnd = tcp_receive_window(tp); +- tw->tw_ts_recent = tp->ts_recent; +- tw->tw_ts_recent_stamp = tp->ts_recent_stamp; ++ tw->tw_ts_recent = tp->rx_opt.ts_recent; ++ tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + tw_dead_node_init(tw); ++ spin_lock_init(&tw->tw_lock); + + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (tw->tw_family == PF_INET6) { +@@ -375,6 +376,8 @@ void tcp_time_wait(struct sock *sk, int + tw->tw_v6_ipv6only = 0; + } + #endif ++ SET_TW_VEID(tw, VEID(VE_OWNER_SK(sk))); ++ + /* Linkage updates. 
*/ + __tcp_tw_hashdance(sk, tw); + +@@ -401,7 +404,8 @@ void tcp_time_wait(struct sock *sk, int + printk(KERN_INFO "TCP: time wait bucket table overflow\n"); + } + +- tcp_update_metrics(sk); ++ if (state != TCP_CLOSE) ++ tcp_update_metrics(sk); + tcp_done(sk); + } + +@@ -694,6 +698,10 @@ struct sock *tcp_create_openreq_child(st + struct sk_filter *filter; + + memcpy(newsk, sk, sizeof(struct tcp_sock)); ++ ++ if (ub_tcp_sock_charge(newsk) < 0) ++ goto out_sk_free; ++ + newsk->sk_state = TCP_SYN_RECV; + + /* SANITY */ +@@ -703,6 +711,7 @@ struct sock *tcp_create_openreq_child(st + /* Clone the TCP header template */ + inet_sk(newsk)->dport = req->rmt_port; + ++ SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk)); + sock_lock_init(newsk); + bh_lock_sock(newsk); + +@@ -729,6 +738,7 @@ struct sock *tcp_create_openreq_child(st + if (unlikely(xfrm_sk_clone_policy(newsk))) { + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ ++out_sk_free: + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; +@@ -778,13 +788,13 @@ struct sock *tcp_create_openreq_child(st + newtp->pushed_seq = newtp->write_seq; + newtp->copied_seq = req->rcv_isn + 1; + +- newtp->saw_tstamp = 0; ++ newtp->rx_opt.saw_tstamp = 0; + +- newtp->dsack = 0; +- newtp->eff_sacks = 0; ++ newtp->rx_opt.dsack = 0; ++ newtp->rx_opt.eff_sacks = 0; + + newtp->probes_out = 0; +- newtp->num_sacks = 0; ++ newtp->rx_opt.num_sacks = 0; + newtp->urg_data = 0; + newtp->listen_opt = NULL; + newtp->accept_queue = newtp->accept_queue_tail = NULL; +@@ -807,36 +817,36 @@ struct sock *tcp_create_openreq_child(st + newsk->sk_sleep = NULL; + newsk->sk_owner = NULL; + +- newtp->tstamp_ok = req->tstamp_ok; +- if((newtp->sack_ok = req->sack_ok) != 0) { ++ newtp->rx_opt.tstamp_ok = req->tstamp_ok; ++ if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) { + if (sysctl_tcp_fack) +- newtp->sack_ok |= 2; ++ newtp->rx_opt.sack_ok |= 2; + } + newtp->window_clamp = req->window_clamp; + newtp->rcv_ssthresh = req->rcv_wnd; + newtp->rcv_wnd = req->rcv_wnd; +- newtp->wscale_ok = req->wscale_ok; +- if (newtp->wscale_ok) { +- newtp->snd_wscale = req->snd_wscale; +- newtp->rcv_wscale = req->rcv_wscale; ++ newtp->rx_opt.wscale_ok = req->wscale_ok; ++ if (newtp->rx_opt.wscale_ok) { ++ newtp->rx_opt.snd_wscale = req->snd_wscale; ++ newtp->rx_opt.rcv_wscale = req->rcv_wscale; + } else { +- newtp->snd_wscale = newtp->rcv_wscale = 0; ++ newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; + newtp->window_clamp = min(newtp->window_clamp, 65535U); + } +- newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale; ++ newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale; + newtp->max_window = newtp->snd_wnd; + +- if (newtp->tstamp_ok) { +- newtp->ts_recent = req->ts_recent; +- newtp->ts_recent_stamp = xtime.tv_sec; ++ if (newtp->rx_opt.tstamp_ok) { ++ newtp->rx_opt.ts_recent = req->ts_recent; ++ newtp->rx_opt.ts_recent_stamp = xtime.tv_sec; + newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + } else { +- newtp->ts_recent_stamp = 0; ++ newtp->rx_opt.ts_recent_stamp = 0; + newtp->tcp_header_len = sizeof(struct tcphdr); + } + if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) + newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; +- newtp->mss_clamp = req->mss; ++ newtp->rx_opt.mss_clamp = req->mss; + TCP_ECN_openreq_child(newtp, req); + if (newtp->ecn_flags&TCP_ECN_OK) + newsk->sk_no_largesend = 1; +@@ -860,21 +870,21 @@ struct sock *tcp_check_req(struct sock * + struct tcp_opt *tp = tcp_sk(sk); + u32 
flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); + int paws_reject = 0; +- struct tcp_opt ttp; ++ struct tcp_options_received tmp_opt; + struct sock *child; + +- ttp.saw_tstamp = 0; ++ tmp_opt.saw_tstamp = 0; + if (th->doff > (sizeof(struct tcphdr)>>2)) { +- tcp_parse_options(skb, &ttp, 0); ++ tcp_parse_options(skb, &tmp_opt, 0); + +- if (ttp.saw_tstamp) { +- ttp.ts_recent = req->ts_recent; ++ if (tmp_opt.saw_tstamp) { ++ tmp_opt.ts_recent = req->ts_recent; + /* We do not store true stamp, but it is not required, + * it can be estimated (approximately) + * from another data. + */ +- ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); +- paws_reject = tcp_paws_check(&ttp, th->rst); ++ tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); ++ paws_reject = tcp_paws_check(&tmp_opt, th->rst); + } + } + +@@ -979,63 +989,63 @@ struct sock *tcp_check_req(struct sock * + + /* In sequence, PAWS is OK. */ + +- if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) +- req->ts_recent = ttp.rcv_tsval; ++ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) ++ req->ts_recent = tmp_opt.rcv_tsval; + +- if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { +- /* Truncate SYN, it is out of window starting +- at req->rcv_isn+1. */ +- flg &= ~TCP_FLAG_SYN; +- } ++ if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { ++ /* Truncate SYN, it is out of window starting ++ at req->rcv_isn+1. */ ++ flg &= ~TCP_FLAG_SYN; ++ } + +- /* RFC793: "second check the RST bit" and +- * "fourth, check the SYN bit" +- */ +- if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) +- goto embryonic_reset; ++ /* RFC793: "second check the RST bit" and ++ * "fourth, check the SYN bit" ++ */ ++ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) ++ goto embryonic_reset; + +- /* ACK sequence verified above, just make sure ACK is +- * set. If ACK not set, just silently drop the packet. +- */ +- if (!(flg & TCP_FLAG_ACK)) +- return NULL; ++ /* ACK sequence verified above, just make sure ACK is ++ * set. If ACK not set, just silently drop the packet. ++ */ ++ if (!(flg & TCP_FLAG_ACK)) ++ return NULL; + +- /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ +- if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { +- req->acked = 1; +- return NULL; +- } ++ /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ ++ if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { ++ req->acked = 1; ++ return NULL; ++ } + +- /* OK, ACK is valid, create big socket and +- * feed this segment to it. It will repeat all +- * the tests. THIS SEGMENT MUST MOVE SOCKET TO +- * ESTABLISHED STATE. If it will be dropped after +- * socket is created, wait for troubles. +- */ +- child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); +- if (child == NULL) +- goto listen_overflow; +- +- sk_set_owner(child, sk->sk_owner); +- tcp_synq_unlink(tp, req, prev); +- tcp_synq_removed(sk, req); +- +- tcp_acceptq_queue(sk, req, child); +- return child; +- +-listen_overflow: +- if (!sysctl_tcp_abort_on_overflow) { +- req->acked = 1; +- return NULL; +- } ++ /* OK, ACK is valid, create big socket and ++ * feed this segment to it. It will repeat all ++ * the tests. THIS SEGMENT MUST MOVE SOCKET TO ++ * ESTABLISHED STATE. If it will be dropped after ++ * socket is created, wait for troubles. 
++ */ ++ child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); ++ if (child == NULL) ++ goto listen_overflow; ++ ++ sk_set_owner(child, sk->sk_owner); ++ tcp_synq_unlink(tp, req, prev); ++ tcp_synq_removed(sk, req); ++ ++ tcp_acceptq_queue(sk, req, child); ++ return child; ++ ++ listen_overflow: ++ if (!sysctl_tcp_abort_on_overflow) { ++ req->acked = 1; ++ return NULL; ++ } + +-embryonic_reset: +- NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); +- if (!(flg & TCP_FLAG_RST)) +- req->class->send_reset(skb); ++ embryonic_reset: ++ NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); ++ if (!(flg & TCP_FLAG_RST)) ++ req->class->send_reset(skb); + +- tcp_synq_drop(sk, req, prev); +- return NULL; ++ tcp_synq_drop(sk, req, prev); ++ return NULL; + } + + /* +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_output.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_output.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_output.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_output.c 2006-05-11 13:05:44.000000000 +0400 +@@ -42,6 +42,9 @@ + #include <linux/module.h> + #include <linux/smp_lock.h> + ++#include <ub/ub_net.h> ++#include <ub/ub_tcp.h> ++ + /* People can turn this off for buggy TCP's found in printers etc. */ + int sysctl_tcp_retrans_collapse = 1; + +@@ -171,13 +174,13 @@ static __inline__ u16 tcp_select_window( + /* Make sure we do not exceed the maximum possible + * scaled window. + */ +- if (!tp->rcv_wscale) ++ if (!tp->rx_opt.rcv_wscale) + new_win = min(new_win, MAX_TCP_WINDOW); + else +- new_win = min(new_win, (65535U << tp->rcv_wscale)); ++ new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); + + /* RFC1323 scaling applied */ +- new_win >>= tp->rcv_wscale; ++ new_win >>= tp->rx_opt.rcv_wscale; + + /* If we advertise zero window, disable fast path. */ + if (new_win == 0) +@@ -187,6 +190,13 @@ static __inline__ u16 tcp_select_window( + } + + ++static int skb_header_size(struct sock *sk, int tcp_hlen) ++{ ++ struct ip_options *opt = inet_sk(sk)->opt; ++ return tcp_hlen + sizeof(struct iphdr) + ++ (opt ? opt->optlen : 0) + ETH_HLEN /* For hard header */; ++} ++ + /* This routine actually transmits TCP packets queued in by + * tcp_do_sendmsg(). This is used by both the initial + * transmission and possible later retransmissions. +@@ -205,6 +215,7 @@ int tcp_transmit_skb(struct sock *sk, st + struct tcp_opt *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + int tcp_header_size = tp->tcp_header_len; ++ int header_size; + struct tcphdr *th; + int sysctl_flags; + int err; +@@ -229,14 +240,28 @@ int tcp_transmit_skb(struct sock *sk, st + if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) + tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; + } +- } else if (tp->eff_sacks) { ++ } else if (tp->rx_opt.eff_sacks) { + /* A SACK is 2 pad bytes, a 2 byte header, plus + * 2 32-bit sequence numbers for each SACK block. + */ + tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + +- (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); ++ (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); + } +- ++ ++ /* Unfortunately, we can have skb from outside world here ++ * with size insufficient for header. It is impossible to make ++ * guess when we queue skb, so the decision should be made ++ * here. 
Den ++ */ ++ header_size = skb_header_size(sk, tcp_header_size); ++ if (skb->data - header_size < skb->head) { ++ int delta = header_size - skb_headroom(skb); ++ err = pskb_expand_head(skb, SKB_DATA_ALIGN(delta), ++ 0, GFP_ATOMIC); ++ if (err) ++ return err; ++ } ++ + /* + * If the connection is idle and we are restarting, + * then we don't want to do any Vegas calculations +@@ -282,9 +307,9 @@ int tcp_transmit_skb(struct sock *sk, st + (sysctl_flags & SYSCTL_FLAG_TSTAMPS), + (sysctl_flags & SYSCTL_FLAG_SACK), + (sysctl_flags & SYSCTL_FLAG_WSCALE), +- tp->rcv_wscale, ++ tp->rx_opt.rcv_wscale, + tcb->when, +- tp->ts_recent); ++ tp->rx_opt.ts_recent); + } else { + tcp_build_and_update_options((__u32 *)(th + 1), + tp, tcb->when); +@@ -374,15 +399,23 @@ static int tcp_fragment(struct sock *sk, + int nsize = skb->len - len; + u16 flags; + +- if (skb_cloned(skb) && +- skb_is_nonlinear(skb) && +- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) +- return -ENOMEM; ++ if (skb_cloned(skb) && skb_is_nonlinear(skb)) { ++ unsigned long chargesize; ++ chargesize = skb_bc(skb)->charged; ++ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) ++ return -ENOMEM; ++ ub_sock_retwres_tcp(sk, chargesize, chargesize); ++ ub_tcpsndbuf_charge_forced(sk, skb); ++ } + + /* Get a new skb... force flag on. */ + buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); + if (buff == NULL) + return -ENOMEM; /* We'll just try again later. */ ++ if (ub_tcpsndbuf_charge(sk, buff) < 0) { ++ kfree_skb(buff); ++ return -ENOMEM; ++ } + sk_charge_skb(sk, buff); + + /* Correct the sequence numbers. */ +@@ -479,10 +512,10 @@ static int tcp_trim_head(struct sock *sk + + /* This function synchronize snd mss to current pmtu/exthdr set. + +- tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts ++ tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts + for TCP options, but includes only bare TCP header. + +- tp->mss_clamp is mss negotiated at connection setup. ++ tp->rx_opt.mss_clamp is mss negotiated at connection setup. + It is minumum of user_mss and mss received with SYN. + It also does not include TCP options. + +@@ -491,7 +524,7 @@ static int tcp_trim_head(struct sock *sk + tp->mss_cache is current effective sending mss, including + all tcp options except for SACKs. It is evaluated, + taking into account current pmtu, but never exceeds +- tp->mss_clamp. ++ tp->rx_opt.mss_clamp. + + NOTE1. rfc1122 clearly states that advertised MSS + DOES NOT include either tcp or ip options. +@@ -515,8 +548,8 @@ int tcp_sync_mss(struct sock *sk, u32 pm + mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + + /* Clamp it (mss_clamp does not include tcp options) */ +- if (mss_now > tp->mss_clamp) +- mss_now = tp->mss_clamp; ++ if (mss_now > tp->rx_opt.mss_clamp) ++ mss_now = tp->rx_opt.mss_clamp; + + /* Now subtract optional transport overhead */ + mss_now -= tp->ext_header_len + tp->ext2_header_len; +@@ -680,7 +713,7 @@ u32 __tcp_select_window(struct sock *sk) + if (free_space < full_space/2) { + tp->ack.quick = 0; + +- if (tcp_memory_pressure) ++ if (ub_tcp_shrink_rcvbuf(sk)) + tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); + + if (free_space < mss) +@@ -694,16 +727,16 @@ u32 __tcp_select_window(struct sock *sk) + * scaled window will not line up with the MSS boundary anyway. + */ + window = tp->rcv_wnd; +- if (tp->rcv_wscale) { ++ if (tp->rx_opt.rcv_wscale) { + window = free_space; + + /* Advertise enough space so that it won't get scaled away. 
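/*
 * Illustrative sketch of the headroom check added to tcp_transmit_skb
 * above: a queued buffer may not leave enough room in front of the
 * payload for the TCP, IP and link headers, so the required size is
 * computed and the head is grown before headers are pushed.  The buffer
 * type and the 54-byte figure below are stand-ins for struct sk_buff and
 * the real header budget.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf_sketch {
        unsigned char *head;            /* start of the allocation */
        unsigned char *data;            /* head..data is the headroom */
        size_t len;                     /* payload length */
};

static int ensure_headroom_sketch(struct buf_sketch *b, size_t need)
{
        size_t headroom = (size_t)(b->data - b->head);
        size_t delta;
        unsigned char *nhead;

        if (headroom >= need)
                return 0;
        delta = need - headroom;
        nhead = realloc(b->head, headroom + delta + b->len);
        if (!nhead)
                return -1;
        memmove(nhead + headroom + delta, nhead + headroom, b->len);
        b->head = nhead;
        b->data = nhead + headroom + delta;
        return 0;
}

int main(void)
{
        struct buf_sketch b;

        b.head = malloc(8 + 100);
        b.data = b.head + 8;            /* only 8 bytes of headroom left */
        b.len = 100;
        printf("grow rc      : %d\n", ensure_headroom_sketch(&b, 54));
        printf("headroom now : %zu\n", (size_t)(b.data - b.head));
        free(b.head);
        return 0;
}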
+ * Import case: prevent zero window announcement if + * 1<<rcv_wscale > mss. + */ +- if (((window >> tp->rcv_wscale) << tp->rcv_wscale) != window) +- window = (((window >> tp->rcv_wscale) + 1) +- << tp->rcv_wscale); ++ if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) ++ window = (((window >> tp->rx_opt.rcv_wscale) + 1) ++ << tp->rx_opt.rcv_wscale); + } else { + /* Get the largest window that is a nice multiple of mss. + * Window clamp already applied above. +@@ -778,7 +811,7 @@ static void tcp_retrans_try_collapse(str + tp->left_out--; + } + /* Reno case is special. Sigh... */ +- if (!tp->sack_ok && tp->sacked_out) { ++ if (!tp->rx_opt.sack_ok && tp->sacked_out) { + tp->sacked_out--; + tp->left_out--; + } +@@ -998,7 +1031,7 @@ void tcp_xmit_retransmit_queue(struct so + return; + + /* No forward retransmissions in Reno are possible. */ +- if (!tp->sack_ok) ++ if (!tp->rx_opt.sack_ok) + return; + + /* Yeah, we have to make difficult choice between forward transmission +@@ -1062,6 +1095,7 @@ void tcp_send_fin(struct sock *sk) + break; + yield(); + } ++ ub_tcpsndbuf_charge_forced(sk, skb); + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); +@@ -1127,6 +1161,10 @@ int tcp_send_synack(struct sock *sk) + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + if (nskb == NULL) + return -ENOMEM; ++ if (ub_tcpsndbuf_charge(sk, skb) < 0) { ++ kfree_skb(nskb); ++ return -ENOMEM; ++ } + __skb_unlink(skb, &sk->sk_write_queue); + __skb_queue_head(&sk->sk_write_queue, nskb); + sk_stream_free_skb(sk, skb); +@@ -1224,23 +1262,38 @@ static inline void tcp_connect_init(stru + (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + + /* If user gave his TCP_MAXSEG, record it to clamp */ +- if (tp->user_mss) +- tp->mss_clamp = tp->user_mss; ++ if (tp->rx_opt.user_mss) ++ tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; + tp->max_window = 0; + tcp_sync_mss(sk, dst_pmtu(dst)); + ++ if (tp->advmss == 0 || dst_metric(dst, RTAX_ADVMSS) == 0) { ++ printk("Oops in connect_init! tp->advmss=%d, dst->advmss=%d\n", ++ tp->advmss, dst_metric(dst, RTAX_ADVMSS)); ++ printk("dst: pmtu=%u, advmss=%u\n", ++ dst_metric(dst, RTAX_MTU), ++ dst_metric(dst, RTAX_ADVMSS)); ++ printk("sk->state=%d, tp: ack.rcv_mss=%d, mss_cache=%d, " ++ "advmss=%d, user_mss=%d\n", ++ sk->sk_state, tp->ack.rcv_mss, tp->mss_cache, ++ tp->advmss, tp->rx_opt.user_mss); ++ } ++ + if (!tp->window_clamp) + tp->window_clamp = dst_metric(dst, RTAX_WINDOW); +- tp->advmss = dst_metric(dst, RTAX_ADVMSS); ++ if (dst_metric(dst, RTAX_ADVMSS) < tp->advmss) ++ tp->advmss = dst_metric(dst, RTAX_ADVMSS); ++ if (tp->advmss == 0) ++ tp->advmss = 1460; + tcp_initialize_rcv_mss(sk); + tcp_vegas_init(tp); + + tcp_select_initial_window(tcp_full_space(sk), +- tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), ++ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, + sysctl_tcp_window_scaling, +- &tp->rcv_wscale); ++ &tp->rx_opt.rcv_wscale); + + tp->rcv_ssthresh = tp->rcv_wnd; + +@@ -1272,6 +1325,10 @@ int tcp_connect(struct sock *sk) + buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); + if (unlikely(buff == NULL)) + return -ENOBUFS; ++ if (ub_tcpsndbuf_charge(sk, buff) < 0) { ++ kfree_skb(buff); ++ return -ENOBUFS; ++ } + + /* Reserve space for headers. 
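/*
 * Illustrative sketch, with concrete numbers, of the rounding done in
 * __tcp_select_window above: the advertised window travels in the header
 * as (window >> rcv_wscale), so a small amount of free space could
 * truncate to an announced window of zero.  Rounding up to the next
 * multiple of 1 << rcv_wscale avoids that.
 */
#include <stdio.h>

static unsigned int round_to_wscale(unsigned int window, int wscale)
{
        if (((window >> wscale) << wscale) != window)
                window = ((window >> wscale) + 1) << wscale;
        return window;
}

int main(void)
{
        unsigned int free_space = 1000;
        int wscale = 10;                /* granularity of 1 << 10 = 1024 bytes */

        printf("naive field  : %u\n", free_space >> wscale);           /* 0 */
        printf("rounded wnd  : %u\n", round_to_wscale(free_space, wscale));
        printf("field sent   : %u\n", round_to_wscale(free_space, wscale) >> wscale);
        return 0;
}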
*/ + skb_reserve(buff, MAX_TCP_HEADER); +diff -uprN linux-2.6.8.1.orig/net/ipv4/tcp_timer.c linux-2.6.8.1-ve022stab078/net/ipv4/tcp_timer.c +--- linux-2.6.8.1.orig/net/ipv4/tcp_timer.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv4/tcp_timer.c 2006-05-11 13:05:42.000000000 +0400 +@@ -22,6 +22,8 @@ + + #include <linux/module.h> + #include <net/tcp.h> ++#include <ub/ub_orphan.h> ++#include <ub/ub_tcp.h> + + int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; +@@ -100,7 +102,7 @@ static void tcp_write_err(struct sock *s + static int tcp_out_of_resources(struct sock *sk, int do_reset) + { + struct tcp_opt *tp = tcp_sk(sk); +- int orphans = atomic_read(&tcp_orphan_count); ++ int orphans = tcp_get_orphan_count(sk); + + /* If peer does not open window for long time, or did not transmit + * anything for long time, penalize it. */ +@@ -111,9 +113,7 @@ static int tcp_out_of_resources(struct s + if (sk->sk_err_soft) + orphans <<= 1; + +- if (orphans >= sysctl_tcp_max_orphans || +- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && +- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { ++ if (tcp_too_many_orphans(sk, orphans)) { + if (net_ratelimit()) + printk(KERN_INFO "Out of socket memory\n"); + +@@ -206,6 +206,7 @@ static int tcp_write_timeout(struct sock + static void tcp_delack_timer(unsigned long data) + { + struct sock *sk = (struct sock*)data; ++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk)); + struct tcp_opt *tp = tcp_sk(sk); + + bh_lock_sock(sk); +@@ -257,11 +258,12 @@ static void tcp_delack_timer(unsigned lo + TCP_CHECK_TIMER(sk); + + out: +- if (tcp_memory_pressure) ++ if (ub_tcp_memory_pressure(sk)) + sk_stream_mem_reclaim(sk); + out_unlock: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } + + static void tcp_probe_timer(struct sock *sk) +@@ -315,6 +317,9 @@ static void tcp_probe_timer(struct sock + static void tcp_retransmit_timer(struct sock *sk) + { + struct tcp_opt *tp = tcp_sk(sk); ++ struct ve_struct *ve_old; ++ ++ ve_old = set_exec_env(VE_OWNER_SK(sk)); + + if (tp->packets_out == 0) + goto out; +@@ -351,7 +356,7 @@ static void tcp_retransmit_timer(struct + + if (tp->retransmits == 0) { + if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { +- if (tp->sack_ok) { ++ if (tp->rx_opt.sack_ok) { + if (tp->ca_state == TCP_CA_Recovery) + NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); + else +@@ -410,12 +415,14 @@ out_reset_timer: + if (tp->retransmits > sysctl_tcp_retries1) + __sk_dst_reset(sk); + +-out:; ++out: ++ (void)set_exec_env(ve_old); + } + + static void tcp_write_timer(unsigned long data) + { + struct sock *sk = (struct sock*)data; ++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk)); + struct tcp_opt *tp = tcp_sk(sk); + int event; + +@@ -452,6 +459,7 @@ out: + out_unlock: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } + + /* +@@ -571,6 +579,7 @@ void tcp_set_keepalive(struct sock *sk, + static void tcp_keepalive_timer (unsigned long data) + { + struct sock *sk = (struct sock *) data; ++ struct ve_struct *env = set_exec_env(VE_OWNER_SK(sk)); + struct tcp_opt *tp = tcp_sk(sk); + __u32 elapsed; + +@@ -645,6 +654,7 @@ death: + out: + bh_unlock_sock(sk); + sock_put(sk); ++ (void)set_exec_env(env); + } + + EXPORT_SYMBOL(tcp_clear_xmit_timers); +diff -uprN linux-2.6.8.1.orig/net/ipv4/udp.c linux-2.6.8.1-ve022stab078/net/ipv4/udp.c +--- linux-2.6.8.1.orig/net/ipv4/udp.c 2004-08-14 14:54:50.000000000 +0400 ++++ 
linux-2.6.8.1-ve022stab078/net/ipv4/udp.c 2006-05-11 13:05:42.000000000 +0400 +@@ -125,7 +125,9 @@ static int udp_v4_get_port(struct sock * + struct hlist_node *node; + struct sock *sk2; + struct inet_opt *inet = inet_sk(sk); ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + write_lock_bh(&udp_hash_lock); + if (snum == 0) { + int best_size_so_far, best, result, i; +@@ -139,7 +141,7 @@ static int udp_v4_get_port(struct sock * + struct hlist_head *list; + int size; + +- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; ++ list = &udp_hash[udp_hashfn(result, VEID(env))]; + if (hlist_empty(list)) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + +@@ -161,7 +163,7 @@ static int udp_v4_get_port(struct sock * + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); +- if (!udp_lport_inuse(result)) ++ if (!udp_lport_inuse(result, env)) + break; + } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) +@@ -170,11 +172,12 @@ gotit: + udp_port_rover = snum = result; + } else { + sk_for_each(sk2, node, +- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { ++ &udp_hash[udp_hashfn(snum, VEID(env))]) { + struct inet_opt *inet2 = inet_sk(sk2); + + if (inet2->num == snum && + sk2 != sk && ++ ve_accessible_strict(VE_OWNER_SK(sk2), env) && + !ipv6_only_sock(sk2) && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || +@@ -188,7 +191,7 @@ gotit: + } + inet->num = snum; + if (sk_unhashed(sk)) { +- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; ++ struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))]; + + sk_add_node(sk, h); + sock_prot_inc_use(sk->sk_prot); +@@ -225,11 +228,15 @@ struct sock *udp_v4_lookup_longway(u32 s + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; ++ struct ve_struct *env; + +- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { ++ env = get_exec_env(); ++ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) { + struct inet_opt *inet = inet_sk(sk); + +- if (inet->num == hnum && !ipv6_only_sock(sk)) { ++ if (inet->num == hnum && ++ ve_accessible_strict(VE_OWNER_SK(sk), env) && ++ !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 
1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) +@@ -1053,7 +1060,8 @@ static int udp_v4_mcast_deliver(struct s + int dif; + + read_lock(&udp_hash_lock); +- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); ++ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest), ++ VEID(VE_OWNER_SKB(skb)))]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (sk) { +@@ -1329,10 +1337,14 @@ static struct sock *udp_get_first(struct + { + struct sock *sk; + struct udp_iter_state *state = seq->private; ++ struct ve_struct *env; + ++ env = get_exec_env(); + for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { ++ if (!ve_accessible(VE_OWNER_SK(sk), env)) ++ continue; + if (sk->sk_family == state->family) + goto found; + } +@@ -1349,8 +1361,13 @@ static struct sock *udp_get_next(struct + do { + sk = sk_next(sk); + try_again: +- ; +- } while (sk && sk->sk_family != state->family); ++ if (!sk) ++ break; ++ if (sk->sk_family != state->family) ++ continue; ++ if (ve_accessible(VE_OWNER_SK(sk), get_exec_env())) ++ break; ++ } while (1); + + if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { + sk = sk_head(&udp_hash[state->bucket]); +diff -uprN linux-2.6.8.1.orig/net/ipv6/addrconf.c linux-2.6.8.1-ve022stab078/net/ipv6/addrconf.c +--- linux-2.6.8.1.orig/net/ipv6/addrconf.c 2004-08-14 14:56:22.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/addrconf.c 2006-05-11 13:05:42.000000000 +0400 +@@ -1875,6 +1875,10 @@ static int addrconf_notify(struct notifi + struct net_device *dev = (struct net_device *) data; + struct inet6_dev *idev = __in6_dev_get(dev); + ++ /* not virtualized yet */ ++ if (!ve_is_super(get_exec_env())) ++ return NOTIFY_OK; ++ + switch(event) { + case NETDEV_UP: + switch(dev->type) { +diff -uprN linux-2.6.8.1.orig/net/ipv6/datagram.c linux-2.6.8.1-ve022stab078/net/ipv6/datagram.c +--- linux-2.6.8.1.orig/net/ipv6/datagram.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/datagram.c 2006-05-11 13:05:33.000000000 +0400 +@@ -416,9 +416,7 @@ int datagram_send_ctl(struct msghdr *msg + int addr_type; + struct net_device *dev = NULL; + +- if (cmsg->cmsg_len < sizeof(struct cmsghdr) || +- (unsigned long)(((char*)cmsg - (char*)msg->msg_control) +- + cmsg->cmsg_len) > msg->msg_controllen) { ++ if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto exit_f; + } +diff -uprN linux-2.6.8.1.orig/net/ipv6/ip6_output.c linux-2.6.8.1-ve022stab078/net/ipv6/ip6_output.c +--- linux-2.6.8.1.orig/net/ipv6/ip6_output.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/ip6_output.c 2006-05-11 13:05:25.000000000 +0400 +@@ -593,6 +593,7 @@ static int ip6_fragment(struct sk_buff * + /* Prepare header of the next frame, + * before previous one went down. 
*/ + if (frag) { ++ frag->ip_summed = CHECKSUM_NONE; + frag->h.raw = frag->data; + fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); + frag->nh.raw = __skb_push(frag, hlen); +diff -uprN linux-2.6.8.1.orig/net/ipv6/ipv6_sockglue.c linux-2.6.8.1-ve022stab078/net/ipv6/ipv6_sockglue.c +--- linux-2.6.8.1.orig/net/ipv6/ipv6_sockglue.c 2004-08-14 14:54:48.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/ipv6_sockglue.c 2006-05-11 13:05:34.000000000 +0400 +@@ -503,6 +503,9 @@ done: + break; + case IPV6_IPSEC_POLICY: + case IPV6_XFRM_POLICY: ++ retv = -EPERM; ++ if (!capable(CAP_NET_ADMIN)) ++ break; + retv = xfrm_user_policy(sk, optname, optval, optlen); + break; + +diff -uprN linux-2.6.8.1.orig/net/ipv6/mcast.c linux-2.6.8.1-ve022stab078/net/ipv6/mcast.c +--- linux-2.6.8.1.orig/net/ipv6/mcast.c 2004-08-14 14:56:01.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/mcast.c 2006-05-11 13:05:42.000000000 +0400 +@@ -389,12 +389,12 @@ int ip6_mc_source(int add, int omode, st + goto done; + rv = !0; + for (i=0; i<psl->sl_count; i++) { +- rv = memcmp(&psl->sl_addr, group, ++ rv = memcmp(&psl->sl_addr[i], source, + sizeof(struct in6_addr)); +- if (rv >= 0) ++ if (rv == 0) + break; + } +- if (!rv) /* source not found */ ++ if (rv) /* source not found */ + goto done; + + /* update the interface filter */ +@@ -435,8 +435,8 @@ int ip6_mc_source(int add, int omode, st + } + rv = 1; /* > 0 for insert logic below if sl_count is 0 */ + for (i=0; i<psl->sl_count; i++) { +- rv = memcmp(&psl->sl_addr, group, sizeof(struct in6_addr)); +- if (rv >= 0) ++ rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr)); ++ if (rv == 0) + break; + } + if (rv == 0) /* address already there is an error */ +@@ -1175,6 +1175,11 @@ int igmp6_event_report(struct sk_buff *s + if (skb->pkt_type == PACKET_LOOPBACK) + return 0; + ++ /* send our report if the MC router may not have heard this report */ ++ if (skb->pkt_type != PACKET_MULTICAST && ++ skb->pkt_type != PACKET_BROADCAST) ++ return 0; ++ + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + return -EINVAL; + +diff -uprN linux-2.6.8.1.orig/net/ipv6/netfilter/ip6_queue.c linux-2.6.8.1-ve022stab078/net/ipv6/netfilter/ip6_queue.c +--- linux-2.6.8.1.orig/net/ipv6/netfilter/ip6_queue.c 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/netfilter/ip6_queue.c 2006-05-11 13:05:27.000000000 +0400 +@@ -71,7 +71,9 @@ static DECLARE_MUTEX(ipqnl_sem); + static void + ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) + { ++ local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); ++ local_bh_enable(); + kfree(entry); + } + +diff -uprN linux-2.6.8.1.orig/net/ipv6/tcp_ipv6.c linux-2.6.8.1-ve022stab078/net/ipv6/tcp_ipv6.c +--- linux-2.6.8.1.orig/net/ipv6/tcp_ipv6.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/tcp_ipv6.c 2006-05-11 13:05:42.000000000 +0400 +@@ -142,7 +142,7 @@ static int tcp_v6_get_port(struct sock * + do { rover++; + if ((rover < low) || (rover > high)) + rover = low; +- head = &tcp_bhash[tcp_bhashfn(rover)]; ++ head = &tcp_bhash[tcp_bhashfn(rover, 0)]; + spin_lock(&head->lock); + tb_for_each(tb, node, &head->chain) + if (tb->port == rover) +@@ -162,7 +162,7 @@ static int tcp_v6_get_port(struct sock * + /* OK, here is the one we will use. 
*/ + snum = rover; + } else { +- head = &tcp_bhash[tcp_bhashfn(snum)]; ++ head = &tcp_bhash[tcp_bhashfn(snum, 0)]; + spin_lock(&head->lock); + tb_for_each(tb, node, &head->chain) + if (tb->port == snum) +@@ -183,7 +183,7 @@ tb_found: + } + tb_not_found: + ret = 1; +- if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) ++ if (!tb && (tb = tcp_bucket_create(head, snum, NULL)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) +@@ -255,7 +255,7 @@ static struct sock *tcp_v6_lookup_listen + + hiscore=0; + read_lock(&tcp_lhash_lock); +- sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { ++ sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum, 0)]) { + if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + +@@ -470,8 +470,8 @@ static int tcp_v6_check_established(stru + tp->write_seq = tw->tw_snd_nxt + 65535 + 2; + if (!tp->write_seq) + tp->write_seq = 1; +- tp->ts_recent = tw->tw_ts_recent; +- tp->ts_recent_stamp = tw->tw_ts_recent_stamp; ++ tp->rx_opt.ts_recent = tw->tw_ts_recent; ++ tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + sock_hold(sk2); + goto unique; + } else +@@ -522,7 +522,7 @@ static int tcp_v6_hash_connect(struct so + inet_sk(sk)->sport = htons(inet_sk(sk)->num); + } + +- head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; ++ head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num, 0)]; + tb = tb_head(head); + + spin_lock_bh(&head->lock); +@@ -606,10 +606,10 @@ static int tcp_v6_connect(struct sock *s + return -EINVAL; + } + +- if (tp->ts_recent_stamp && ++ if (tp->rx_opt.ts_recent_stamp && + ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) { +- tp->ts_recent = 0; +- tp->ts_recent_stamp = 0; ++ tp->rx_opt.ts_recent = 0; ++ tp->rx_opt.ts_recent_stamp = 0; + tp->write_seq = 0; + } + +@@ -686,13 +686,15 @@ static int tcp_v6_connect(struct sock *s + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + + tp->ext_header_len = 0; + if (np->opt) + tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + tp->ext2_header_len = dst->header_len; + +- tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); ++ tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); + + inet->dport = usin->sin6_port; + +@@ -1166,7 +1168,8 @@ static void tcp_v6_synq_add(struct sock + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) + { + struct ipv6_pinfo *np = inet6_sk(sk); +- struct tcp_opt tmptp, *tp = tcp_sk(sk); ++ struct tcp_options_received tmp_opt; ++ struct tcp_opt *tp = tcp_sk(sk); + struct open_request *req = NULL; + __u32 isn = TCP_SKB_CB(skb)->when; + +@@ -1192,14 +1195,14 @@ static int tcp_v6_conn_request(struct so + if (req == NULL) + goto drop; + +- tcp_clear_options(&tmptp); +- tmptp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); +- tmptp.user_mss = tp->user_mss; ++ tcp_clear_options(&tmp_opt); ++ tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); ++ tmp_opt.user_mss = tp->rx_opt.user_mss; + +- tcp_parse_options(skb, &tmptp, 0); ++ tcp_parse_options(skb, &tmp_opt, 0); + +- tmptp.tstamp_ok = tmptp.saw_tstamp; +- tcp_openreq_init(req, &tmptp, skb); ++ tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; ++ tcp_openreq_init(req, &tmp_opt, skb); + + req->class = &or_ipv6; + ipv6_addr_copy(&req->af.v6_req.rmt_addr, 
&skb->nh.ipv6h->saddr); +@@ -1343,6 +1346,8 @@ static struct sock * tcp_v6_syn_recv_soc + ip6_dst_store(newsk, dst, NULL); + newsk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + + newtcp6sk = (struct tcp6_sock *)newsk; + newtcp6sk->pinet6 = &newtcp6sk->inet6; +@@ -1675,12 +1680,14 @@ do_time_wait: + goto discard_it; + } + ++ spin_lock(&((struct tcp_tw_bucket *)sk)->tw_lock); + switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, + skb, th, skb->len)) { + case TCP_TW_SYN: + { + struct sock *sk2; + ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); + sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + if (sk2 != NULL) { + tcp_tw_deschedule((struct tcp_tw_bucket *)sk); +@@ -1694,9 +1701,13 @@ do_time_wait: + tcp_v6_timewait_ack(sk, skb); + break; + case TCP_TW_RST: ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); ++ tcp_tw_put((struct tcp_tw_bucket *)sk); + goto no_tcp_socket; + case TCP_TW_SUCCESS:; + } ++ spin_unlock(&((struct tcp_tw_bucket *)sk)->tw_lock); ++ tcp_tw_put((struct tcp_tw_bucket *)sk); + goto discard_it; + } + +@@ -1736,6 +1747,8 @@ static int tcp_v6_rebuild_header(struct + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + tcp_sk(sk)->ext2_header_len = dst->header_len; + } + +@@ -1778,6 +1791,8 @@ static int tcp_v6_xmit(struct sk_buff *s + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); ++ if (!sysctl_tcp_use_sg) ++ sk->sk_route_caps &= ~NETIF_F_SG; + tcp_sk(sk)->ext2_header_len = dst->header_len; + } + +diff -uprN linux-2.6.8.1.orig/net/ipv6/udp.c linux-2.6.8.1-ve022stab078/net/ipv6/udp.c +--- linux-2.6.8.1.orig/net/ipv6/udp.c 2004-08-14 14:56:00.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/ipv6/udp.c 2006-05-11 13:05:42.000000000 +0400 +@@ -67,7 +67,9 @@ static int udp_v6_get_port(struct sock * + { + struct sock *sk2; + struct hlist_node *node; ++ struct ve_struct *env; + ++ env = VE_OWNER_SK(sk); + write_lock_bh(&udp_hash_lock); + if (snum == 0) { + int best_size_so_far, best, result, i; +@@ -81,7 +83,7 @@ static int udp_v6_get_port(struct sock * + int size; + struct hlist_head *list; + +- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; ++ list = &udp_hash[udp_hashfn(result, VEID(env))]; + if (hlist_empty(list)) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + +@@ -103,16 +105,17 @@ static int udp_v6_get_port(struct sock * + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); +- if (!udp_lport_inuse(result)) ++ if (!udp_lport_inuse(result, env)) + break; + } + gotit: + udp_port_rover = snum = result; + } else { + sk_for_each(sk2, node, +- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { ++ &udp_hash[udp_hashfn(snum, VEID(env))]) { + if (inet_sk(sk2)->num == snum && + sk2 != sk && ++ ve_accessible_strict(VE_OWNER_SK(sk2), env) && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && +@@ -124,7 +127,7 @@ gotit: + + inet_sk(sk)->num = snum; + if (sk_unhashed(sk)) { +- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); ++ sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]); + sock_prot_inc_use(sk->sk_prot); + } + write_unlock_bh(&udp_hash_lock); +diff -uprN 
linux-2.6.8.1.orig/net/netlink/af_netlink.c linux-2.6.8.1-ve022stab078/net/netlink/af_netlink.c +--- linux-2.6.8.1.orig/net/netlink/af_netlink.c 2004-08-14 14:55:32.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/netlink/af_netlink.c 2006-05-11 13:05:45.000000000 +0400 +@@ -47,26 +47,15 @@ + #include <net/sock.h> + #include <net/scm.h> + ++#include <ub/beancounter.h> ++#include <ub/ub_net.h> ++ + #define Nprintk(a...) + + #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) + #define NL_EMULATE_DEV + #endif + +-struct netlink_opt +-{ +- u32 pid; +- unsigned groups; +- u32 dst_pid; +- unsigned dst_groups; +- unsigned long state; +- int (*handler)(int unit, struct sk_buff *skb); +- wait_queue_head_t wait; +- struct netlink_callback *cb; +- spinlock_t cb_lock; +- void (*data_ready)(struct sock *sk, int bytes); +-}; +- + #define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo) + + static struct hlist_head nl_table[MAX_LINKS]; +@@ -165,7 +154,10 @@ static __inline__ struct sock *netlink_l + + read_lock(&nl_table_lock); + sk_for_each(sk, node, &nl_table[protocol]) { +- if (nlk_sk(sk)->pid == pid) { ++ /* VEs should find sockets, created by kernel */ ++ if ((nlk_sk(sk)->pid == pid) && ++ (!pid || ve_accessible_strict(VE_OWNER_SK(sk), ++ get_exec_env()))){ + sock_hold(sk); + goto found; + } +@@ -186,7 +178,9 @@ static int netlink_insert(struct sock *s + + netlink_table_grab(); + sk_for_each(osk, node, &nl_table[sk->sk_protocol]) { +- if (nlk_sk(osk)->pid == pid) ++ if ((nlk_sk(osk)->pid == pid) && ++ ve_accessible_strict(VE_OWNER_SK(osk), ++ get_exec_env())) + break; + } + if (!node) { +@@ -226,15 +220,16 @@ static int netlink_create(struct socket + sk = sk_alloc(PF_NETLINK, GFP_KERNEL, 1, NULL); + if (!sk) + return -ENOMEM; ++ if (ub_other_sock_charge(sk)) ++ goto out_free; + + sock_init_data(sock,sk); + sk_set_owner(sk, THIS_MODULE); + + nlk = sk->sk_protinfo = kmalloc(sizeof(*nlk), GFP_KERNEL); +- if (!nlk) { +- sk_free(sk); +- return -ENOMEM; +- } ++ if (!nlk) ++ goto out_free; ++ + memset(nlk, 0, sizeof(*nlk)); + + spin_lock_init(&nlk->cb_lock); +@@ -244,6 +239,10 @@ static int netlink_create(struct socket + + sk->sk_protocol = protocol; + return 0; ++ ++out_free: ++ sk_free(sk); ++ return -ENOMEM; + } + + static int netlink_release(struct socket *sock) +@@ -255,6 +254,7 @@ static int netlink_release(struct socket + return 0; + + netlink_remove(sk); ++ sock_orphan(sk); + nlk = nlk_sk(sk); + + spin_lock(&nlk->cb_lock); +@@ -269,7 +269,6 @@ static int netlink_release(struct socket + /* OK. Socket is unlinked, and, therefore, + no new packets will arrive */ + +- sock_orphan(sk); + sock->sk = NULL; + wake_up_interruptible_all(&nlk->wait); + +@@ -292,13 +291,15 @@ static int netlink_autobind(struct socke + struct sock *sk = sock->sk; + struct sock *osk; + struct hlist_node *node; +- s32 pid = current->pid; ++ s32 pid = virt_pid(current); + int err; + + retry: + netlink_table_grab(); + sk_for_each(osk, node, &nl_table[sk->sk_protocol]) { +- if (nlk_sk(osk)->pid == pid) { ++ if ((nlk_sk(osk)->pid == pid) && ++ ve_accessible_strict(VE_OWNER_SK(osk), ++ get_exec_env())){ + /* Bind collision, search negative pid values. 
*/ + if (pid > 0) + pid = -4096; +@@ -319,7 +320,7 @@ retry: + static inline int netlink_capable(struct socket *sock, unsigned flag) + { + return (nl_nonroot[sock->sk->sk_protocol] & flag) || +- capable(CAP_NET_ADMIN); ++ capable(CAP_VE_NET_ADMIN); + } + + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +@@ -465,7 +466,8 @@ struct sock *netlink_getsockbyfilp(struc + * 0: continue + * 1: repeat lookup - reference dropped while waiting for socket memory. + */ +-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo) ++int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, ++ long timeo, struct sock *ssk) + { + struct netlink_opt *nlk; + +@@ -479,7 +481,7 @@ int netlink_attachskb(struct sock *sk, s + test_bit(0, &nlk->state)) { + DECLARE_WAITQUEUE(wait, current); + if (!timeo) { +- if (!nlk->pid) ++ if (!ssk || nlk_sk(ssk)->pid == 0) + netlink_overrun(sk); + sock_put(sk); + kfree_skb(skb); +@@ -523,6 +525,11 @@ int netlink_sendskb(struct sock *sk, str + return len; + } + #endif ++ if (ub_sockrcvbuf_charge(sk, skb) < 0) { ++ sock_put(sk); ++ kfree_skb(skb); ++ return -EACCES; ++ } + + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, len); +@@ -549,7 +556,7 @@ retry: + kfree_skb(skb); + return PTR_ERR(sk); + } +- err = netlink_attachskb(sk, skb, nonblock, timeo); ++ err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); + if (err == 1) + goto retry; + if (err) +@@ -570,12 +577,15 @@ static __inline__ int netlink_broadcast_ + #endif + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !test_bit(0, &nlk->state)) { ++ if (ub_sockrcvbuf_charge(sk, skb)) ++ goto out; + skb_orphan(skb); + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); + return 0; + } ++out: + return -1; + } + +@@ -601,6 +611,9 @@ int netlink_broadcast(struct sock *ssk, + if (nlk->pid == pid || !(nlk->groups & group)) + continue; + ++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk))) ++ continue; ++ + if (failure) { + netlink_overrun(sk); + continue; +@@ -656,6 +669,9 @@ void netlink_set_err(struct sock *ssk, u + if (nlk->pid == pid || !(nlk->groups & group)) + continue; + ++ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk))) ++ continue; ++ + sk->sk_err = code; + sk->sk_error_report(sk); + } +@@ -678,12 +694,17 @@ static int netlink_sendmsg(struct kiocb + struct sock_iocb *siocb = kiocb_to_siocb(kiocb); + struct sock *sk = sock->sk; + struct netlink_opt *nlk = nlk_sk(sk); +- struct sockaddr_nl *addr=msg->msg_name; ++ struct sockaddr_nl *addr = msg->msg_name; + u32 dst_pid; +- u32 dst_groups; + struct sk_buff *skb; + int err; + struct scm_cookie scm; ++ struct sock *dstsk; ++ long timeo; ++ int no_ubc, no_buf; ++ unsigned long chargesize; ++ ++ DECLARE_WAITQUEUE(wait, current); + + if (msg->msg_flags&MSG_OOB) + return -EOPNOTSUPP; +@@ -694,17 +715,16 @@ static int netlink_sendmsg(struct kiocb + if (err < 0) + return err; + ++ /* Broadcasts are disabled as it was in 2.4 with UBC. According to ++ * ANK this is OK. 
Den */ + if (msg->msg_namelen) { + if (addr->nl_family != AF_NETLINK) + return -EINVAL; + dst_pid = addr->nl_pid; +- dst_groups = addr->nl_groups; +- if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) ++ if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + return -EPERM; +- } else { ++ } else + dst_pid = nlk->dst_pid; +- dst_groups = nlk->dst_groups; +- } + + if (!nlk->pid) { + err = netlink_autobind(sock); +@@ -717,13 +737,13 @@ static int netlink_sendmsg(struct kiocb + goto out; + err = -ENOBUFS; + skb = alloc_skb(len, GFP_KERNEL); +- if (skb==NULL) ++ if (skb == NULL) + goto out; + + NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).groups = nlk->groups; + NETLINK_CB(skb).dst_pid = dst_pid; +- NETLINK_CB(skb).dst_groups = dst_groups; ++ NETLINK_CB(skb).dst_groups = 0; + memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + + /* What can I do? Netlink is asynchronous, so that +@@ -733,25 +753,88 @@ static int netlink_sendmsg(struct kiocb + */ + + err = -EFAULT; +- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) { +- kfree_skb(skb); +- goto out; +- } ++ if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) ++ goto out_free; + + err = security_netlink_send(sk, skb); +- if (err) { +- kfree_skb(skb); +- goto out; ++ if (err) ++ goto out_free; ++ ++ timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT); ++retry: ++ dstsk = netlink_getsockbypid(sk, dst_pid); ++ if (IS_ERR(dstsk)) { ++ err = PTR_ERR(dstsk); ++ goto out_free; ++ } ++ ++ nlk = nlk_sk(dstsk); ++#ifdef NL_EMULATE_DEV ++ if (nlk->handler) { ++ skb_orphan(skb); ++ err = nlk->handler(protocol, skb); ++ goto out_put; + } ++#endif ++ ++ /* BTW, it could be done once, before the retry loop */ ++ chargesize = skb_charge_fullsize(skb); ++ no_ubc = ub_sock_getwres_other(sk, chargesize); ++ no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf || ++ test_bit(0, &nlk->state); ++ if (no_ubc || no_buf) { ++ wait_queue_head_t *sleep; ++ ++ if (!no_ubc) ++ ub_sock_retwres_other(sk, chargesize, ++ SOCK_MIN_UBCSPACE_CH); ++ err = -EAGAIN; ++ if (timeo == 0) { ++ kfree_skb(skb); ++ goto out_put; ++ } + +- if (dst_groups) { +- atomic_inc(&skb->users); +- netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); ++ /* wake up comes to different queues */ ++ sleep = no_ubc ? 
sk->sk_sleep : &nlk->wait; ++ __set_current_state(TASK_INTERRUPTIBLE); ++ add_wait_queue(sleep, &wait); ++ ++ /* this if can't be moved upper because ub_sock_snd_queue_add() ++ * may change task state to TASK_RUNNING */ ++ if (no_ubc) ++ ub_sock_sndqueueadd_other(sk, chargesize); ++ ++ if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf || ++ test_bit(0, &nlk->state) || no_ubc) && ++ !sock_flag(dstsk, SOCK_DEAD)) ++ timeo = schedule_timeout(timeo); ++ ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(sleep, &wait); ++ if (no_ubc) ++ ub_sock_sndqueuedel(sk); ++ sock_put(dstsk); ++ ++ if (!signal_pending(current)) ++ goto retry; ++ err = sock_intr_errno(timeo); ++ goto out_free; + } +- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + ++ skb_orphan(skb); ++ skb_set_owner_r(skb, dstsk); ++ ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF); ++ skb_queue_tail(&dstsk->sk_receive_queue, skb); ++ dstsk->sk_data_ready(dstsk, len); ++ err = len; ++out_put: ++ sock_put(dstsk); + out: + return err; ++ ++out_free: ++ kfree_skb(skb); ++ return err; + } + + static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, +@@ -882,6 +965,10 @@ static int netlink_dump(struct sock *sk) + skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); + if (!skb) + return -ENOBUFS; ++ if (ub_nlrcvbuf_charge(skb, sk) < 0) { ++ kfree_skb(skb); ++ return -EACCES; ++ } + + spin_lock(&nlk->cb_lock); + +@@ -942,9 +1029,9 @@ int netlink_dump_start(struct sock *ssk, + return -ECONNREFUSED; + } + nlk = nlk_sk(sk); +- /* A dump is in progress... */ ++ /* A dump or destruction is in progress... */ + spin_lock(&nlk->cb_lock); +- if (nlk->cb) { ++ if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { + spin_unlock(&nlk->cb_lock); + netlink_destroy_callback(cb); + sock_put(sk); +@@ -1198,6 +1285,7 @@ static int __init netlink_proto_init(voi + } + sock_register(&netlink_family_ops); + #ifdef CONFIG_PROC_FS ++ /* FIXME: virtualize before give access from VEs */ + proc_net_fops_create("netlink", 0, &netlink_seq_fops); + #endif + /* The netlink device handler may be needed early. */ +diff -uprN linux-2.6.8.1.orig/net/packet/af_packet.c linux-2.6.8.1-ve022stab078/net/packet/af_packet.c +--- linux-2.6.8.1.orig/net/packet/af_packet.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/packet/af_packet.c 2006-05-11 13:05:42.000000000 +0400 +@@ -71,6 +71,8 @@ + #include <linux/module.h> + #include <linux/init.h> + ++#include <ub/ub_net.h> ++ + #ifdef CONFIG_INET + #include <net/inet_common.h> + #endif +@@ -260,7 +262,8 @@ static int packet_rcv_spkt(struct sk_buf + * so that this procedure is noop. 
+ */ + +- if (skb->pkt_type == PACKET_LOOPBACK) ++ if (skb->pkt_type == PACKET_LOOPBACK || ++ !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) + goto out; + + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) +@@ -449,6 +452,9 @@ static int packet_rcv(struct sk_buff *sk + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) ++ goto drop; ++ + skb->dev = dev; + + if (dev->hard_header) { +@@ -508,6 +514,9 @@ static int packet_rcv(struct sk_buff *sk + if (pskb_trim(skb, snaplen)) + goto drop_n_acct; + ++ if (ub_sockrcvbuf_charge(sk, skb)) ++ goto drop_n_acct; ++ + skb_set_owner_r(skb, sk); + skb->dev = NULL; + dst_release(skb->dst); +@@ -555,6 +564,9 @@ static int tpacket_rcv(struct sk_buff *s + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk))) ++ goto drop; ++ + if (dev->hard_header) { + if (sk->sk_type != SOCK_DGRAM) + skb_push(skb, skb->data - skb->mac.raw); +@@ -604,6 +616,12 @@ static int tpacket_rcv(struct sk_buff *s + if (snaplen > skb->len-skb->data_len) + snaplen = skb->len-skb->data_len; + ++ if (copy_skb && ++ ub_sockrcvbuf_charge(sk, copy_skb)) { ++ spin_lock(&sk->sk_receive_queue.lock); ++ goto ring_is_full; ++ } ++ + spin_lock(&sk->sk_receive_queue.lock); + h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head); + +@@ -975,6 +993,8 @@ static int packet_create(struct socket * + sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL); + if (sk == NULL) + goto out; ++ if (ub_other_sock_charge(sk)) ++ goto out_free; + + sock->ops = &packet_ops; + #ifdef CONFIG_SOCK_PACKET +@@ -1394,11 +1414,16 @@ static int packet_notifier(struct notifi + struct sock *sk; + struct hlist_node *node; + struct net_device *dev = (struct net_device*)data; ++ struct ve_struct *ve; + ++ ve = get_exec_env(); + read_lock(&packet_sklist_lock); + sk_for_each(sk, node, &packet_sklist) { + struct packet_opt *po = pkt_sk(sk); + ++ if (!ve_accessible_strict(VE_OWNER_SK(sk), ve)) ++ continue; ++ + switch (msg) { + case NETDEV_UNREGISTER: + #ifdef CONFIG_PACKET_MULTICAST +@@ -1797,6 +1822,8 @@ static inline struct sock *packet_seq_id + struct hlist_node *node; + + sk_for_each(s, node, &packet_sklist) { ++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env())) ++ continue; + if (!off--) + return s; + } +@@ -1812,9 +1839,13 @@ static void *packet_seq_start(struct seq + static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { + ++*pos; +- return (v == SEQ_START_TOKEN) +- ? sk_head(&packet_sklist) +- : sk_next((struct sock*)v) ; ++ do { ++ v = (v == SEQ_START_TOKEN) ++ ? 
sk_head(&packet_sklist) ++ : sk_next((struct sock*)v); ++ } while (v != NULL && ++ !ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env())); ++ return v; + } + + static void packet_seq_stop(struct seq_file *seq, void *v) +diff -uprN linux-2.6.8.1.orig/net/rose/rose_route.c linux-2.6.8.1-ve022stab078/net/rose/rose_route.c +--- linux-2.6.8.1.orig/net/rose/rose_route.c 2004-08-14 14:56:23.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/rose/rose_route.c 2006-05-11 13:05:34.000000000 +0400 +@@ -727,7 +727,8 @@ int rose_rt_ioctl(unsigned int cmd, void + } + if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ + return -EINVAL; +- ++ if (rose_route.ndigis > 8) /* No more than 8 digipeats */ ++ return -EINVAL; + err = rose_add_node(&rose_route, dev); + dev_put(dev); + return err; +diff -uprN linux-2.6.8.1.orig/net/sched/sch_api.c linux-2.6.8.1-ve022stab078/net/sched/sch_api.c +--- linux-2.6.8.1.orig/net/sched/sch_api.c 2004-08-14 14:55:20.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sched/sch_api.c 2006-05-11 13:05:42.000000000 +0400 +@@ -1204,7 +1204,7 @@ static int __init pktsched_init(void) + + register_qdisc(&pfifo_qdisc_ops); + register_qdisc(&bfifo_qdisc_ops); +- proc_net_fops_create("psched", 0, &psched_fops); ++ __proc_net_fops_create("net/psched", 0, &psched_fops, NULL); + + return 0; + } +diff -uprN linux-2.6.8.1.orig/net/sched/sch_cbq.c linux-2.6.8.1-ve022stab078/net/sched/sch_cbq.c +--- linux-2.6.8.1.orig/net/sched/sch_cbq.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sched/sch_cbq.c 2006-05-11 13:05:36.000000000 +0400 +@@ -956,8 +956,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int + + if (cl->deficit <= 0) { + q->active[prio] = cl; +- cl = cl->next_alive; + cl->deficit += cl->quantum; ++ cl = cl->next_alive; + } + return skb; + +@@ -1133,17 +1133,19 @@ static void cbq_normalize_quanta(struct + + for (h=0; h<16; h++) { + for (cl = q->classes[h]; cl; cl = cl->next) { ++ long mtu; + /* BUGGGG... Beware! This expression suffer of + arithmetic overflows! + */ + if (cl->priority == prio) { +- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ +- q->quanta[prio]; +- } +- if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) { +- printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum); +- cl->quantum = cl->qdisc->dev->mtu/2 + 1; ++ cl->quantum = (cl->weight * cl->allot) / ++ (q->quanta[prio] / q->nclasses[prio]); + } ++ mtu = cl->qdisc->dev->mtu; ++ if (cl->quantum <= mtu/2) ++ cl->quantum = mtu/2 + 1; ++ else if (cl->quantum > 32*mtu) ++ cl->quantum = 32*mtu; + } + } + } +@@ -1746,15 +1748,20 @@ static void cbq_destroy_filters(struct c + } + } + +-static void cbq_destroy_class(struct cbq_class *cl) ++static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) + { ++ struct cbq_sched_data *q = qdisc_priv(sch); ++ ++ BUG_TRAP(!cl->filters); ++ + cbq_destroy_filters(cl); + qdisc_destroy(cl->q); + qdisc_put_rtab(cl->R_tab); + #ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&cl->stats); + #endif +- kfree(cl); ++ if (cl != &q->link) ++ kfree(cl); + } + + static void +@@ -1767,22 +1774,23 @@ cbq_destroy(struct Qdisc* sch) + #ifdef CONFIG_NET_CLS_POLICE + q->rx_class = NULL; + #endif +- for (h = 0; h < 16; h++) { ++ /* ++ * Filters must be destroyed first because we don't destroy the ++ * classes from root to leafs which means that filters can still ++ * be bound to classes which have been destroyed already. 
--TGR '04 ++ */ ++ for (h = 0; h < 16; h++) + for (cl = q->classes[h]; cl; cl = cl->next) + cbq_destroy_filters(cl); +- } + + for (h = 0; h < 16; h++) { + struct cbq_class *next; + + for (cl = q->classes[h]; cl; cl = next) { + next = cl->next; +- if (cl != &q->link) +- cbq_destroy_class(cl); ++ cbq_destroy_class(sch, cl); + } + } +- +- qdisc_put_rtab(q->link.R_tab); + } + + static void cbq_put(struct Qdisc *sch, unsigned long arg) +@@ -1799,7 +1807,7 @@ static void cbq_put(struct Qdisc *sch, u + spin_unlock_bh(&sch->dev->queue_lock); + #endif + +- cbq_destroy_class(cl); ++ cbq_destroy_class(sch, cl); + } + } + +@@ -2035,7 +2043,7 @@ static int cbq_delete(struct Qdisc *sch, + sch_tree_unlock(sch); + + if (--cl->refcnt == 0) +- cbq_destroy_class(cl); ++ cbq_destroy_class(sch, cl); + + return 0; + } +diff -uprN linux-2.6.8.1.orig/net/sched/sch_generic.c linux-2.6.8.1-ve022stab078/net/sched/sch_generic.c +--- linux-2.6.8.1.orig/net/sched/sch_generic.c 2004-08-14 14:54:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sched/sch_generic.c 2006-05-11 13:05:42.000000000 +0400 +@@ -97,6 +97,9 @@ int qdisc_restart(struct net_device *dev + + /* Dequeue packet */ + if ((skb = q->dequeue(q)) != NULL) { ++ struct ve_struct *envid; ++ ++ envid = set_exec_env(VE_OWNER_SKB(skb)); + if (spin_trylock(&dev->xmit_lock)) { + /* Remember that the driver is grabbed by us. */ + dev->xmit_lock_owner = smp_processor_id(); +@@ -113,6 +116,7 @@ int qdisc_restart(struct net_device *dev + spin_unlock(&dev->xmit_lock); + + spin_lock(&dev->queue_lock); ++ (void)set_exec_env(envid); + return -1; + } + } +@@ -134,6 +138,7 @@ int qdisc_restart(struct net_device *dev + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); ++ (void)set_exec_env(envid); + return -1; + } + __get_cpu_var(netdev_rx_stat).cpu_collision++; +@@ -151,6 +156,7 @@ int qdisc_restart(struct net_device *dev + + q->ops->requeue(skb, q); + netif_schedule(dev); ++ (void)set_exec_env(envid); + return 1; + } + return q->q.qlen; +@@ -557,3 +563,4 @@ EXPORT_SYMBOL(qdisc_reset); + EXPORT_SYMBOL(qdisc_restart); + EXPORT_SYMBOL(qdisc_lock_tree); + EXPORT_SYMBOL(qdisc_unlock_tree); ++EXPORT_SYMBOL(dev_shutdown); +diff -uprN linux-2.6.8.1.orig/net/sched/sch_teql.c linux-2.6.8.1-ve022stab078/net/sched/sch_teql.c +--- linux-2.6.8.1.orig/net/sched/sch_teql.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sched/sch_teql.c 2006-05-11 13:05:42.000000000 +0400 +@@ -186,6 +186,9 @@ static int teql_qdisc_init(struct Qdisc + struct teql_master *m = (struct teql_master*)sch->ops; + struct teql_sched_data *q = qdisc_priv(sch); + ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ + if (dev->hard_header_len > m->dev->hard_header_len) + return -EINVAL; + +diff -uprN linux-2.6.8.1.orig/net/sctp/socket.c linux-2.6.8.1-ve022stab078/net/sctp/socket.c +--- linux-2.6.8.1.orig/net/sctp/socket.c 2004-08-14 14:56:25.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sctp/socket.c 2006-05-11 13:05:33.000000000 +0400 +@@ -4052,12 +4052,8 @@ SCTP_STATIC int sctp_msghdr_parse(const + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg != NULL; + cmsg = CMSG_NXTHDR((struct msghdr*)msg, cmsg)) { +- /* Check for minimum length. The SCM code has this check. */ +- if (cmsg->cmsg_len < sizeof(struct cmsghdr) || +- (unsigned long)(((char*)cmsg - (char*)msg->msg_control) +- + cmsg->cmsg_len) > msg->msg_controllen) { ++ if (!CMSG_OK(msg, cmsg)) + return -EINVAL; +- } + + /* Should we parse this header or ignore? 
*/ + if (cmsg->cmsg_level != IPPROTO_SCTP) +diff -uprN linux-2.6.8.1.orig/net/socket.c linux-2.6.8.1-ve022stab078/net/socket.c +--- linux-2.6.8.1.orig/net/socket.c 2004-08-14 14:55:10.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/socket.c 2006-05-11 13:05:42.000000000 +0400 +@@ -81,6 +81,7 @@ + #include <linux/syscalls.h> + #include <linux/compat.h> + #include <linux/kmod.h> ++#include <linux/in.h> + + #ifdef CONFIG_NET_RADIO + #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ +@@ -1071,6 +1072,37 @@ int sock_wake_async(struct socket *sock, + return 0; + } + ++int vz_security_proto_check(int family, int type, int protocol) ++{ ++#ifdef CONFIG_VE ++ if (ve_is_super(get_exec_env())) ++ return 0; ++ ++ switch (family) { ++ case PF_UNSPEC: ++ case PF_PACKET: ++ case PF_NETLINK: ++ case PF_UNIX: ++ break; ++ case PF_INET: ++ switch (protocol) { ++ case IPPROTO_IP: ++ case IPPROTO_ICMP: ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ case IPPROTO_RAW: ++ break; ++ default: ++ return -EAFNOSUPPORT; ++ } ++ break; ++ default: ++ return -EAFNOSUPPORT; ++ } ++#endif ++ return 0; ++} ++ + static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) + { + int i; +@@ -1099,6 +1131,11 @@ static int __sock_create(int family, int + family = PF_PACKET; + } + ++ /* VZ compatibility layer */ ++ err = vz_security_proto_check(family, type, protocol); ++ if (err < 0) ++ return err; ++ + err = security_socket_create(family, type, protocol, kern); + if (err) + return err; +@@ -1746,10 +1783,11 @@ asmlinkage long sys_sendmsg(int fd, stru + goto out_freeiov; + ctl_len = msg_sys.msg_controllen; + if ((MSG_CMSG_COMPAT & flags) && ctl_len) { +- err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl)); ++ err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); + if (err) + goto out_freeiov; + ctl_buf = msg_sys.msg_control; ++ ctl_len = msg_sys.msg_controllen; + } else if (ctl_len) { + if (ctl_len > sizeof(ctl)) + { +diff -uprN linux-2.6.8.1.orig/net/sunrpc/clnt.c linux-2.6.8.1-ve022stab078/net/sunrpc/clnt.c +--- linux-2.6.8.1.orig/net/sunrpc/clnt.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sunrpc/clnt.c 2006-05-11 13:05:42.000000000 +0400 +@@ -164,10 +164,10 @@ rpc_create_client(struct rpc_xprt *xprt, + } + + /* save the nodename */ +- clnt->cl_nodelen = strlen(system_utsname.nodename); ++ clnt->cl_nodelen = strlen(ve_utsname.nodename); + if (clnt->cl_nodelen > UNX_MAXNODENAME) + clnt->cl_nodelen = UNX_MAXNODENAME; +- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen); ++ memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen); + return clnt; + + out_no_auth: +diff -uprN linux-2.6.8.1.orig/net/sunrpc/sched.c linux-2.6.8.1-ve022stab078/net/sunrpc/sched.c +--- linux-2.6.8.1.orig/net/sunrpc/sched.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sunrpc/sched.c 2006-05-11 13:05:25.000000000 +0400 +@@ -1125,9 +1125,9 @@ rpciod(void *ptr) + spin_lock_bh(&rpc_queue_lock); + } + __rpc_schedule(); +- if (current->flags & PF_FREEZE) { ++ if (test_thread_flag(TIF_FREEZE)) { + spin_unlock_bh(&rpc_queue_lock); +- refrigerator(PF_FREEZE); ++ refrigerator(); + spin_lock_bh(&rpc_queue_lock); + } + +diff -uprN linux-2.6.8.1.orig/net/sunrpc/svcsock.c linux-2.6.8.1-ve022stab078/net/sunrpc/svcsock.c +--- linux-2.6.8.1.orig/net/sunrpc/svcsock.c 2004-08-14 14:54:49.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sunrpc/svcsock.c 2006-05-11 13:05:44.000000000 +0400 +@@ -362,6 
+362,9 @@ svc_sendto(struct svc_rqst *rqstp, struc + size_t base = xdr->page_base; + unsigned int pglen = xdr->page_len; + unsigned int flags = MSG_MORE; ++ struct ve_struct *old_env; ++ ++ old_env = set_exec_env(get_ve0()); + + slen = xdr->len; + +@@ -426,6 +429,8 @@ out: + rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, + rqstp->rq_addr.sin_addr.s_addr); + ++ (void)set_exec_env(old_env); ++ + return len; + } + +@@ -438,9 +443,12 @@ svc_recv_available(struct svc_sock *svsk + mm_segment_t oldfs; + struct socket *sock = svsk->sk_sock; + int avail, err; ++ struct ve_struct *old_env; + + oldfs = get_fs(); set_fs(KERNEL_DS); ++ old_env = set_exec_env(get_ve0()); + err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); ++ (void)set_exec_env(old_env); + set_fs(oldfs); + + return (err >= 0)? avail : err; +@@ -455,6 +463,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str + struct msghdr msg; + struct socket *sock; + int len, alen; ++ struct ve_struct *old_env; + + rqstp->rq_addrlen = sizeof(rqstp->rq_addr); + sock = rqstp->rq_sock->sk_sock; +@@ -466,7 +475,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str + + msg.msg_flags = MSG_DONTWAIT; + ++ old_env = set_exec_env(get_ve0()); + len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); ++ (void)set_exec_env(get_ve0()); + + /* sock_recvmsg doesn't fill in the name/namelen, so we must.. + * possibly we should cache this in the svc_sock structure +@@ -770,17 +781,19 @@ svc_tcp_accept(struct svc_sock *svsk) + struct proto_ops *ops; + struct svc_sock *newsvsk; + int err, slen; ++ struct ve_struct *old_env; + + dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); + if (!sock) + return; + ++ old_env = set_exec_env(get_ve0()); + err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); + if (err) { + if (err == -ENOMEM) + printk(KERN_WARNING "%s: no more sockets!\n", + serv->sv_name); +- return; ++ goto restore; + } + + dprintk("svc: tcp_accept %p allocated\n", newsock); +@@ -874,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk) + + } + ++ (void)set_exec_env(old_env); ++ + if (serv->sv_stats) + serv->sv_stats->nettcpconn++; + +@@ -881,6 +896,8 @@ svc_tcp_accept(struct svc_sock *svsk) + + failed: + sock_release(newsock); ++restore: ++ (void)set_exec_env(old_env); + return; + } + +@@ -1227,8 +1244,8 @@ svc_recv(struct svc_serv *serv, struct s + + schedule_timeout(timeout); + +- if (current->flags & PF_FREEZE) +- refrigerator(PF_FREEZE); ++ if (test_thread_flag(TIF_FREEZE)) ++ refrigerator(); + + spin_lock_bh(&serv->sv_lock); + remove_wait_queue(&rqstp->rq_wait, &wait); +@@ -1397,6 +1414,7 @@ svc_create_socket(struct svc_serv *serv, + struct socket *sock; + int error; + int type; ++ struct ve_struct *old_env; + + dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", + serv->sv_program->pg_name, protocol, +@@ -1410,8 +1428,10 @@ svc_create_socket(struct svc_serv *serv, + } + type = (protocol == IPPROTO_UDP)? 
SOCK_DGRAM : SOCK_STREAM; + ++ old_env = set_exec_env(get_ve0()); ++ + if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) +- return error; ++ goto restore; + + if (sin != NULL) { + if (type == SOCK_STREAM) +@@ -1427,12 +1447,16 @@ svc_create_socket(struct svc_serv *serv, + goto bummer; + } + +- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) ++ if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) { ++ (void)set_exec_env(old_env); + return 0; ++ } + + bummer: + dprintk("svc: svc_create_socket error = %d\n", -error); + sock_release(sock); ++restore: ++ (void)set_exec_env(old_env); + return error; + } + +@@ -1450,6 +1474,8 @@ svc_delete_socket(struct svc_sock *svsk) + serv = svsk->sk_server; + sk = svsk->sk_sk; + ++ /* XXX: serialization? */ ++ sk->sk_user_data = NULL; + sk->sk_state_change = svsk->sk_ostate; + sk->sk_data_ready = svsk->sk_odata; + sk->sk_write_space = svsk->sk_owspace; +diff -uprN linux-2.6.8.1.orig/net/sunrpc/xprt.c linux-2.6.8.1-ve022stab078/net/sunrpc/xprt.c +--- linux-2.6.8.1.orig/net/sunrpc/xprt.c 2004-08-14 14:55:47.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/sunrpc/xprt.c 2006-05-11 13:05:42.000000000 +0400 +@@ -246,6 +246,7 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru + int addrlen = 0; + unsigned int skip; + int result; ++ struct ve_struct *old_env; + + if (!sock) + return -ENOTCONN; +@@ -263,7 +264,9 @@ xprt_sendmsg(struct rpc_xprt *xprt, stru + skip = req->rq_bytes_sent; + + clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); ++ old_env = set_exec_env(get_ve0()); + result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); ++ (void)set_exec_env(old_env); + + dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); + +@@ -484,6 +487,7 @@ static void xprt_socket_connect(void *ar + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct socket *sock = xprt->sock; + int status = -EIO; ++ struct ve_struct *old_env; + + if (xprt->shutdown || xprt->addr.sin_port == 0) + goto out; +@@ -508,8 +512,10 @@ static void xprt_socket_connect(void *ar + /* + * Tell the socket layer to start connecting... + */ ++ old_env = set_exec_env(get_ve0()); + status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, + sizeof(xprt->addr), O_NONBLOCK); ++ (void)set_exec_env(old_env); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); + if (status < 0) { +@@ -1506,13 +1512,16 @@ static inline int xprt_bindresvport(stru + .sin_family = AF_INET, + }; + int err, port; ++ struct ve_struct *old_env; + + /* Were we already bound to a given port? Try to reuse it */ + port = xprt->port; + do { + myaddr.sin_port = htons(port); ++ old_env = set_exec_env(get_ve0()); + err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); ++ (void)set_exec_env(old_env); + if (err == 0) { + xprt->port = port; + return 0; +@@ -1588,15 +1597,18 @@ static struct socket * xprt_create_socke + { + struct socket *sock; + int type, err; ++ struct ve_struct *old_env; + + dprintk("RPC: xprt_create_socket(%s %d)\n", + (proto == IPPROTO_UDP)? "udp" : "tcp", proto); + + type = (proto == IPPROTO_UDP)? 
SOCK_DGRAM : SOCK_STREAM; + ++ old_env = set_exec_env(get_ve0()); ++ + if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { + printk("RPC: can't create socket (%d).\n", -err); +- return NULL; ++ goto out; + } + + /* If the caller has the capability, bind to a reserved port */ +@@ -1605,10 +1617,13 @@ static struct socket * xprt_create_socke + goto failed; + } + ++ (void)set_exec_env(old_env); + return sock; + + failed: + sock_release(sock); ++out: ++ (void)set_exec_env(old_env); + return NULL; + } + +diff -uprN linux-2.6.8.1.orig/net/unix/af_unix.c linux-2.6.8.1-ve022stab078/net/unix/af_unix.c +--- linux-2.6.8.1.orig/net/unix/af_unix.c 2004-08-14 14:55:35.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/unix/af_unix.c 2006-05-11 13:05:42.000000000 +0400 +@@ -119,6 +119,9 @@ + #include <net/checksum.h> + #include <linux/security.h> + ++#include <ub/ub_net.h> ++#include <ub/beancounter.h> ++ + int sysctl_unix_max_dgram_qlen = 10; + + kmem_cache_t *unix_sk_cachep; +@@ -242,6 +245,8 @@ static struct sock *__unix_find_socket_b + sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + struct unix_sock *u = unix_sk(s); + ++ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env())) ++ continue; + if (u->addr->len == len && + !memcmp(u->addr->name, sunname, len)) + goto found; +@@ -446,7 +451,7 @@ static int unix_listen(struct socket *so + sk->sk_max_ack_backlog = backlog; + sk->sk_state = TCP_LISTEN; + /* set credentials so connect can copy them */ +- sk->sk_peercred.pid = current->tgid; ++ sk->sk_peercred.pid = virt_tgid(current); + sk->sk_peercred.uid = current->euid; + sk->sk_peercred.gid = current->egid; + err = 0; +@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct + unix_sk_cachep); + if (!sk) + goto out; ++ if (ub_other_sock_charge(sk)) ++ goto out_sk_free; + + atomic_inc(&unix_nr_socks); + +@@ -572,6 +579,9 @@ static struct sock * unix_create1(struct + unix_insert_socket(unix_sockets_unbound, sk); + out: + return sk; ++out_sk_free: ++ sk_free(sk); ++ return NULL; + } + + static int unix_create(struct socket *sock, int protocol) +@@ -677,7 +687,7 @@ static struct sock *unix_find_other(stru + err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + if (err) + goto fail; +- err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); ++ err = permission(nd.dentry->d_inode, MAY_WRITE, &nd, NULL); + if (err) + goto put_fail; + +@@ -955,6 +965,7 @@ static int unix_stream_connect(struct so + int st; + int err; + long timeo; ++ unsigned long chargesize; + + err = unix_mkname(sunaddr, addr_len, &hash); + if (err < 0) +@@ -982,6 +993,10 @@ static int unix_stream_connect(struct so + skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); + if (skb == NULL) + goto out; ++ chargesize = skb_charge_fullsize(skb); ++ if (ub_sock_getwres_other(newsk, chargesize) < 0) ++ goto out; ++ ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF); + + restart: + /* Find listening sock. 
*/ +@@ -1065,7 +1080,7 @@ restart: + unix_peer(newsk) = sk; + newsk->sk_state = TCP_ESTABLISHED; + newsk->sk_type = sk->sk_type; +- newsk->sk_peercred.pid = current->tgid; ++ newsk->sk_peercred.pid = virt_tgid(current); + newsk->sk_peercred.uid = current->euid; + newsk->sk_peercred.gid = current->egid; + newu = unix_sk(newsk); +@@ -1127,7 +1142,7 @@ static int unix_socketpair(struct socket + sock_hold(skb); + unix_peer(ska)=skb; + unix_peer(skb)=ska; +- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid; ++ ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current); + ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid; + ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid; + +@@ -1450,6 +1465,16 @@ static int unix_stream_sendmsg(struct ki + + size=len-sent; + ++ if (msg->msg_flags & MSG_DONTWAIT) ++ ub_sock_makewres_other(sk, skb_charge_size(size)); ++ if (sock_bc(sk) != NULL && ++ sock_bc(sk)->poll_reserv >= ++ SOCK_MIN_UBCSPACE && ++ skb_charge_size(size) > ++ sock_bc(sk)->poll_reserv) ++ size = skb_charge_datalen(sock_bc(sk)->poll_reserv); ++ ++ + /* Keep two messages in the pipe so it schedules better */ + if (size > sk->sk_sndbuf / 2 - 64) + size = sk->sk_sndbuf / 2 - 64; +@@ -1461,7 +1486,8 @@ static int unix_stream_sendmsg(struct ki + * Grab a buffer + */ + +- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err); ++ skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE, ++ msg->msg_flags&MSG_DONTWAIT, &err); + + if (skb==NULL) + goto out_err; +@@ -1546,9 +1572,11 @@ static int unix_dgram_recvmsg(struct kio + + msg->msg_namelen = 0; + ++ down(&u->readsem); ++ + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) +- goto out; ++ goto out_unlock; + + wake_up_interruptible(&u->peer_wait); + +@@ -1598,6 +1626,8 @@ static int unix_dgram_recvmsg(struct kio + + out_free: + skb_free_datagram(sk,skb); ++out_unlock: ++ up(&u->readsem); + out: + return err; + } +@@ -1859,6 +1889,7 @@ static unsigned int unix_poll(struct fil + { + struct sock *sk = sock->sk; + unsigned int mask; ++ int no_ub_res; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; +@@ -1869,6 +1900,10 @@ static unsigned int unix_poll(struct fil + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + ++ no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH); ++ if (no_ub_res) ++ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH); ++ + /* readable? */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) +@@ -1882,7 +1917,7 @@ static unsigned int unix_poll(struct fil + * we set writable also when the other side has shut down the + * connection. This prevents stuck sockets. 
+ */ +- if (unix_writable(sk)) ++ if (!no_ub_res && unix_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +diff -uprN linux-2.6.8.1.orig/net/xfrm/xfrm_user.c linux-2.6.8.1-ve022stab078/net/xfrm/xfrm_user.c +--- linux-2.6.8.1.orig/net/xfrm/xfrm_user.c 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/net/xfrm/xfrm_user.c 2006-05-11 13:05:34.000000000 +0400 +@@ -1139,6 +1139,9 @@ struct xfrm_policy *xfrm_compile_policy( + if (nr > XFRM_MAX_DEPTH) + return NULL; + ++ if (p->dir > XFRM_POLICY_OUT) ++ return NULL; ++ + xp = xfrm_policy_alloc(GFP_KERNEL); + if (xp == NULL) { + *dir = -ENOBUFS; +diff -uprN linux-2.6.8.1.orig/scripts/kconfig/mconf.c linux-2.6.8.1-ve022stab078/scripts/kconfig/mconf.c +--- linux-2.6.8.1.orig/scripts/kconfig/mconf.c 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/scripts/kconfig/mconf.c 2006-05-11 13:05:32.000000000 +0400 +@@ -88,7 +88,7 @@ static char *args[1024], **argptr = args + static int indent; + static struct termios ios_org; + static int rows = 0, cols = 0; +-static struct menu *current_menu; ++struct menu *current_menu; + static int child_count; + static int do_resize; + static int single_menu_mode; +diff -uprN linux-2.6.8.1.orig/security/commoncap.c linux-2.6.8.1-ve022stab078/security/commoncap.c +--- linux-2.6.8.1.orig/security/commoncap.c 2004-08-14 14:55:19.000000000 +0400 ++++ linux-2.6.8.1-ve022stab078/security/commoncap.c 2006-05-11 13:05:49.000000000 +0400 +@@ -17,6 +17,7 @@ + #include <linux/mman.h> + #include <linux/pagemap.h> + #include <linux/swap.h> ++#include <linux/virtinfo.h> + #include <linux/smp_lock.h> + #include <linux/skbuff.h> + #include <linux/netlink.h> +@@ -174,7 +175,7 @@ int cap_inode_setxattr(struct dentry *de + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN)) + return -EPERM; + return 0; + } +@@ -183,7 +184,7 @@ int cap_inode_removexattr(struct dentry + { + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) && +- !capable(CAP_SYS_ADMIN)) ++ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN)) + return -EPERM; + return 0; + } +@@ -289,7 +290,7 @@ void cap_task_reparent_to_init (struct t + + int cap_syslog (int type) + { +- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) ++ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN)) + return -EPERM; + return 0; + } +@@ -311,6 +312,18 @@ int cap_vm_enough_memory(long pages) + + vm_acct_memory(pages); + ++#ifdef CONFIG_USER_RESOURCE ++ switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM, ++ (void *)pages) ++ & (NOTIFY_OK | NOTIFY_FAIL)) { ++ case NOTIFY_OK: ++ return 0; ++ case NOTIFY_FAIL: ++ vm_unacct_memory(pages); ++ return -ENOMEM; ++ } ++#endif ++ + /* + * Sometimes we want to use more memory than we have + */ +diff -uprN linux-2.6.8.1.orig/arch/i386/Kconfig linux-2.6.8.1-ve022test023/arch/i386/Kconfig +--- linux-2.6.8.1.orig/arch/i386/Kconfig 2004-08-14 14:54:50.000000000 +0400 ++++ linux-2.6.8.1-ve022test023/arch/i386/Kconfig 2005-06-08 13:32:09.000000000 +0400 +@@ -424,6 +424,54 @@ config X86_OOSTORE + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + default y + ++config X86_4G ++ bool "4 GB kernel-space and 4 GB user-space virtual memory support" ++ help ++ This option is only useful for systems that have more than 1 GB ++ of RAM. 
++ ++ The default kernel VM layout leaves 1 GB of virtual memory for ++ kernel-space mappings, and 3 GB of VM for user-space applications. ++ This option ups both the kernel-space VM and the user-space VM to ++ 4 GB. ++ ++ The cost of this option is additional TLB flushes done at ++ system-entry points that transition from user-mode into kernel-mode. ++ I.e. system calls and page faults, and IRQs that interrupt user-mode ++ code. There's also additional overhead to kernel operations that copy ++ memory to/from user-space. The overhead from this is hard to tell and ++ depends on the workload - it can be anything from no visible overhead ++ to 20-30% overhead. A good rule of thumb is to count with a runtime ++ overhead of 20%. ++ ++ The upside is the much increased kernel-space VM, which more than ++ quadruples the maximum amount of RAM supported. Kernels compiled with ++ this option boot on 64GB of RAM and still have more than 3.1 GB of ++ 'lowmem' left. Another bonus is that highmem IO bouncing decreases, ++ if used with drivers that still use bounce-buffers. ++ ++ There's also a 33% increase in user-space VM size - database ++ applications might see a boost from this. ++ ++ But the cost of the TLB flushes and the runtime overhead has to be ++ weighed against the bonuses offered by the larger VM spaces. The ++ dividing line depends on the actual workload - there might be 4 GB ++ systems that benefit from this option. Systems with less than 4 GB ++ of RAM will rarely see a benefit from this option - but it's not ++ out of question, the exact circumstances have to be considered. ++ ++config X86_SWITCH_PAGETABLES ++ def_bool X86_4G ++ ++config X86_4G_VM_LAYOUT ++ def_bool X86_4G ++ ++config X86_UACCESS_INDIRECT ++ def_bool X86_4G ++ ++config X86_HIGH_ENTRY ++ def_bool X86_4G ++ + config HPET_TIMER + bool "HPET Timer Support" + help +@@ -482,6 +530,28 @@ config NR_CPUS + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. + ++config FAIRSCHED ++ bool "Fair CPU scheduler (EXPERIMENTAL)" ++ default y ++ help ++ Config option for Fair CPU scheduler (fairsched). ++ This option allows to group processes to scheduling nodes ++ which receive CPU proportional to their weight. ++ This is very important feature for process groups isolation and ++ QoS management. ++ ++ If unsure, say N. ++ ++config SCHED_VCPU ++ bool "VCPU scheduler support" ++ depends on SMP || FAIRSCHED ++ default FAIRSCHED ++ help ++ VCPU scheduler support adds additional layer of abstraction ++ which allows to virtualize cpu notion and split physical cpus ++ and virtual cpus. This support allows to use CPU fair scheduler, ++ dynamically add/remove cpus to/from VPS and so on. ++ + config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP +@@ -1242,6 +1316,14 @@ config MAGIC_SYSRQ + keys are documented in <file:Documentation/sysrq.txt>. Don't say Y + unless you really know what this hack does. + ++config SYSRQ_DEBUG ++ bool "Debugging via sysrq keys" ++ depends on MAGIC_SYSRQ ++ help ++ Say Y if you want to extend functionality of magic key. It will ++ provide you with some debugging facilities such as dumping and ++ writing memory, resolving symbols and some other. ++ + config DEBUG_SPINLOCK + bool "Spinlock debugging" + depends on DEBUG_KERNEL +@@ -1298,6 +1380,14 @@ config 4KSTACKS + on the VM subsystem for higher order allocations. This option + will also use IRQ stacks to compensate for the reduced stackspace. 
+ ++config NMI_WATCHDOG ++ bool "NMI Watchdog" ++ default y ++ help ++ If you say Y here the kernel will activate NMI watchdog by default ++ on boot. You can still activate NMI watchdog via nmi_watchdog ++ command line option even if you say N here. ++ + config X86_FIND_SMP_CONFIG + bool + depends on X86_LOCAL_APIC || X86_VOYAGER +@@ -1310,12 +1400,18 @@ config X86_MPPARSE + + endmenu + ++menu "OpenVZ" ++source "kernel/Kconfig.openvz" ++endmenu ++ + source "security/Kconfig" + + source "crypto/Kconfig" + + source "lib/Kconfig" + ++source "kernel/ub/Kconfig" ++ + config X86_SMP + bool + depends on SMP && !X86_VOYAGER +diff -uprN linux-2.6.8.1.orig/drivers/net/Makefile linux-2.6.8.1-ve022stab028/drivers/net/Makefile +--- linux-2.6.8.1.orig/drivers/net/Makefile 2004-08-14 14:55:09.000000000 +0400 ++++ linux-2.6.8.1-ve022stab028/drivers/net/Makefile 2005-07-22 11:16:23.000000000 +0400 +@@ -11,6 +11,9 @@ obj-$(CONFIG_IBM_EMAC) += ibm_emac/ + obj-$(CONFIG_IXGB) += ixgb/ + obj-$(CONFIG_BONDING) += bonding/ + ++obj-$(CONFIG_VE_NETDEV) += vznetdev.o ++vznetdev-objs := open_vznet.o venet_core.o ++ + # + # link order important here + # +diff -uprN linux-2.6.8.1.orig/fs/Kconfig linux-2.6.8.1-ve022stab038/fs/Kconfig +--- linux-2.6.8.1.orig/fs/Kconfig 2004-08-14 14:55:33.000000000 +0400 ++++ linux-2.6.8.1-ve022stab038/fs/Kconfig 2005-09-22 14:49:52.000000000 +0400 +@@ -417,6 +417,15 @@ config QUOTA + with the quota tools. Probably the quota support is only useful for + multi user systems. If unsure, say N. + ++config QUOTA_COMPAT ++ bool "Compatibility with older quotactl interface" ++ depends on QUOTA ++ help ++ This option enables compatibility layer for older version ++ of quotactl interface with byte granularity (QUOTAON at 0x0100, ++ GETQUOTA at 0x0D00). Interface versions older than that one and ++ with block granularity are still not supported. ++ + config QFMT_V1 + tristate "Old quota format support" + depends on QUOTA +@@ -433,6 +442,38 @@ config QFMT_V2 + need this functionality say Y here. Note that you will need recent + quota utilities (>= 3.01) for new quota format with this kernel. + ++config SIM_FS ++ tristate "VPS filesystem" ++ depends on VZ_QUOTA ++ default m ++ help ++ This file system is a part of Virtuozzo. It intoduces a fake ++ superblock and blockdev to VE to hide real device and show ++ statfs results taken from quota. ++ ++config VZ_QUOTA ++ tristate "Virtuozzo Disk Quota support" ++ depends on QUOTA ++ default m ++ help ++ Virtuozzo Disk Quota imposes disk quota on directories with their ++ files and subdirectories in total. Such disk quota is used to ++ account and limit disk usage by Virtuozzo VPS, but also may be used ++ separately. ++ ++config VZ_QUOTA_UNLOAD ++ bool "Unloadable Virtuozzo Disk Quota module" ++ depends on VZ_QUOTA=m ++ default n ++ help ++ Make Virtuozzo Disk Quota module unloadable. ++ Doesn't work reliably now. ++ ++config VZ_QUOTA_UGID ++ bool "Per-user and per-group quota in Virtuozzo quota partitions" ++ depends on VZ_QUOTA!=n ++ default y ++ + config QUOTACTL + bool + depends on XFS_QUOTA || QUOTA +diff -uprN linux-2.6.8.1.orig/kernel/Makefile linux-2.6.8.1-ve022stab036/kernel/Makefile +--- linux-2.6.8.1.orig/kernel/Makefile 2004-08-14 14:54:51.000000000 +0400 ++++ linux-2.6.8.1-ve022stab036/kernel/Makefile 2005-09-17 15:18:16.000000000 +0400 +@@ -2,13 +2,22 @@ + # Makefile for the linux kernel. 
+ #
+ 
+-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
++obj-y = sched.o fairsched.o \
++	    fork.o exec_domain.o panic.o printk.o profile.o \
+ 	    exit.o itimer.o time.o softirq.o resource.o \
+ 	    sysctl.o capability.o ptrace.o timer.o user.o \
+ 	    signal.o sys.o kmod.o workqueue.o pid.o \
+ 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
+ 	    kthread.o
+ 
++obj-$(CONFIG_VE) += ve.o
++obj-y += ub/
++obj-y += veowner.o
++obj-$(CONFIG_VE_CALLS) += vzdev.o
++obj-$(CONFIG_VZ_WDOG) += vzwdog.o
++obj-$(CONFIG_VE_CALLS) += vzmon.o
++vzmon-objs = vecalls.o
++
+ obj-$(CONFIG_FUTEX) += futex.o
+ obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
+ obj-$(CONFIG_SMP) += cpu.o
+diff -uprN linux-2.6.8.1.orig/fs/Makefile linux-2.6.8.1-ve022stab026/fs/Makefile
+--- linux-2.6.8.1.orig/fs/Makefile	2004-08-14 14:55:33.000000000 +0400
++++ linux-2.6.8.1-ve022stab026/fs/Makefile	2005-07-08 16:26:55.000000000 +0400
+@@ -36,6 +36,12 @@ obj-$(CONFIG_QUOTA) += dquot.o
+ obj-$(CONFIG_QFMT_V1) += quota_v1.o
+ obj-$(CONFIG_QFMT_V2) += quota_v2.o
+ obj-$(CONFIG_QUOTACTL) += quota.o
++obj-$(CONFIG_VZ_QUOTA) += vzdquota.o
++vzdquota-y += vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
++vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
++
++obj-$(CONFIG_SIM_FS) += simfs.o
+ 
+ obj-$(CONFIG_PROC_FS) += proc/
+ obj-y += partitions/
+diff -uprN linux-2.6.8.1.orig/arch/x86_64/Kconfig linux-2.6.8.1-ve022stab036/arch/x86_64/Kconfig
+--- linux-2.6.8.1.orig/arch/x86_64/Kconfig	2004-08-14 14:55:59.000000000 +0400
++++ linux-2.6.8.1-ve022stab036/arch/x86_64/Kconfig	2005-09-17 15:18:15.000000000 +0400
+@@ -239,6 +239,28 @@ config PREEMPT
+ 	  Say Y here if you are feeling brave and building a kernel for a
+ 	  desktop, embedded or real-time system. Say N if you are unsure.
+ 
++config FAIRSCHED
++	bool "Fair CPU scheduler (EXPERIMENTAL)"
++	default y
++	help
++	  Config option for the Fair CPU scheduler (fairsched).
++	  This option allows grouping processes into scheduling nodes
++	  which receive CPU time in proportion to their weight.
++	  This is a key feature for process group isolation and
++	  QoS management.
++
++	  If unsure, say N.
++
++config SCHED_VCPU
++	bool "VCPU scheduler support"
++	depends on SMP || FAIRSCHED
++	default FAIRSCHED
++	help
++	  VCPU scheduler support adds an additional layer of abstraction
++	  which virtualizes the notion of a CPU and separates physical
++	  CPUs from virtual CPUs. It allows using the fair CPU scheduler,
++	  dynamically adding/removing CPUs to/from a VPS, and so on.
++
+ config SCHED_SMT
+ 	bool "SMT (Hyperthreading) scheduler support"
+ 	depends on SMP
+@@ -499,9 +525,14 @@ config IOMMU_LEAK
+ 
+ endmenu
+ 
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+ 
+ source "crypto/Kconfig"
+ 
+ source "lib/Kconfig"
+ 
++source "kernel/ub/Kconfig"
+diff -uprN linux-2.6.8.1.orig/arch/ia64/Kconfig linux-2.6.8.1-ve022stab042/arch/ia64/Kconfig
+--- linux-2.6.8.1.orig/arch/ia64/Kconfig	2004-08-14 14:56:22.000000000 +0400
++++ linux-2.6.8.1-ve022stab042/arch/ia64/Kconfig	2005-10-14 14:56:03.000000000 +0400
+@@ -251,6 +251,28 @@ config PREEMPT
+ 	  Say Y here if you are building a kernel for a desktop, embedded
+ 	  or real-time system. Say N if you are unsure.
+ 
++config FAIRSCHED
++	bool "Fair CPU scheduler (EXPERIMENTAL)"
++	default y
++	help
++	  Config option for the Fair CPU scheduler (fairsched).
++	  This option allows grouping processes into scheduling nodes
++	  which receive CPU time in proportion to their weight.
++	  This is a key feature for process group isolation and
++	  QoS management.
++
++	  If unsure, say N.
++
++config SCHED_VCPU
++	bool "VCPU scheduler support"
++	depends on SMP || FAIRSCHED
++	default FAIRSCHED
++	help
++	  VCPU scheduler support adds an additional layer of abstraction
++	  which virtualizes the notion of a CPU and separates physical
++	  CPUs from virtual CPUs. It allows using the fair CPU scheduler,
++	  dynamically adding/removing CPUs to/from a VPS, and so on.
++
+ config HAVE_DEC_LOCK
+ 	bool
+ 	depends on (SMP || PREEMPT)
+@@ -486,6 +512,12 @@ config SYSVIPC_COMPAT
+ 	default y
+ endmenu
+ 
++menu "OpenVZ"
++source "kernel/Kconfig.openvz"
++endmenu
++
+ source "security/Kconfig"
+ 
+ source "crypto/Kconfig"
++
++source "kernel/ub/Kconfig"
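
Note (illustration only, not part of the patch): the hunks above only wire the new options into Kconfig and the Makefiles. The CONFIG_VE, CONFIG_VE_NETDEV, CONFIG_VE_CALLS and CONFIG_VZ_WDOG symbols referenced by the Makefile hunks are presumably defined in kernel/Kconfig.openvz and kernel/ub/Kconfig, which the new "OpenVZ" menu sources but which are not shown in this part of the patch. As a rough sketch, a .config fragment for a kernel built with these options, following the defaults stated in the help texts, might look like the lines below; the values are examples, not something mandated by the patch.

# Sketch of a .config fragment - option names come from the Kconfig
# entries added above; values follow the stated defaults where given
# and are otherwise illustrative only.
CONFIG_FAIRSCHED=y
CONFIG_SCHED_VCPU=y
CONFIG_SYSRQ_DEBUG=y
CONFIG_NMI_WATCHDOG=y
CONFIG_QUOTA_COMPAT=y
CONFIG_VZ_QUOTA=m
CONFIG_VZ_QUOTA_UGID=y
# CONFIG_VZ_QUOTA_UNLOAD is not set
CONFIG_SIM_FS=m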