diff -urN linux-2.4.17/CREDITS linux_umopenmosix/CREDITS --- linux-2.4.17/CREDITS Fri Dec 21 19:41:53 2001 +++ linux_umopenmosix/CREDITS Wed Jun 26 23:45:14 2002 @@ -434,6 +434,7 @@ E: lars@nocrew.org W: http://lars.nocrew.org/ D: dsp56k device driver +D: ptrace proxy in user mode kernel port S: Kopmansg 2 S: 411 13 Goteborg S: Sweden @@ -696,7 +697,7 @@ E: jdike@karaya.com W: http://user-mode-linux.sourceforge.net D: User mode kernel port -S: RR1 Box 67C +S: 375 Tubbs Hill Rd S: Deering NH 03244 S: USA diff -urN linux-2.4.17/Documentation/Configure.help linux_umopenmosix/Documentation/Configure.help --- linux-2.4.17/Documentation/Configure.help Fri Dec 21 19:41:53 2001 +++ linux_umopenmosix/Documentation/Configure.help Wed Jun 26 23:45:14 2002 @@ -75,6 +75,182 @@ # 1995-2000 by Axel Boldt and many others and are governed by the GNU # General Public License. +Mosix extensions +CONFIG_MOSIX + Say Y to support process migration and automatic load-balancing + within a cluster. + +Support clusters with a complex network topology +CONFIG_MOSIX_TOPOLOGY + This option is intended for configurations where the network + "distance" between the nodes is not uniform, so you require + node-dependent fine-tuning (see "man tune"). + + If all the nodes in your MOSIX cluster are connected via a simple + network and use the same networking hardware (which is the more + common case), Say N here (and save kernel-time!). + + Say Y here if not all your MOSIX cluster is connected via a simple + network or if some nodes use significantly different networking + hardware (if you are not sure whether the difference is significant, + compile some MOSIX kernel anyway and compare the results of "tune"). 
+ +Maximum network-topology complexity to support +CONFIG_MOSIX_MAXTOPOLOGY + In a complex network-topology, each node can identify a number of + subsets of the other nodes, each set having the same networking + hardware, the same processor type (perhaps slower or faster, but + of the same type) and the same routing path. + The maximum network-complexity is defined as the maximum number of + those sets over all the nodes in your cluster. Minimizing this + number helps save kernel time and inter-node communication, so + please use only the level of complexity that you actually need. + + IMPORTANT NOTE: even if you prepare different kernels for different + nodes, all the nodes in your cluster must have the same value + configured here. + + MOSIX allows this value to be in the range of 2-10 (higher values + will be truncated and lower values imply no complex topology). + +MOSIX kernel debugger +CONFIG_MOSIX_UDB + Say Y to use the MOSIX kernel-debugger: + the MOSIX kernel-debugger is provided AS IS and can be entered from the + console by pressing . It is being used for the + development of MOSIX and no claims to reliability are made. + +MOSIX diagnostics +CONFIG_MOSIX_DIAG + Say Y to include MOSIX consistency checks. + While this adds code to the kernel, it may prevent unexpected + occurrences when running new versions of MOSIX. + +MOSIX debug-code +CONFIG_MOSIX_DEBUG + Say Y to include extensive debugging-messages option in MOSIX + (used for MOSIX kernel-development). + +Process arrival messages +CONFIG_MOSIX_WEEEEEEEEE + If you say Y to this option, the console will display messages whenever + a process arrives: Weeeeeeeee..... for remote (guest) processes and + Wooooooooo..... for local processes returning home. + +Loopback process-migration testing +CONFIG_MOSIX_CHEAT_MIGSELF + Say Y to allow loopback process "migration" from a node into itself + (useful only for MOSIX kernel-debugging). 
+ +Level of process-identity disclosure +CONFIG_MOSIX_DISCLOSURE + Determine how much information about processes is disclosed by default + when they run as guests on remote nodes: + 0 = no information + 1 = only PID (and TGID if different) + 2 = PID(/TGID), UID, GID + 3 = PID(/TGID), UID, GID, PGRP, SESSION, COMMAND + Processes may modify this default by writing to /proc/self/disclosure. + +Prevent the "-mosix" extension on kernel name +CONFIG_MOSIX_EXTMOSIX + Say Y if you want the kernel and module directory to have the "-mosix" + extension (to distinguish it from coexistent non-MOSIX kernels of the + same kernel version number). + +Stricter security on MOSIX ports +CONFIG_MOSIX_SECUREPORTS + The internal kernel TCP/UDP ports used by MOSIX may not be accessed + by normal users (and even the Super-User has no real reason to access + them). The question arises whether to allow user-connections to those + ports on other (internet) nodes, outside the cluster. + If routing schemes allow a MOSIX node to also be accessed from within + the MOSIX cluster using an IP address that is not listed in the MOSIX + configuration, you must say Y (or risk that a hacking user will mess + with the MOSIX internals by connecting to that IP address and one of + the MOSIX port numbers). However, these port numbers, though not + listed in "/etc/services", may be in use for other purposes somewhere + else on the internet - and this option would prevent accessing them, + so if your users require such access, say N and make sure that all the + IP addresses by which your nodes can be reached are listed as aliases + in the MOSIX configuration. + +Direct File-System Access for MOSIX +CONFIG_MOSIX_DFSA + Direct File System Access: + DFSA is now ready for Beta-testing. It can currently only be used + by MFS (MOSIX File-System). 
You may be interested in this option + if you either like experimenting and experiencing the power of DFSA + with MFS, or if you are interested in either developing a new + file-system, or adapting an existing one to DFSA. + + For the lay user, all you need is to also configure MFS, then run + the following commands on each node: + mkdir /mfs + mount -t mfs cluster /mfs -odfsa=1 + + If you are more serious about it, please read "Documentation/DFSA" + or run "man dfsa". + +MOSIX File-System +CONFIG_MOSIX_FS + The MOSIX File-System (MFS) is now ready for Beta-testing. + Along with DFSA, it forms the basis for the next generation + of MOSIX, expanding the power of MOSIX beyond CPU-bound tasks + into I/O tasks as well. + + On its own, MFS allows processes to access most files (more accurately, + all regular files, directories and symbolic-links, but excluding the + "/proc" file-system and MFS itself) on all the nodes in the cluster. + + MFS assumes that all users/group-ID's throughout the cluster are + equivalent: if this is not the case, you cannot use MFS, so you must + say 'N' here, unless you have a sub-cluster where the user/group-ID's + are equivalent, in which case you may still choose to configure MFS + only in that sub-cluster. + + To use MFS, all you need is to type: + mount -t mfs cluster /mfs + (the word "cluster" can be changed to suit your taste and the "/mfs" + mount-point is only a suggestion, to be used in this discussion). 
+ For a permanent solution, enter the following line in "/etc/fstab": + cluster /mfs mfs defaults 0 0 + + You can now access each node via "/mfs/{node_number}"; you may also + access the following useful directories: + + /mfs/here - The current node where your process runs + /mfs/home - Your home node + /mfs/magic - The current node when used by the "creat" system call + (or an "open" with the "O_CREAT" option) - otherwise, + the last node on which an MFS magical file was + successfully created (this is very useful for creating + temporary-files, then immediately unlinking them) + /mfs/lastexec - The node on which the process last issued a successful + "execve" system-call. + /mfs/selected - The node you selected by either your process itself or + one of its ancestors (before forking this process), + writing a number into "/proc/self/selected". + + MFS is currently the only file-system that can be used with DFSA. + Detailed technical information is available in + "Documentation/filesystems/mfs.txt". + +Poll/Select exceptions on pipes +CONFIG_MOSIX_PIPE_EXCEPTIONS + This feature is not required for MOSIX-proper, but some user-mode + programs like it. It allows a program to be notified when someone + is wishing to read from a pipe. If you say Y, you may then use: + ioctl(pipefd, TCSBRK, arg) to set or clear exception conditions. + If (arg & 1), an exception is generated when someone is trying to + read the pipe. If (arg & 2), an exception is generated when the + pipe has no more readers. The default is that pipes never generate + any exceptions. An exception can be returned by the "select" system + call and will also cause POLLPRI to be included in the return-value + of the "poll" system-call. You can also get an under-estimate of the + number of bytes that processes currently try to read from the pipe using + ioctl(pipefd, TIOCGWINSZ, 0). 
+ Prompt for development and/or incomplete code/drivers CONFIG_EXPERIMENTAL Some of the various things that Linux supports (such as network @@ -13501,7 +13677,7 @@ Synchronous operation (i.e. always writing data to the host's disk immediately) is configurable on a per-UBD basis by using a special kernel command line option. Alternatively, you can say Y here to - turn on synchronous operation by default for all block. + turn on synchronous operation by default for all block devices. If you're running a journalling file system (like reiserfs, for example) in your virtual machine, you will want to say Y here. If @@ -13513,6 +13689,7 @@ CONFIG_PT_PROXY This option enables a debugging interface which allows gdb to debug the kernel without needing to actually attach to kernel threads. + CONFIG_XTERM_CHAN must be enabled in order to enable CONFIG_PT_PROXY. If you want to do kernel debugging, say Y here; otherwise say N. Management console @@ -13707,26 +13884,116 @@ SLIP transport CONFIG_UML_NET_SLIP - The Slip User-Mode Linux network transport allows a running UML to + The slip User-Mode Linux network transport allows a running UML to network with its host over a point-to-point link. Unlike Ethertap, which can carry any Ethernet frame (and hence even non-IP packets), - the Slip transport can only carry IP packets. + the slip transport can only carry IP packets. - To use this, your host must support Slip devices. + To use this, your host must support slip devices. For more information, see . That site - has examples of the UML command line to use to enable Slip + has examples of the UML command line to use to enable slip networking, and details of a few quirks with it. - The Ethertap Transport is preferred over Slip because of its - limitation. If you prefer Slip, however, say Y here. Otherwise + The Ethertap Transport is preferred over slip because of its + limitations. If you prefer slip, however, say Y here. 
Otherwise choose the Multicast transport (to network multiple UMLs on multiple hosts), Ethertap (to network with the host and the outside world), and/or the Daemon transport (to network multiple UMLs on a single host). You may choose more than one without conflict. If you don't need UML networking, say N. +Default main console channel initialization +CONFIG_CON_ZERO_CHAN + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. + +Default console channel initialization +CONFIG_CON_CHAN + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. + +Default serial line channel initialization +CONFIG_SSL_CHAN + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. + +UML sound support +CONFIG_UML_SOUND + This option enables UML sound support. If enabled, it will pull in + soundcore and the UML hostaudio relay, which acts as an intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. + +UML SMP support +CONFIG_UML_SMP + This option enables UML SMP support. 
UML implements virtual SMP by + allowing as many processes to run simultaneously on the host as + there are virtual processors configured. Obviously, if the host is + a uniprocessor, those processes will timeshare, but, inside UML, + will appear to be running simultaneously. If the host is a + multiprocessor, then UML processes may run simultaneously, depending + on the host scheduler. + CONFIG_SMP will be set to whatever this option is set to. + It is safe to leave this unchanged. + +file descriptor channel support +CONFIG_FD_CHAN + This option enables support for attaching UML consoles and serial + lines to already set up file descriptors. Generally, the main + console is attached to file descriptors 0 and 1 (stdin and stdout), + so it would be wise to leave this enabled unless you intend to + attach it to some other host device. + +port channel support +CONFIG_PORT_CHAN + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet + '. Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. + +pty channel support +CONFIG_PTY_CHAN + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. + +tty channel support +CONFIG_TTY_CHAN + This option enables support for attaching UML consoles and serial + lines to host terminals. Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. 
+ +xterm channel support +CONFIG_XTERM_CHAN + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + If you disable this option, then CONFIG_PT_PROXY will be disabled as + well, since UML's gdb currently requires an xterm. + It is safe to say 'Y' here. + Microtek USB scanner support CONFIG_USB_MICROTEK Say Y here if you want support for the Microtek X6USB and diff -urN linux-2.4.17/Documentation/DFSA linux_umopenmosix/Documentation/DFSA --- linux-2.4.17/Documentation/DFSA Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/Documentation/DFSA Wed Jun 26 23:45:14 2002 @@ -0,0 +1,215 @@ +DFSA - Direct File-System Access: + +The first generation of MOSIX has brought about great performance +improvements in CPU jobs - "number crunchers", but cannot help in the +case of I/O tasks, which need to communicate with their home-node as +often as every system-call, and are therefore better off remaining there. + +The second generation of MOSIX, includes DFSA, whereby the more common +system-calls can be (under certain conditions) performed directly on the +caller's current node, thus increasing the benefit and probability that +I/O-oriented (or mixed I/O and CPU) tasks will also migrate. + +DFSA operates over suitable, cluster-wide shared file-systems that fulfill +certain requirements. The only file-system to currently fulfill those +requirements is the MOSIX File-System (MFS). + +To use DFSA without violating access permissions, the permission-scheme +(user and group ID's) must be identical, or at least compatible throughout +the MOSIX cluster. + +Each partition that is to operate in DFSA mode must be assigned a unique +DFSA index, currently in the range of 1-8, that must be identical on all +the nodes in the MOSIX cluster. + +To request a particular partition to operate in DFSA mode, mount (or remount) +it with the "-odfsa={n}" argument (1 <= n <= 8). 
+ +You should do the same on all the nodes in the cluster either at about the +same time or before MOSIX is configured: failure to assign all DFSA +mount-points on some of the nodes is not fatal, but may result in serious +performance degradation, while simultaneous use of the same index for +different partitions, is likely to cause various faults. + +To disassociate a partition from DFSA, run: + +mount -o remount {mount-point} -odfsa=0. + +You may also designate symbolic-links to operate in DFSA mode: this is +equivalent to a declaration that the given links are identical on all +nodes and point to the same partition. It saves remote processes who use +those link(s) the need to contact their home node every use in order to +read those links. To declare a symbolic-link as identical, type: + +echo {symbolic-link} > /proc/mosix/admin/dfsalinks, + +where the symbolic-link must be an absolute-pathname, pointing at an existing +file (or directory or another symbolic-link) on an already-mounted partition +that is capable of DFSA (but it is not required to be already associated with +a DFSA index). + +To remove a symbolic-link declaration, type: + +echo -{symbolic-link} > /proc/mosix/admin/dfsalinks: + +If you intend to re-define a declared symbolic link, you must first re- +move its declaration, then re-declare after the change is made. + +To see a list of all currently-declared symbolic links, type: + +cat /proc/mosix/admin/dfsalinks. + +To cancel all symbolic-link declarations, type: + +echo - > /proc/mosix/admin/dfsalinks. + +The number of declared symbolic-links is currently limited to 8 and their +path-name length is limited to 128 characters. + +requirements from a complying file-system: +------------------------------------------ +1) all operations on the file system must be synchronous, in the sense that + there is [at most] only one buffer/inode cache throughout the cluster. 
+ (on client-server file-systems, this usually means that the whole cache + is maintained on the server - however, a sophisticated server may "lend" + the cache of particular inodes to particular clients at any given time. + on shared-hardware file-systems, this probably requires either a hardware + invalidation signal or a new version to be marked on each inode after each + modification). + +2) The time-stamps on files and between files of the same file-system must be + consistent and advancing (unless the clock is deliberately set backwards), + regardless from which node modifications are made. + +3) The file-system must populate the following two new super-block methods: + a) "identify": + Given a "dentry", encapsulate identifying information about it into + a finite, rather-small structure, in a way that is sufficient to be + able to re-establish that open file/directory on another node. + b) "reconstruct": + Given only a mount-structure ("vfsmnt") and information that was + provided by "identify", produce a live new "dentry". + + Also, while not enforced by DFSA itself, in order for the getcwd + system-call to work correctly on a shared file-system, regardless + of where the call is made from, it is also highly recommended to + populate the following new inode-method: + c) "checkpath": + Given a "dentry", ensure that following its path via the "dcache" + will truly reflect its current position on the shared file-system - + and if not, make the necessary fixes by adjusting the "dentry" around + the directory cache: The "dcache" of shared file-systems cannot be + trusted, since processes running on other nodes can move (or remove) + a directory at any time. + +4) The file-system must ensure that files/directories are not cleared when + unlinked, for as long as any process in the cluster still holds them open. 
+ There are several possible techniques to achieve this, but given the + distributed nature of the file-system, some form of garbage-collection + is probably also called upon. + +Which system-calls are supported: +--------------------------------- +The following system-calls are normally supported and usually run directly +by the process, while any other calls, or hard cases still need to go via +the home-node: + + read, readv, write, writev, readahead + lseek, llseek + open, creat, close + dup, dup2, fcntl/fcntl64 (F_DUPFD,F_GETFL,F_SETFL) + getdents, getdents64, old_readdir + fsync, fdatasync + chdir, fchdir, getcwd + stat, stat64, newstat, lstat, lstat64, newlstat, + fstat, fstat64, newfstat + access + truncate, truncate64, ftruncate, ftruncate64 + chmod, chown, chown16, lchown, lchown16, fchmod, fchown, fchown16 + utime, utimes + symlink, readlink + mkdir, rmdir + link, unlink, rename + +Examples of hard cases: +* if not all nodes have the same mounted DFSA partitions, or they do - + but with different mount-flags. +* if the calling process is being traced. +* if the process has a non-standard root-directory. +* If the calling process has an emulating personality that causes it + to use an alternate root (but this is currently not relevant for the + i386 architecture). +* if the calling process shares either its files or current directory + as a result of the "clone" system-call. +* operations occurring during re-configuration of DFSA on either the + home-node or the node where the process runs. +* operations involving special files (e.g. other than regular, directories + or symbolic-links) +* operations on files that were commonly opened and still shared with other + related processes. +* dup2, where the second file-descriptor is an already open non-DFSA file + (that requires closing). +* chdir/fchdir when the previous directory is non-DFSA. +* link/rename that fail due to an attempt to cross-device link. 
+* open/dup/dup2/fcntl(F_DUPFD) that requires an allowable-increase in + the maximal file-descriptor index (initially 1023!). +* When the home-node has pending requests for the process (such as + signals, requests for "ps" information, request to migrate or consider + migration, etc.) +* Use of path-names that leave the DFSA partition, as demonstrated by + the following example: + "/mfs" is a DFSA file-system + /mfstmp is a symbolic link to /mfs/2/tmp, and is declared in + /proc/mosix/admin/dfsalinks. + /mtmp is a symbolic link to /mfstmp, and is declared in + /proc/mosix/admin/dfsalinks. + /mfs2 is a symbolic link to /mfs/2, but is not declared. + on node #2, /fie is a symbolic link to "/tmp/foo". + then the following are accepted as simple cases (and identical): + /mfs/2/tmp/foo + //mfs//2/tmp/foo + /./mfs/2/tmp/foo + /mfstmp/foo + /mtmp/foo + /mfs/2/fie + mfs/2/tmp/foo (when in the root directory) + + but not the following: + /tmp/../mfs/tmp/foo + (the kernel is not allowed to assume that each node has an + accessible "/tmp" directory!) + /mfs/2/../../mfs/2/tmp/foo + (the second ".." steps out of the "/mfs" DFSA partition) + /mfs2/tmp/foo + (/mfs2 is not declared, hence no assurance was provided + that it is identical on all nodes) + mfstmp/foo (or mtmp/foo) when in the root directory + (just a difficult case to recognize) + +* when the home-node DEPUTY has pending requests for the process (such as + signals, requests for "ps" information, request to migrate or consider + migration, etc.) + +Deviations from normal Linux/Unix/Posix behavior: +-------------------------------------------------- +It was impossible to maintain 100% compatibility on DFSA file-systems, +but the deviations are kept to the very minimum: + +* A process that received a signal may continue running a few DFSA system-calls + before it actually receives and handles the signal. 
+ (in contrast, any POSIX process that receives a signal may possibly + complete the next system-call, but cannot issue any new ones beyond that). + +* Simultaneous mapping and I/O on the same DFSA file creates unpredictable + results as follows: + 1) execution (and library and all other file-mappings) is not always + protected against other process(es) modifying the file: either the + writing-process or the executing/mapping process may fail to receive the + "ETXTBSY" error. + 2) The "MS_INVALIDATE" flag of "msync" may fail to ensure that previous + "write"(s) to a mapped DFSA file are discarded. + 3) when a process modifies memory that is mapped as "MAP_SHARED" to a DFSA + file, but has not yet written it back (using "msync", "munmap", "exec" + or "exit"), it is possible that another process that reads that file as + it migrates will first see some of the changes but later (as opposed to + normal behavior), see the old values (or some of them) again. diff -urN linux-2.4.17/Documentation/filesystems/00-INDEX linux_umopenmosix/Documentation/filesystems/00-INDEX --- linux-2.4.17/Documentation/filesystems/00-INDEX Wed Jun 20 21:10:27 2001 +++ linux_umopenmosix/Documentation/filesystems/00-INDEX Wed Jun 26 23:45:14 2002 @@ -22,6 +22,8 @@ - info and mount options for the OS/2 HPFS. isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. +mfs.txt + - info on the Mosix filesystem. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. 
ntfs.txt diff -urN linux-2.4.17/Documentation/filesystems/mfs.txt linux_umopenmosix/Documentation/filesystems/mfs.txt --- linux-2.4.17/Documentation/filesystems/mfs.txt Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/Documentation/filesystems/mfs.txt Wed Jun 26 23:45:14 2002 @@ -0,0 +1,197 @@ +MFS interface, capabilities and limitations: +============================================ +To use MFS, configure the "CONFIG_MOSIX_FS" option into the kernel +then mount it, using: + mount -t mfs {any_name} {mount_point} [-o dfsa={n}] +This gives access to nearly all files throughout the MOSIX cluster, +with the root of each node available via {mount_point}/{node_number}/. +Also available as sub-directories are: + +/{mount_point}/here/ + The current node where your process runs +/{mount_point}/home/ + Your home node +/{mount_point}/magic/ + The current node when used by the "creat" system call (or an "open" + with the "O_CREAT" option) - otherwise, the last node on which an MFS + magical file was successfully created (this is very useful for creating + temporary-files, then immediately unlinking them) +/{mount_point}/lastexec/ + The node on which the process last issued a successful "execve" + system-call. +/{mount_point}/selected/ + The node you selected by either your process itself or as inherited + by one of its ancestors (before forking this process), writing a + number into "/proc/self/selected". + +You may also wish to have MFS mounted automatically by entering the following +line into "/etc/fstab": + cluster /{mount_point} mfs defaults 0 0 +or for DFSA use: + cluster /{mount_point} mfs dfsa=1 0 0 + +Once CONFIG_MOSIX_FS is configured in the kernel and MOSIX has been configured +(See "man setpe"), other nodes can access the node's file-system even without +the above mount. To disallow MFS access to this node, write a "1" to +"/proc/mosix/admin/nomfs" (to re-allow, write a "0"). 
+ +MFS was designed to also run under DFSA, allowing direct access by processes, +from wherever they run at each moment, to the node holding the files/directories +that they require, bypassing their "DEPUTY" in most cases. This makes it even +more efficient if the required files are on the same node as the process, in +which case the process can serve itself without resorting to the network. +To use MFS with DFSA, make sure that the mount-point is the same on all nodes, +then mount (or remount) MFS with the "-odfsa={n}" flag, where {n} is in the +range of 1-8 and identical on all nodes in the cluster. + +Users and Groups: +----------------- +MFS assumes that all user and group ID's throughout the cluster have +equivalent access rights. You should not use MFS on clusters with a +heterogeneous user/group scheme. While allowing the Super-User to access +all files throughout the cluster, this is implied anyway by the security +requirements of MOSIX (See "man mosix"). If most of your cluster uses the +same scheme, but some nodes do not, you may either configure MFS only in the +kernel of those nodes that use the same scheme, or write a "1" to +"/proc/mosix/admin/nomfs" during node-startup and before MOSIX is configured +on the other nodes, as well as not mounting MFS there. + +Temporary files: +---------------- +the "here", "magic", "lastexec" and "selected" directories are designed +to provide easier access to temporary files, so that programs are helped +to create their temporary files where they run. With many programs, you +can make use of these directories without recompiling, by using the "TMPDIR" +environment variable. + +The most conservative, but safest thing to do, which can be applied to all +programs, is to: + setenv TMPDIR "/{mount-point}/selected/tmp" +In this case, your shell (or the calling script) should run + echo `cat /proc/self/where` > /proc/self/selected +before calling the program. 
+(note that "cp" cannot be used here, since only the shell may modify its +own "selected", but "echo" works because it is built into most shells) + +The next, little less conservative approach, but still safe for programs +that do not rely on passing file-names to their children as arguments of +"exec", is to: + setenv TMPDIR "/{mount-point}/lastexec/tmp" + (or "env TMPDIR=/{mount-point}/lastexec/tmp program [args]") + +The next, still less conservative, but more powerful approach, can be used +for programs that create temporary-files, which either create only one MFS file, +or unlink temporary-files as soon as they are created. For such programs: + setenv TMPDIR "/{mount-point}/magic/tmp" + (or "env TMPDIR=/{mount-point}/magic/tmp program [args]") + +Finally, programs that are locked on any particular node, may use: + setenv TMPDIR "/{mount-point}/here/tmp" + (or "env TMPDIR=/{mount-point}/here/tmp program [args]") +Please note that this approach is not 100% safe, because even while locked, +migration back to the home-node may still occur if/when the node where the +program runs is being shut-down for reboot. + +Of course, when designing a new program to run with MFS, +all the above methods can be freely mixed. + +Interpretation of symbolic-links: +--------------------------------- +The following non-trivial interpretation of symbolic links found within MFS, +was designed to provide uniformity of access between links created locally +and via MFS, especially by scripts and "makefile"s that use `pwd` as part +of symbolic links: + +The rule is that when a symbolic link begins with a '/', it refers to the +root of the file-system's node - not the home-node! +Similarly, a "/.." (or any combination with ".." that calls for the parent of +the file-system's root) refers to the file-system's root again, rather than to +the MFS mount-point. + +One of the implications is that a symbolic link is never allowed to cross nodes. 
+ +Excluded files: +--------------- +The following may not be accessed via the MFS file-system: +* nodes that excluded themselves. +* special files - other than regular-files, directories or symbolic-links. +* the "proc" file-system. +* any subdirectories of the recursive MFS mount-point with the exception + of symbolic links starting in '/', pointing to the same node, and doing + so only once. + +Examples: +assuming that there are 3 nodes in the cluster and on node #2: +1) MFS is mounted on "/mfs" +2) "/usr/src/linux_here" is a symbolic link to "/mfs/2/usr/src/linux" +3) "/usr/src/local_linux" is a symbolic link to "../../mfs/2/usr/src/linux" +4) "/usr/src/other_linux" is a symbolic link to "/mfs/3/usr/src/linux" +5) "/usr/src/mfs_linux" is a symbolic link to "/mfs/2/mfs/2/usr/src/linux" + +then the following are accessible: + +/mfs/2/usr/src/linux +/mfs/2/etc/hosts +/mfs/2/mfs +/mfs/2/usr/src/linux_here +/usr/src/local_linux +/usr/src/other_linux + +but the following are not (and will result in "Permission denied" error): + +/mfs/2/dev/tty6 (special character device) +/mfs/2/proc/mosix ("proc" file system) +/mfs/2/mfs/2/tmp (to prevent infinite recursion and confusing the shell) +/mfs/2/usr/src/local_linux (symbolic-link does not start with '/') +/mfs/2/usr/src/other_linux (symbolic-link pointing to another node) +/usr/src/mfs_linux (symbolic-link pointing to local node twice) + +(please note, however, that symbolic-links are still readable +with "lstat" and "readlink" regardless of their contents) + +Garbage Collection: +------------------- +When either a client node or part of the network crashes, a garbage-collection +mechanism will eventually clean up the references to the held-files or +directories on the serving node(s). It may take, however, up to an hour +until the server(s) finally give up the connection, during which the serving +node(s) will not be able to un-mount the particular file-system(s) involved. 
+ +The Super-User may still force an un-mount in 3 ways: +1) disable MFS by writing a "1" to "/proc/mosix/admin/nomfs". +2) un-configure MOSIX by running "setpe -off". +3) write the name of a file or directory to be released to + "/proc/mosix/admin/mfskill". If the given name is of a directory, all + files and sub-directories under it will be released as well (with the + possible exception of files being actively accessed at that very moment), + thus writing '/' releases everything, but is very disruptive to users, + so it is better to write the name of the mount-point of the file-system + that you wish to un-mount. + +Functionality limitations: +-------------------------- +* Mandatory file-locking is not supported. +* the F_NOTIFY fcntl option is not supported. +* Voluntary file-locking only operates among processes of the same home-node + (and since it will not be supported by DFSA, it always requires DEPUTY- + assistance on the home-node). +* file-ioctl is currently only supported for the EXT2 file-system. +* mmap of MFS files only supports private mappings (MAP_PRIVATE). + Open files must have read-permission. + The actual implementation of "mmap" and "execve" does not use demand-paging, + but rather reads in the relevant text/data from the file before proceeding. +* Every effort was made to prevent giving the same inode-number to + different files, and in most cases this is the case, but it is not totally + possible with only 32 bits inode-numbers and the large potential number of + files on numerous nodes and devices within each node. Priority is given so + that files on any particular node do not get the same inode-numbers, but even + this cannot be absolutely guaranteed when some of the files are NFS (or other + file-systems that use the full 32-bit space for inode numbers).
To identify + an inode most accurately, one should use the raw "stat" ("fstat"/"lstat") + system-call as provided by the kernel before being filtered by the + compatibility library, providing the node-number in the "__unused1" field, + the device-number in the "__unused2" field and the local inode-number in the + "__unused3" field (these fields are currently always 0 for non-MFS). + In the "stat64"/"lstat64"/"fstat64" system-calls, the node number can be + found in "__pad0[2-3]", the device-number in "__pad0[4-5]" and the local + inode number in "__pad0[6-9]". diff -urN linux-2.4.17/MAINTAINERS linux_umopenmosix/MAINTAINERS --- linux-2.4.17/MAINTAINERS Fri Dec 21 19:41:53 2001 +++ linux_umopenmosix/MAINTAINERS Wed Jun 26 23:45:14 2002 @@ -1694,6 +1694,14 @@ L: linux-usb-devel@lists.sourceforge.net W: http://usb.in.tum.de S: Maintained + +USER-MODE PORT +P: Jeff Dike +M: jdike@karaya.com +L: user-mode-linux-devel@lists.sourceforge.net +L: user-mode-linux-user@lists.sourceforge.net +W: http://user-mode-linux.sourceforge.net +S: Maintained VFAT FILESYSTEM: P: Gordon Chaffee diff -urN linux-2.4.17/Makefile linux_umopenmosix/Makefile --- linux-2.4.17/Makefile Fri Dec 21 19:41:53 2001 +++ linux_umopenmosix/Makefile Sat Jun 29 16:47:12 2002 @@ -5,7 +5,15 @@ KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) -ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +# SUBARCH tells the usermode build what the underlying arch is. That is set +# first, and if a usermode build is happening, the "ARCH=um" on the command +# line overrides the setting of ARCH below. If a native build is happening, +# then ARCH is assigned, getting whatever value it gets normally, and +# SUBARCH is subsequently ignored.
+ +SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ARCH := $(SUBARCH) + KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ @@ -54,6 +62,11 @@ ifeq (.config,$(wildcard .config)) include .config + +ifdef CONFIG_MOSIX_EXTMOSIX +EXTRAVERSION := $(EXTRAVERSION)-mosix +endif + ifeq (.depend,$(wildcard .depend)) include .depend do-it-all: Version vmlinux @@ -88,8 +101,13 @@ CPPFLAGS := -D__KERNEL__ -I$(HPATH) +ifdef CONFIG_MOSIX_UDB +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fno-omit-frame-pointer -fno-strict-aliasing -fno-common +else CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ -fomit-frame-pointer -fno-strict-aliasing -fno-common +endif AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) # @@ -122,7 +140,16 @@ NETWORKS =net/network.o LIBS =$(TOPDIR)/lib/lib.a +ifdef CONFIG_MOSIX +CORE_FILES += $(TOPDIR)/mos/mos.o +SUBDIRS =mos kernel drivers mm fs net ipc lib +else SUBDIRS =kernel drivers mm fs net ipc lib +endif +ifdef CONFIG_MOSIX_UDB +CORE_FILES += $(TOPDIR)/udb/debugger.o +SUBDIRS += udb +endif DRIVERS-n := DRIVERS-y := @@ -208,6 +235,11 @@ net/khttpd/make_times_h \ net/khttpd/times.h \ submenu* + +CLEAN_FILES += arch/i386/kernel/mosasm.H arch/i386/kernel/offset \ + mos/auto_syscalls.c mos/mkdefcalls mos/alternate.c \ + udb/symbols.c udb/symtab vmlinux.symtab + # directories removed with 'make clean' CLEAN_DIRS = \ modules @@ -266,6 +298,20 @@ $(LIBS) \ --end-group \ -o vmlinux +ifdef CONFIG_MOSIX_UDB + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > vmlinux.symtab + udb/symtab < vmlinux.symtab > udb/symbols.c + $(CC) -c $(CFLAGS) udb/symbols.c -o udb/symbols.o + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + udb/symbols.o \ + -o vmlinux 
+endif $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map symlinks: diff -urN linux-2.4.17/Makefile.ccache linux_umopenmosix/Makefile.ccache --- linux-2.4.17/Makefile.ccache Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/Makefile.ccache Wed Jun 26 23:45:14 2002 @@ -0,0 +1,584 @@ +VERSION = 2 +PATCHLEVEL = 4 +SUBLEVEL = 17 +EXTRAVERSION = + +KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) + +# SUBARCH tells the usermode build what the underlying arch is. That is set +# first, and if a usermode build is happening, the "ARCH=um" on the command +# line overrides the setting of ARCH below. If a native build is happening, +# then ARCH is assigned, getting whatever value it gets normally, and +# SUBARCH is subsequently ignored. + +SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ARCH := $(SUBARCH) + +KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") + +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ + else if [ -x /bin/bash ]; then echo /bin/bash; \ + else echo sh; fi ; fi) +TOPDIR := $(shell /bin/pwd) + +HPATH = $(TOPDIR)/include +FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net + +HOSTCC = ccache gcc +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer + +CROSS_COMPILE = + +# +# Include the make variables (CC, etc...) 
+# + +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = ccache $(CROSS_COMPILE)gcc +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +MAKEFILES = $(TOPDIR)/.config +GENKSYMS = /sbin/genksyms +DEPMOD = /sbin/depmod +MODFLAGS = -DMODULE +CFLAGS_KERNEL = +PERL = perl + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ + CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL + +all: do-it-all + +# +# Make "config" the default target if there is no configuration file or +# "depend" the target if there is no top-level dependency information. +# + +ifeq (.config,$(wildcard .config)) +include .config + +ifdef CONFIG_MOSIX_EXTMOSIX +EXTRAVERSION := $(EXTRAVERSION)-mosix +endif + +ifeq (.depend,$(wildcard .depend)) +include .depend +do-it-all: Version vmlinux +else +CONFIGURATION = depend +do-it-all: depend +endif +else +CONFIGURATION = config +do-it-all: config +endif + +# +# INSTALL_PATH specifies where to place the updated kernel and system map +# images. Uncomment if you want to place them anywhere other than root. +# + +#export INSTALL_PATH=/boot + +# +# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +# relocations required by build roots. This is not defined in the +# makefile but the arguement can be passed to make if needed. 
+# + +MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) +export MODLIB + +# +# standard CFLAGS +# + +CPPFLAGS := -D__KERNEL__ -I$(HPATH) + +ifdef CONFIG_MOSIX_UDB +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fno-omit-frame-pointer -fno-strict-aliasing -fno-common +else +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fomit-frame-pointer -fno-strict-aliasing -fno-common +endif +AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) + +# +# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. +# This is i386 specific. +# + +export ROOT_DEV = CURRENT + +# +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. +# This is i386 specific. +# + +export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA + +# +# If you want the RAM disk device, define this to be the size in blocks. +# This is i386 specific. 
+# + +#export RAMDISK = -DRAMDISK=512 + +CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o +NETWORKS =net/network.o + +LIBS =$(TOPDIR)/lib/lib.a +ifdef CONFIG_MOSIX +CORE_FILES += $(TOPDIR)/mos/mos.o +SUBDIRS =mos kernel drivers mm fs net ipc lib +else +SUBDIRS =kernel drivers mm fs net ipc lib +endif +ifdef CONFIG_MOSIX_UDB +CORE_FILES += $(TOPDIR)/udb/debugger.o +SUBDIRS += udb +endif + +DRIVERS-n := +DRIVERS-y := +DRIVERS-m := +DRIVERS- := + +DRIVERS-$(CONFIG_ACPI) += drivers/acpi/acpi.o +DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o +DRIVERS-y += drivers/char/char.o \ + drivers/block/block.o \ + drivers/misc/misc.o \ + drivers/net/net.o \ + drivers/media/media.o +DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o +DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o +DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a +DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a +DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o +DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.o +DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o +DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o +DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o +DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o +DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o +DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o +DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o + +ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) +DRIVERS-y += drivers/cdrom/driver.o +endif + +DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o +DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o +DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o +DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o +DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o +DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o +DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += 
drivers/char/pcmcia/pcmcia_char.o +DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a +DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o +DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o +DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a +DRIVERS-$(CONFIG_VT) += drivers/video/video.o +DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a +DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o +DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a +DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o +DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o +DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o +DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o +DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o +DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o +DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o +DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o +DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o + +DRIVERS := $(DRIVERS-y) + + +# files removed with 'make clean' +CLEAN_FILES = \ + kernel/ksyms.lst include/linux/compile.h \ + vmlinux System.map \ + .tmp* \ + drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ + drivers/char/conmakehash \ + drivers/char/drm/*-mod.c \ + drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ + drivers/zorro/devlist.h drivers/zorro/gen-devlist \ + drivers/sound/bin2hex drivers/sound/hex2hex \ + drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ + drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ + drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ + drivers/scsi/aic7xxx/aicasm/y.tab.h \ + drivers/scsi/aic7xxx/aicasm/aicasm \ + drivers/scsi/53c700-mem.c \ + net/khttpd/make_times_h \ + net/khttpd/times.h \ + submenu* + +CLEAN_FILES += arch/i386/kernel/mosasm.H arch/i386/kernel/offset \ + 
mos/auto_syscalls.c mos/mkdefcalls mos/alternate.c \ + udb/symbols.c udb/symtab vmlinux.symtab + +# directories removed with 'make clean' +CLEAN_DIRS = \ + modules + +# files removed with 'make mrproper' +MRPROPER_FILES = \ + include/linux/autoconf.h include/linux/version.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ + drivers/net/hamradio/soundmodem/gentbl \ + drivers/sound/*_boot.h drivers/sound/.*.boot \ + drivers/sound/msndinit.c \ + drivers/sound/msndperm.c \ + drivers/sound/pndsperm.c \ + drivers/sound/pndspini.c \ + drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ + .version .config* config.in config.old \ + scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ + scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ + .menuconfig.log \ + include/asm \ + .hdepend scripts/mkdep scripts/split-include scripts/docproc \ + $(TOPDIR)/include/linux/modversions.h \ + kernel.spec + +# directories removed with 'make mrproper' +MRPROPER_DIRS = \ + include/config \ + $(TOPDIR)/include/linux/modules + + +include arch/$(ARCH)/Makefile + +export CPPFLAGS CFLAGS AFLAGS + +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS + +.S.s: + $(CPP) $(AFLAGS) -traditional -o $*.s $< +.S.o: + $(CC) $(AFLAGS) -traditional -c -o $*.o $< + +Version: dummy + @rm -f include/linux/compile.h + +boot: vmlinux + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot + +vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + -o vmlinux +ifdef CONFIG_MOSIX_UDB + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > vmlinux.symtab + udb/symtab < vmlinux.symtab > 
udb/symbols.c + $(CC) -c $(CFLAGS) udb/symbols.c -o udb/symbols.o + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + udb/symbols.o \ + -o vmlinux +endif + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + +symlinks: + rm -f include/asm + ( cd include ; ln -sf asm-$(ARCH) asm) + @if [ ! -d include/linux/modules ]; then \ + mkdir include/linux/modules; \ + fi + +oldconfig: symlinks + $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in + +xconfig: symlinks + $(MAKE) -C scripts kconfig.tk + wish -f scripts/kconfig.tk + +menuconfig: include/linux/version.h symlinks + $(MAKE) -C scripts/lxdialog all + $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in + +config: symlinks + $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in + +include/config/MARKER: scripts/split-include include/linux/autoconf.h + scripts/split-include include/linux/autoconf.h include/config + @ touch include/config/MARKER + +linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) + +$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@) + +$(TOPDIR)/include/linux/version.h: include/linux/version.h +$(TOPDIR)/include/linux/compile.h: include/linux/compile.h + +newversion: + . 
scripts/mkversion > .tmpversion + @mv -f .tmpversion .version + +include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion + @echo -n \#define UTS_VERSION \"\#`cat .version` > .ver + @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver; fi + @if [ -f .name ]; then echo -n \-`cat .name` >> .ver; fi + @echo ' '`date`'"' >> .ver + @echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver + @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver + @echo \#define LINUX_COMPILE_HOST \"`hostname`\" >> .ver + @if [ -x /bin/dnsdomainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`dnsdomainname`\"; \ + elif [ -x /bin/domainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`domainname`\"; \ + else \ + echo \#define LINUX_COMPILE_DOMAIN ; \ + fi >> .ver + @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver + @mv -f .ver $@ + +include/linux/version.h: ./Makefile + @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver + @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver + @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver + @mv -f .ver $@ + +init/version.o: init/version.c include/linux/compile.h include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(ARCH)"' -c -o init/version.o init/version.c + +init/main.o: init/main.c include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -c -o $*.o $< + +fs lib mm ipc kernel drivers net: dummy + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) + +TAGS: dummy + etags `find include/asm-$(ARCH) -name '*.h'` + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a + find $(SUBDIRS) init -name '*.[ch]' | xargs etags -a + +# Exuberant ctags works better with -I +tags: dummy + CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \ + 
ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \ + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ + find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a + +ifdef CONFIG_MODULES +ifdef CONFIG_MODVERSIONS +MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h +endif + +.PHONY: modules +modules: $(patsubst %, _mod_%, $(SUBDIRS)) + +.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) +$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER + $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules + +.PHONY: modules_install +modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post + +.PHONY: _modinst_ +_modinst_: + @rm -rf $(MODLIB)/kernel + @rm -f $(MODLIB)/build + @mkdir -p $(MODLIB)/kernel + @ln -s $(TOPDIR) $(MODLIB)/build + +# If System.map exists, run depmod. This deliberately does not have a +# dependency on System.map since that would run the dependency tree on +# vmlinux. This depmod is only for convenience to give the initial +# boot a modules.dep even before / is mounted read-write. However the +# boot script depmod is the master version. +ifeq "$(strip $(INSTALL_MOD_PATH))" "" +depmod_opts := +else +depmod_opts := -b $(INSTALL_MOD_PATH) -r +endif +.PHONY: _modinst_post +_modinst_post: _modinst_post_pcmcia + if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi + +# Backwards compatibilty symlinks for people still using old versions +# of pcmcia-cs with hard coded pathnames on insmod. Remove +# _modinst_post_pcmcia for kernel 2.4.1. 
+.PHONY: _modinst_post_pcmcia +_modinst_post_pcmcia: + cd $(MODLIB); \ + mkdir -p pcmcia; \ + find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia + +.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS)) +$(patsubst %, _modinst_%, $(SUBDIRS)) : + $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install + +# modules disabled.... + +else +modules modules_install: dummy + @echo + @echo "The present kernel configuration has modules disabled." + @echo "Type 'make config' and enable loadable module support." + @echo "Then build a kernel with module support enabled." + @echo + @exit 1 +endif + +clean: archclean + find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \ + | grep -v lxdialog/ | xargs rm -f + rm -f $(CLEAN_FILES) + rm -rf $(CLEAN_DIRS) + $(MAKE) -C Documentation/DocBook clean + +mrproper: clean archmrproper + find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f + rm -f $(MRPROPER_FILES) + rm -rf $(MRPROPER_DIRS) + $(MAKE) -C Documentation/DocBook mrproper + +distclean: mrproper + rm -f core `find . \( -not -type d \) -and \ + \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ + -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ + -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags + +backup: mrproper + cd .. && tar cf - linux/ | gzip -9 > backup.gz + sync + +sgmldocs: + chmod 755 $(TOPDIR)/scripts/docgen + chmod 755 $(TOPDIR)/scripts/gen-all-syms + chmod 755 $(TOPDIR)/scripts/kernel-doc + $(MAKE) -C $(TOPDIR)/Documentation/DocBook books + +psdocs: sgmldocs + $(MAKE) -C Documentation/DocBook ps + +pdfdocs: sgmldocs + $(MAKE) -C Documentation/DocBook pdf + +htmldocs: sgmldocs + $(MAKE) -C Documentation/DocBook html + +sums: + find . -type f -print | sort | xargs sum > .SUMS + +dep-files: scripts/mkdep archdep include/linux/version.h + scripts/mkdep -- init/*.c > .depend + scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! 
-name modversions.h -print` > .hdepend + $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" +ifdef CONFIG_MODVERSIONS + $(MAKE) update-modverfile +endif + +ifdef CONFIG_MODVERSIONS +MODVERFILE := $(TOPDIR)/include/linux/modversions.h +else +MODVERFILE := +endif +export MODVERFILE + +depend dep: dep-files + +checkconfig: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl + +checkhelp: + find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl + +checkincludes: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl + +ifdef CONFIGURATION +..$(CONFIGURATION): + @echo + @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'" + @echo + $(MAKE) $(CONFIGURATION) + @echo + @echo "Successful. Try re-making (ignore the error that follows)" + @echo + exit 1 + +#dummy: ..$(CONFIGURATION) +dummy: + +else + +dummy: + +endif + +include Rules.make + +# +# This generates dependencies for the .h files. +# + +scripts/mkdep: scripts/mkdep.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c + +scripts/split-include: scripts/split-include.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c + +# +# RPM target +# +# If you do a make spec before packing the tarball you can rpm -ta it +# +spec: + . scripts/mkspec >kernel.spec + +# +# Build a tar ball, generate an rpm from it and pack the result +# There arw two bits of magic here +# 1) The use of /. to avoid tar packing just the symlink +# 2) Removing the .dep files as they have source paths in them that +# will become invalid +# +rpm: clean spec + find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f + set -e; \ + cd $(TOPDIR)/.. ; \ + ln -sf $(TOPDIR) $(KERNELPATH) ; \ + tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \ + rm $(KERNELPATH) ; \ + cd $(TOPDIR) ; \ + . 
scripts/mkversion > .version ; \ + rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ + rm $(TOPDIR)/../$(KERNELPATH).tar.gz diff -urN linux-2.4.17/Makefile.org linux_umopenmosix/Makefile.org --- linux-2.4.17/Makefile.org Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/Makefile.org Wed Jun 26 23:45:14 2002 @@ -0,0 +1,584 @@ +VERSION = 2 +PATCHLEVEL = 4 +SUBLEVEL = 17 +EXTRAVERSION = + +KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) + +# SUBARCH tells the usermode build what the underlying arch is. That is set +# first, and if a usermode build is happening, the "ARCH=um" on the command +# line overrides the setting of ARCH below. If a native build is happening, +# then ARCH is assigned, getting whatever value it gets normally, and +# SUBARCH is subsequently ignored. + +SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ARCH := $(SUBARCH) + +KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") + +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ + else if [ -x /bin/bash ]; then echo /bin/bash; \ + else echo sh; fi ; fi) +TOPDIR := $(shell /bin/pwd) + +HPATH = $(TOPDIR)/include +FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net + +HOSTCC = gcc +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer + +CROSS_COMPILE = + +# +# Include the make variables (CC, etc...) 
+# + +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = $(CROSS_COMPILE)gcc +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +MAKEFILES = $(TOPDIR)/.config +GENKSYMS = /sbin/genksyms +DEPMOD = /sbin/depmod +MODFLAGS = -DMODULE +CFLAGS_KERNEL = +PERL = perl + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ + CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL + +all: do-it-all + +# +# Make "config" the default target if there is no configuration file or +# "depend" the target if there is no top-level dependency information. +# + +ifeq (.config,$(wildcard .config)) +include .config + +ifdef CONFIG_MOSIX_EXTMOSIX +EXTRAVERSION := $(EXTRAVERSION)-mosix +endif + +ifeq (.depend,$(wildcard .depend)) +include .depend +do-it-all: Version vmlinux +else +CONFIGURATION = depend +do-it-all: depend +endif +else +CONFIGURATION = config +do-it-all: config +endif + +# +# INSTALL_PATH specifies where to place the updated kernel and system map +# images. Uncomment if you want to place them anywhere other than root. +# + +#export INSTALL_PATH=/boot + +# +# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +# relocations required by build roots. This is not defined in the +# makefile but the arguement can be passed to make if needed. 
+# + +MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) +export MODLIB + +# +# standard CFLAGS +# + +CPPFLAGS := -D__KERNEL__ -I$(HPATH) + +ifdef CONFIG_MOSIX_UDB +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fno-omit-frame-pointer -fno-strict-aliasing -fno-common +else +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fomit-frame-pointer -fno-strict-aliasing -fno-common +endif +AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) + +# +# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. +# This is i386 specific. +# + +export ROOT_DEV = CURRENT + +# +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. +# This is i386 specific. +# + +export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA + +# +# If you want the RAM disk device, define this to be the size in blocks. +# This is i386 specific. 
+# + +#export RAMDISK = -DRAMDISK=512 + +CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o +NETWORKS =net/network.o + +LIBS =$(TOPDIR)/lib/lib.a +ifdef CONFIG_MOSIX +CORE_FILES += $(TOPDIR)/mos/mos.o +SUBDIRS =mos kernel drivers mm fs net ipc lib +else +SUBDIRS =kernel drivers mm fs net ipc lib +endif +ifdef CONFIG_MOSIX_UDB +CORE_FILES += $(TOPDIR)/udb/debugger.o +SUBDIRS += udb +endif + +DRIVERS-n := +DRIVERS-y := +DRIVERS-m := +DRIVERS- := + +DRIVERS-$(CONFIG_ACPI) += drivers/acpi/acpi.o +DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o +DRIVERS-y += drivers/char/char.o \ + drivers/block/block.o \ + drivers/misc/misc.o \ + drivers/net/net.o \ + drivers/media/media.o +DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o +DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o +DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a +DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a +DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o +DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.o +DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o +DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o +DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o +DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o +DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o +DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o +DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o + +ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) +DRIVERS-y += drivers/cdrom/driver.o +endif + +DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o +DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o +DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o +DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o +DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o +DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o +DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += 
drivers/char/pcmcia/pcmcia_char.o +DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a +DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o +DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o +DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a +DRIVERS-$(CONFIG_VT) += drivers/video/video.o +DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a +DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o +DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a +DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o +DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o +DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o +DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o +DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o +DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o +DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o +DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o +DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o + +DRIVERS := $(DRIVERS-y) + + +# files removed with 'make clean' +CLEAN_FILES = \ + kernel/ksyms.lst include/linux/compile.h \ + vmlinux System.map \ + .tmp* \ + drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ + drivers/char/conmakehash \ + drivers/char/drm/*-mod.c \ + drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ + drivers/zorro/devlist.h drivers/zorro/gen-devlist \ + drivers/sound/bin2hex drivers/sound/hex2hex \ + drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ + drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ + drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ + drivers/scsi/aic7xxx/aicasm/y.tab.h \ + drivers/scsi/aic7xxx/aicasm/aicasm \ + drivers/scsi/53c700-mem.c \ + net/khttpd/make_times_h \ + net/khttpd/times.h \ + submenu* + +CLEAN_FILES += arch/i386/kernel/mosasm.H arch/i386/kernel/offset \ + 
mos/auto_syscalls.c mos/mkdefcalls mos/alternate.c \ + udb/symbols.c udb/symtab vmlinux.symtab + +# directories removed with 'make clean' +CLEAN_DIRS = \ + modules + +# files removed with 'make mrproper' +MRPROPER_FILES = \ + include/linux/autoconf.h include/linux/version.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ + drivers/net/hamradio/soundmodem/gentbl \ + drivers/sound/*_boot.h drivers/sound/.*.boot \ + drivers/sound/msndinit.c \ + drivers/sound/msndperm.c \ + drivers/sound/pndsperm.c \ + drivers/sound/pndspini.c \ + drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ + .version .config* config.in config.old \ + scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ + scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ + .menuconfig.log \ + include/asm \ + .hdepend scripts/mkdep scripts/split-include scripts/docproc \ + $(TOPDIR)/include/linux/modversions.h \ + kernel.spec + +# directories removed with 'make mrproper' +MRPROPER_DIRS = \ + include/config \ + $(TOPDIR)/include/linux/modules + + +include arch/$(ARCH)/Makefile + +export CPPFLAGS CFLAGS AFLAGS + +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS + +.S.s: + $(CPP) $(AFLAGS) -traditional -o $*.s $< +.S.o: + $(CC) $(AFLAGS) -traditional -c -o $*.o $< + +Version: dummy + @rm -f include/linux/compile.h + +boot: vmlinux + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot + +vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + -o vmlinux +ifdef CONFIG_MOSIX_UDB + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > vmlinux.symtab + udb/symtab < vmlinux.symtab > 
udb/symbols.c + $(CC) -c $(CFLAGS) udb/symbols.c -o udb/symbols.o + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + udb/symbols.o \ + -o vmlinux +endif + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + +symlinks: + rm -f include/asm + ( cd include ; ln -sf asm-$(ARCH) asm) + @if [ ! -d include/linux/modules ]; then \ + mkdir include/linux/modules; \ + fi + +oldconfig: symlinks + $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in + +xconfig: symlinks + $(MAKE) -C scripts kconfig.tk + wish -f scripts/kconfig.tk + +menuconfig: include/linux/version.h symlinks + $(MAKE) -C scripts/lxdialog all + $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in + +config: symlinks + $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in + +include/config/MARKER: scripts/split-include include/linux/autoconf.h + scripts/split-include include/linux/autoconf.h include/config + @ touch include/config/MARKER + +linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) + +$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@) + +$(TOPDIR)/include/linux/version.h: include/linux/version.h +$(TOPDIR)/include/linux/compile.h: include/linux/compile.h + +newversion: + . 
scripts/mkversion > .tmpversion + @mv -f .tmpversion .version + +include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion + @echo -n \#define UTS_VERSION \"\#`cat .version` > .ver + @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver; fi + @if [ -f .name ]; then echo -n \-`cat .name` >> .ver; fi + @echo ' '`date`'"' >> .ver + @echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver + @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver + @echo \#define LINUX_COMPILE_HOST \"`hostname`\" >> .ver + @if [ -x /bin/dnsdomainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`dnsdomainname`\"; \ + elif [ -x /bin/domainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`domainname`\"; \ + else \ + echo \#define LINUX_COMPILE_DOMAIN ; \ + fi >> .ver + @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver + @mv -f .ver $@ + +include/linux/version.h: ./Makefile + @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver + @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver + @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver + @mv -f .ver $@ + +init/version.o: init/version.c include/linux/compile.h include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(ARCH)"' -c -o init/version.o init/version.c + +init/main.o: init/main.c include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -c -o $*.o $< + +fs lib mm ipc kernel drivers net: dummy + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) + +TAGS: dummy + etags `find include/asm-$(ARCH) -name '*.h'` + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a + find $(SUBDIRS) init -name '*.[ch]' | xargs etags -a + +# Exuberant ctags works better with -I +tags: dummy + CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \ + 
ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \ + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ + find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a + +ifdef CONFIG_MODULES +ifdef CONFIG_MODVERSIONS +MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h +endif + +.PHONY: modules +modules: $(patsubst %, _mod_%, $(SUBDIRS)) + +.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) +$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER + $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules + +.PHONY: modules_install +modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post + +.PHONY: _modinst_ +_modinst_: + @rm -rf $(MODLIB)/kernel + @rm -f $(MODLIB)/build + @mkdir -p $(MODLIB)/kernel + @ln -s $(TOPDIR) $(MODLIB)/build + +# If System.map exists, run depmod. This deliberately does not have a +# dependency on System.map since that would run the dependency tree on +# vmlinux. This depmod is only for convenience to give the initial +# boot a modules.dep even before / is mounted read-write. However the +# boot script depmod is the master version. +ifeq "$(strip $(INSTALL_MOD_PATH))" "" +depmod_opts := +else +depmod_opts := -b $(INSTALL_MOD_PATH) -r +endif +.PHONY: _modinst_post +_modinst_post: _modinst_post_pcmcia + if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi + +# Backwards compatibilty symlinks for people still using old versions +# of pcmcia-cs with hard coded pathnames on insmod. Remove +# _modinst_post_pcmcia for kernel 2.4.1. 
+.PHONY: _modinst_post_pcmcia +_modinst_post_pcmcia: + cd $(MODLIB); \ + mkdir -p pcmcia; \ + find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia + +.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS)) +$(patsubst %, _modinst_%, $(SUBDIRS)) : + $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install + +# modules disabled.... + +else +modules modules_install: dummy + @echo + @echo "The present kernel configuration has modules disabled." + @echo "Type 'make config' and enable loadable module support." + @echo "Then build a kernel with module support enabled." + @echo + @exit 1 +endif + +clean: archclean + find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \ + | grep -v lxdialog/ | xargs rm -f + rm -f $(CLEAN_FILES) + rm -rf $(CLEAN_DIRS) + $(MAKE) -C Documentation/DocBook clean + +mrproper: clean archmrproper + find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f + rm -f $(MRPROPER_FILES) + rm -rf $(MRPROPER_DIRS) + $(MAKE) -C Documentation/DocBook mrproper + +distclean: mrproper + rm -f core `find . \( -not -type d \) -and \ + \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ + -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ + -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags + +backup: mrproper + cd .. && tar cf - linux/ | gzip -9 > backup.gz + sync + +sgmldocs: + chmod 755 $(TOPDIR)/scripts/docgen + chmod 755 $(TOPDIR)/scripts/gen-all-syms + chmod 755 $(TOPDIR)/scripts/kernel-doc + $(MAKE) -C $(TOPDIR)/Documentation/DocBook books + +psdocs: sgmldocs + $(MAKE) -C Documentation/DocBook ps + +pdfdocs: sgmldocs + $(MAKE) -C Documentation/DocBook pdf + +htmldocs: sgmldocs + $(MAKE) -C Documentation/DocBook html + +sums: + find . -type f -print | sort | xargs sum > .SUMS + +dep-files: scripts/mkdep archdep include/linux/version.h + scripts/mkdep -- init/*.c > .depend + scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! 
-name modversions.h -print` > .hdepend + $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" +ifdef CONFIG_MODVERSIONS + $(MAKE) update-modverfile +endif + +ifdef CONFIG_MODVERSIONS +MODVERFILE := $(TOPDIR)/include/linux/modversions.h +else +MODVERFILE := +endif +export MODVERFILE + +depend dep: dep-files + +checkconfig: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl + +checkhelp: + find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl + +checkincludes: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl + +ifdef CONFIGURATION +..$(CONFIGURATION): + @echo + @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'" + @echo + $(MAKE) $(CONFIGURATION) + @echo + @echo "Successful. Try re-making (ignore the error that follows)" + @echo + exit 1 + +#dummy: ..$(CONFIGURATION) +dummy: + +else + +dummy: + +endif + +include Rules.make + +# +# This generates dependencies for the .h files. +# + +scripts/mkdep: scripts/mkdep.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c + +scripts/split-include: scripts/split-include.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c + +# +# RPM target +# +# If you do a make spec before packing the tarball you can rpm -ta it +# +spec: + . scripts/mkspec >kernel.spec + +# +# Build a tar ball, generate an rpm from it and pack the result +# There arw two bits of magic here +# 1) The use of /. to avoid tar packing just the symlink +# 2) Removing the .dep files as they have source paths in them that +# will become invalid +# +rpm: clean spec + find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f + set -e; \ + cd $(TOPDIR)/.. ; \ + ln -sf $(TOPDIR) $(KERNELPATH) ; \ + tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \ + rm $(KERNELPATH) ; \ + cd $(TOPDIR) ; \ + . 
scripts/mkversion > .version ; \ + rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ + rm $(TOPDIR)/../$(KERNELPATH).tar.gz diff -urN linux-2.4.17/Makefile.orig linux_umopenmosix/Makefile.orig --- linux-2.4.17/Makefile.orig Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/Makefile.orig Wed Jun 26 23:45:14 2002 @@ -0,0 +1,546 @@ +VERSION = 2 +PATCHLEVEL = 4 +SUBLEVEL = 17 +EXTRAVERSION = + +KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) + +# SUBARCH tells the usermode build what the underlying arch is. That is set +# first, and if a usermode build is happening, the "ARCH=um" on the command +# line overrides the setting of ARCH below. If a native build is happening, +# then ARCH is assigned, getting whatever value it gets normally, and +# SUBARCH is subsequently ignored. + +SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ARCH := $(SUBARCH) + +KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") + +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ + else if [ -x /bin/bash ]; then echo /bin/bash; \ + else echo sh; fi ; fi) +TOPDIR := $(shell /bin/pwd) + +HPATH = $(TOPDIR)/include +FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net + +HOSTCC = ccache gcc +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer + +CROSS_COMPILE = + +# +# Include the make variables (CC, etc...) 
+# + +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = ccache $(CROSS_COMPILE)gcc +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +MAKEFILES = $(TOPDIR)/.config +GENKSYMS = /sbin/genksyms +DEPMOD = /sbin/depmod +MODFLAGS = -DMODULE +CFLAGS_KERNEL = +PERL = perl + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ + CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL + +all: do-it-all + +# +# Make "config" the default target if there is no configuration file or +# "depend" the target if there is no top-level dependency information. +# + +ifeq (.config,$(wildcard .config)) +include .config +ifeq (.depend,$(wildcard .depend)) +include .depend +do-it-all: Version vmlinux +else +CONFIGURATION = depend +do-it-all: depend +endif +else +CONFIGURATION = config +do-it-all: config +endif + +# +# INSTALL_PATH specifies where to place the updated kernel and system map +# images. Uncomment if you want to place them anywhere other than root. +# + +#export INSTALL_PATH=/boot + +# +# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +# relocations required by build roots. This is not defined in the +# makefile but the arguement can be passed to make if needed. +# + +MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) +export MODLIB + +# +# standard CFLAGS +# + +CPPFLAGS := -D__KERNEL__ -I$(HPATH) + +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fomit-frame-pointer -fno-strict-aliasing -fno-common +AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) + +# +# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. +# This is i386 specific. 
+# + +export ROOT_DEV = CURRENT + +# +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. +# This is i386 specific. +# + +export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA + +# +# If you want the RAM disk device, define this to be the size in blocks. +# This is i386 specific. +# + +#export RAMDISK = -DRAMDISK=512 + +CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o +NETWORKS =net/network.o + +LIBS =$(TOPDIR)/lib/lib.a +SUBDIRS =kernel drivers mm fs net ipc lib + +DRIVERS-n := +DRIVERS-y := +DRIVERS-m := +DRIVERS- := + +DRIVERS-$(CONFIG_ACPI) += drivers/acpi/acpi.o +DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o +DRIVERS-y += drivers/char/char.o \ + drivers/block/block.o \ + drivers/misc/misc.o \ + drivers/net/net.o \ + drivers/media/media.o +DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o +DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o +DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a +DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a +DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o +DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.o +DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o +DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o +DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o +DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o +DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o +DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o +DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o + +ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) +DRIVERS-y += drivers/cdrom/driver.o +endif + +DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o +DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o 
+DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o +DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o +DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o +DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o +DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o +DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a +DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o +DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o +DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a +DRIVERS-$(CONFIG_VT) += drivers/video/video.o +DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a +DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o +DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a +DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o +DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o +DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o +DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o +DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o +DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o +DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o +DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o +DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o + +DRIVERS := $(DRIVERS-y) + + +# files removed with 'make clean' +CLEAN_FILES = \ + kernel/ksyms.lst include/linux/compile.h \ + vmlinux System.map \ + .tmp* \ + drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ + drivers/char/conmakehash \ + drivers/char/drm/*-mod.c \ + drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ + drivers/zorro/devlist.h drivers/zorro/gen-devlist \ + drivers/sound/bin2hex drivers/sound/hex2hex \ + drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ + drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ + 
drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ + drivers/scsi/aic7xxx/aicasm/y.tab.h \ + drivers/scsi/aic7xxx/aicasm/aicasm \ + drivers/scsi/53c700-mem.c \ + net/khttpd/make_times_h \ + net/khttpd/times.h \ + submenu* +# directories removed with 'make clean' +CLEAN_DIRS = \ + modules + +# files removed with 'make mrproper' +MRPROPER_FILES = \ + include/linux/autoconf.h include/linux/version.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ + drivers/net/hamradio/soundmodem/gentbl \ + drivers/sound/*_boot.h drivers/sound/.*.boot \ + drivers/sound/msndinit.c \ + drivers/sound/msndperm.c \ + drivers/sound/pndsperm.c \ + drivers/sound/pndspini.c \ + drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ + .version .config* config.in config.old \ + scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ + scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ + .menuconfig.log \ + include/asm \ + .hdepend scripts/mkdep scripts/split-include scripts/docproc \ + $(TOPDIR)/include/linux/modversions.h \ + kernel.spec + +# directories removed with 'make mrproper' +MRPROPER_DIRS = \ + include/config \ + $(TOPDIR)/include/linux/modules + + +include arch/$(ARCH)/Makefile + +export CPPFLAGS CFLAGS AFLAGS + +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS + +.S.s: + $(CPP) $(AFLAGS) -traditional -o $*.s $< +.S.o: + $(CC) $(AFLAGS) -traditional -c -o $*.o $< + +Version: dummy + @rm -f include/linux/compile.h + +boot: vmlinux + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot + +vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs + $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group \ + -o vmlinux + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] 
\)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + +symlinks: + rm -f include/asm + ( cd include ; ln -sf asm-$(ARCH) asm) + @if [ ! -d include/linux/modules ]; then \ + mkdir include/linux/modules; \ + fi + +oldconfig: symlinks + $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in + +xconfig: symlinks + $(MAKE) -C scripts kconfig.tk + wish -f scripts/kconfig.tk + +menuconfig: include/linux/version.h symlinks + $(MAKE) -C scripts/lxdialog all + $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in + +config: symlinks + $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in + +include/config/MARKER: scripts/split-include include/linux/autoconf.h + scripts/split-include include/linux/autoconf.h include/config + @ touch include/config/MARKER + +linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS)) + +$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@) + +$(TOPDIR)/include/linux/version.h: include/linux/version.h +$(TOPDIR)/include/linux/compile.h: include/linux/compile.h + +newversion: + . 
scripts/mkversion > .tmpversion + @mv -f .tmpversion .version + +include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion + @echo -n \#define UTS_VERSION \"\#`cat .version` > .ver + @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver; fi + @if [ -f .name ]; then echo -n \-`cat .name` >> .ver; fi + @echo ' '`date`'"' >> .ver + @echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver + @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver + @echo \#define LINUX_COMPILE_HOST \"`hostname`\" >> .ver + @if [ -x /bin/dnsdomainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`dnsdomainname`\"; \ + elif [ -x /bin/domainname ]; then \ + echo \#define LINUX_COMPILE_DOMAIN \"`domainname`\"; \ + else \ + echo \#define LINUX_COMPILE_DOMAIN ; \ + fi >> .ver + @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver + @mv -f .ver $@ + +include/linux/version.h: ./Makefile + @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver + @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver + @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver + @mv -f .ver $@ + +init/version.o: init/version.c include/linux/compile.h include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(ARCH)"' -c -o init/version.o init/version.c + +init/main.o: init/main.c include/config/MARKER + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -c -o $*.o $< + +fs lib mm ipc kernel drivers net: dummy + $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@) + +TAGS: dummy + etags `find include/asm-$(ARCH) -name '*.h'` + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a + find $(SUBDIRS) init -name '*.[ch]' | xargs etags -a + +# Exuberant ctags works better with -I +tags: dummy + CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \ + 
ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \ + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \ + find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a + +ifdef CONFIG_MODULES +ifdef CONFIG_MODVERSIONS +MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h +endif + +.PHONY: modules +modules: $(patsubst %, _mod_%, $(SUBDIRS)) + +.PHONY: $(patsubst %, _mod_%, $(SUBDIRS)) +$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER + $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules + +.PHONY: modules_install +modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post + +.PHONY: _modinst_ +_modinst_: + @rm -rf $(MODLIB)/kernel + @rm -f $(MODLIB)/build + @mkdir -p $(MODLIB)/kernel + @ln -s $(TOPDIR) $(MODLIB)/build + +# If System.map exists, run depmod. This deliberately does not have a +# dependency on System.map since that would run the dependency tree on +# vmlinux. This depmod is only for convenience to give the initial +# boot a modules.dep even before / is mounted read-write. However the +# boot script depmod is the master version. +ifeq "$(strip $(INSTALL_MOD_PATH))" "" +depmod_opts := +else +depmod_opts := -b $(INSTALL_MOD_PATH) -r +endif +.PHONY: _modinst_post +_modinst_post: _modinst_post_pcmcia + if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi + +# Backwards compatibilty symlinks for people still using old versions +# of pcmcia-cs with hard coded pathnames on insmod. Remove +# _modinst_post_pcmcia for kernel 2.4.1. 
+.PHONY: _modinst_post_pcmcia +_modinst_post_pcmcia: + cd $(MODLIB); \ + mkdir -p pcmcia; \ + find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia + +.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS)) +$(patsubst %, _modinst_%, $(SUBDIRS)) : + $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install + +# modules disabled.... + +else +modules modules_install: dummy + @echo + @echo "The present kernel configuration has modules disabled." + @echo "Type 'make config' and enable loadable module support." + @echo "Then build a kernel with module support enabled." + @echo + @exit 1 +endif + +clean: archclean + find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \ + | grep -v lxdialog/ | xargs rm -f + rm -f $(CLEAN_FILES) + rm -rf $(CLEAN_DIRS) + $(MAKE) -C Documentation/DocBook clean + +mrproper: clean archmrproper + find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f + rm -f $(MRPROPER_FILES) + rm -rf $(MRPROPER_DIRS) + $(MAKE) -C Documentation/DocBook mrproper + +distclean: mrproper + rm -f core `find . \( -not -type d \) -and \ + \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ + -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ + -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags + +backup: mrproper + cd .. && tar cf - linux/ | gzip -9 > backup.gz + sync + +sgmldocs: + chmod 755 $(TOPDIR)/scripts/docgen + chmod 755 $(TOPDIR)/scripts/gen-all-syms + chmod 755 $(TOPDIR)/scripts/kernel-doc + $(MAKE) -C $(TOPDIR)/Documentation/DocBook books + +psdocs: sgmldocs + $(MAKE) -C Documentation/DocBook ps + +pdfdocs: sgmldocs + $(MAKE) -C Documentation/DocBook pdf + +htmldocs: sgmldocs + $(MAKE) -C Documentation/DocBook html + +sums: + find . -type f -print | sort | xargs sum > .SUMS + +dep-files: scripts/mkdep archdep include/linux/version.h + scripts/mkdep -- init/*.c > .depend + scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! 
-name modversions.h -print` > .hdepend + $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)" +ifdef CONFIG_MODVERSIONS + $(MAKE) update-modverfile +endif + +ifdef CONFIG_MODVERSIONS +MODVERFILE := $(TOPDIR)/include/linux/modversions.h +else +MODVERFILE := +endif +export MODVERFILE + +depend dep: dep-files + +checkconfig: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl + +checkhelp: + find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl + +checkincludes: + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl + +ifdef CONFIGURATION +..$(CONFIGURATION): + @echo + @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'" + @echo + $(MAKE) $(CONFIGURATION) + @echo + @echo "Successful. Try re-making (ignore the error that follows)" + @echo + exit 1 + +#dummy: ..$(CONFIGURATION) +dummy: + +else + +dummy: + +endif + +include Rules.make + +# +# This generates dependencies for the .h files. +# + +scripts/mkdep: scripts/mkdep.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c + +scripts/split-include: scripts/split-include.c + $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c + +# +# RPM target +# +# If you do a make spec before packing the tarball you can rpm -ta it +# +spec: + . scripts/mkspec >kernel.spec + +# +# Build a tar ball, generate an rpm from it and pack the result +# There arw two bits of magic here +# 1) The use of /. to avoid tar packing just the symlink +# 2) Removing the .dep files as they have source paths in them that +# will become invalid +# +rpm: clean spec + find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f + set -e; \ + cd $(TOPDIR)/.. ; \ + ln -sf $(TOPDIR) $(KERNELPATH) ; \ + tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \ + rm $(KERNELPATH) ; \ + cd $(TOPDIR) ; \ + . 
scripts/mkversion > .version ; \ + rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \ + rm $(TOPDIR)/../$(KERNELPATH).tar.gz diff -urN linux-2.4.17/arch/i386/Makefile linux_umopenmosix/arch/i386/Makefile --- linux-2.4.17/arch/i386/Makefile Thu Apr 12 22:20:31 2001 +++ linux_umopenmosix/arch/i386/Makefile Wed Jun 26 23:45:14 2002 @@ -138,6 +138,9 @@ install: vmlinux @$(MAKEBOOT) BOOTIMAGE=bzImage install +install1: + @$(MAKEBOOT) BOOTIMAGE=bzImage install + archclean: @$(MAKEBOOT) clean diff -urN linux-2.4.17/arch/i386/config.in linux_umopenmosix/arch/i386/config.in --- linux-2.4.17/arch/i386/config.in Fri Dec 21 19:41:53 2001 +++ linux_umopenmosix/arch/i386/config.in Wed Jun 26 23:45:14 2002 @@ -11,6 +11,31 @@ define_bool CONFIG_UID16 y mainmenu_option next_comment +comment 'MOSIX' +bool 'MOSIX process migration support' CONFIG_MOSIX +if [ "$CONFIG_MOSIX" = "y" ]; then + bool 'Support clusters with a complex network topology' CONFIG_MOSIX_TOPOLOGY + if [ "$CONFIG_MOSIX_TOPOLOGY" = "y" ]; then + int 'Maximum network-topology complexity to support (2-10)' CONFIG_MOSIX_MAXTOPOLOGY 4 + fi + + bool 'MOSIX Kernel Debugger' CONFIG_MOSIX_UDB + if [ "$CONFIG_MOSIX_UDB" = "y" ]; then + bool 'MOSIX Kernel Debugging Code' CONFIG_MOSIX_DEBUG + bool 'Allow migration to self (for easyier testing)' CONFIG_MOSIX_CHEAT_MIGSELF + bool 'Process-arrival messages' CONFIG_MOSIX_WEEEEEEEEE + fi + bool 'MOSIX Kernel Diagnostics' CONFIG_MOSIX_DIAG + bool 'Stricter security on MOSIX ports' CONFIG_MOSIX_SECUREPORTS + int 'Level of process-identity disclosure (0-3)' CONFIG_MOSIX_DISCLOSURE 1 + bool 'Create the kernel with a "-mosix" extension' CONFIG_MOSIX_EXTMOSIX + bool 'Direct File-System Access' CONFIG_MOSIX_DFSA + bool 'MOSIX File-System' CONFIG_MOSIX_FS + bool 'Poll/Select exceptions on pipes' CONFIG_MOSIX_PIPE_EXCEPTIONS +fi +endmenu + +mainmenu_option next_comment comment 'Code maturity level options' bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL endmenu 
diff -urN linux-2.4.17/arch/i386/config.in.org linux_umopenmosix/arch/i386/config.in.org --- linux-2.4.17/arch/i386/config.in.org Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/i386/config.in.org Wed Jun 26 23:45:14 2002 @@ -0,0 +1,443 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/config-language.txt. +# +mainmenu_name "Linux Kernel Configuration" + +define_bool CONFIG_X86 y +define_bool CONFIG_ISA y +define_bool CONFIG_SBUS n + +define_bool CONFIG_UID16 y + +mainmenu_option next_comment +comment 'MOSIX' +bool 'MOSIX process migration support' CONFIG_MOSIX +if [ "$CONFIG_MOSIX" = "y" ]; then + bool 'Support clusters with a complex network topology' CONFIG_MOSIX_TOPOLOGY + if [ "$CONFIG_MOSIX_TOPOLOGY" = "y" ]; then + int 'Maximum network-topology complexity to support (2-10)' CONFIG_MOSIX_MAXTOPOLOGY 4 + fi + + bool 'MOSIX Kernel Debugger' CONFIG_MOSIX_UDB + if [ "$CONFIG_MOSIX_UDB" = "y" ]; then + bool 'MOSIX Kernel Debugging Code' CONFIG_MOSIX_DEBUG + bool 'Allow migration to self (for easyier testing)' CONFIG_MOSIX_CHEAT_MIGSELF + bool 'Process-arrival messages' CONFIG_MOSIX_WEEEEEEEEE + fi + bool 'MOSIX Kernel Diagnostics' CONFIG_MOSIX_DIAG + bool 'Stricter security on MOSIX ports' CONFIG_MOSIX_SECUREPORTS + int 'Level of process-identity disclosure (0-3)' CONFIG_MOSIX_DISCLOSURE 1 + bool 'Create the kernel with a "-mosix" extension' CONFIG_MOSIX_EXTMOSIX + bool 'Direct File-System Access' CONFIG_MOSIX_DFSA + bool 'MOSIX File-System' CONFIG_MOSIX_FS + bool 'Poll/Select exceptions on pipes' CONFIG_MOSIX_PIPE_EXCEPTIONS +fi +endmenu + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Set version information on all module symbols' 
CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Processor type and features' +choice 'Processor family' \ + "386 CONFIG_M386 \ + 486 CONFIG_M486 \ + 586/K5/5x86/6x86/6x86MX CONFIG_M586 \ + Pentium-Classic CONFIG_M586TSC \ + Pentium-MMX CONFIG_M586MMX \ + Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ + Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ + Pentium-4 CONFIG_MPENTIUM4 \ + K6/K6-II/K6-III CONFIG_MK6 \ + Athlon/Duron/K7 CONFIG_MK7 \ + Crusoe CONFIG_MCRUSOE \ + Winchip-C6 CONFIG_MWINCHIPC6 \ + Winchip-2 CONFIG_MWINCHIP2 \ + Winchip-2A/Winchip-3 CONFIG_MWINCHIP3D \ + CyrixIII/C3 CONFIG_MCYRIXIII" Pentium-Pro +# +# Define implied options from the CPU selection here +# + +if [ "$CONFIG_M386" = "y" ]; then + define_bool CONFIG_X86_CMPXCHG n + define_bool CONFIG_X86_XADD n + define_int CONFIG_X86_L1_CACHE_SHIFT 4 + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + define_bool CONFIG_X86_PPRO_FENCE y +else + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y + define_bool CONFIG_X86_CMPXCHG y + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y +fi +if [ "$CONFIG_M486" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 4 + define_bool CONFIG_X86_USE_STRING_486 y + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_PPRO_FENCE y +fi +if [ "$CONFIG_M586" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_USE_STRING_486 y + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_PPRO_FENCE y +fi +if [ "$CONFIG_M586TSC" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_USE_STRING_486 y + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_PPRO_FENCE y +fi +if [ "$CONFIG_M586MMX" 
= "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_USE_STRING_486 y + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PPRO_FENCE y +fi +if [ "$CONFIG_M686" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y + define_bool CONFIG_X86_PPRO_FENCE y +fi +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MPENTIUM4" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 7 + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MK6" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MK7" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_USE_3DNOW y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MCYRIXIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_USE_3DNOW y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MCRUSOE" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_TSC y +fi +if [ "$CONFIG_MWINCHIPC6" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MWINCHIP2" = "y" ]; then + define_int 
CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +if [ "$CONFIG_MWINCHIP3D" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 + define_bool CONFIG_X86_ALIGNMENT_16 y + define_bool CONFIG_X86_TSC y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y +fi +tristate 'Toshiba Laptop support' CONFIG_TOSHIBA +tristate 'Dell laptop support' CONFIG_I8K + +tristate '/dev/cpu/microcode - Intel IA32 CPU microcode support' CONFIG_MICROCODE +tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR +tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID + +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + +bool 'Math emulation' CONFIG_MATH_EMULATION +bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR +bool 'Symmetric multi-processing support' CONFIG_SMP +if [ "$CONFIG_SMP" != "y" ]; then + bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC + dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC + if [ "$CONFIG_X86_UP_APIC" = "y" ]; then + define_bool CONFIG_X86_LOCAL_APIC y + fi + if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then + define_bool CONFIG_X86_IO_APIC y + fi +else + bool 'Multiquad NUMA system' CONFIG_MULTIQUAD +fi + +if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y +fi +endmenu + +mainmenu_option next_comment +comment 'General setup' + +bool 'Networking support' CONFIG_NET + +# Visual Workstation support is utterly broken. 
+# If you want to see it working mail an VW540 to hch@infradead.org 8) +#bool 'SGI Visual Workstation support' CONFIG_VISWS +if [ "$CONFIG_VISWS" = "y" ]; then + define_bool CONFIG_X86_VISWS_APIC y + define_bool CONFIG_X86_LOCAL_APIC y + define_bool CONFIG_PCI y +else + if [ "$CONFIG_SMP" = "y" ]; then + define_bool CONFIG_X86_IO_APIC y + define_bool CONFIG_X86_LOCAL_APIC y + fi + bool 'PCI support' CONFIG_PCI + if [ "$CONFIG_PCI" = "y" ]; then + choice ' PCI access mode' \ + "BIOS CONFIG_PCI_GOBIOS \ + Direct CONFIG_PCI_GODIRECT \ + Any CONFIG_PCI_GOANY" Any + if [ "$CONFIG_PCI_GOBIOS" = "y" -o "$CONFIG_PCI_GOANY" = "y" ]; then + define_bool CONFIG_PCI_BIOS y + fi + if [ "$CONFIG_PCI_GODIRECT" = "y" -o "$CONFIG_PCI_GOANY" = "y" ]; then + define_bool CONFIG_PCI_DIRECT y + fi + fi +fi + +source drivers/pci/Config.in + +bool 'EISA support' CONFIG_EISA + +if [ "$CONFIG_VISWS" != "y" ]; then + bool 'MCA support' CONFIG_MCA +else + define_bool CONFIG_MCA n +fi + +bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG + +if [ "$CONFIG_HOTPLUG" = "y" ] ; then + source drivers/pcmcia/Config.in + source drivers/hotplug/Config.in +else + define_bool CONFIG_PCMCIA n + define_bool CONFIG_HOTPLUG_PCI n +fi + +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +if [ "$CONFIG_PROC_FS" = "y" ]; then + choice 'Kernel core (/proc/kcore) format' \ + "ELF CONFIG_KCORE_ELF \ + A.OUT CONFIG_KCORE_AOUT" ELF +fi +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC + +bool 'Power Management support' CONFIG_PM + +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + dep_bool ' ACPI support' CONFIG_ACPI $CONFIG_PM + + if [ "$CONFIG_ACPI" != "n" ]; then + source drivers/acpi/Config.in + fi +fi + +dep_tristate ' Advanced Power Management BIOS support' CONFIG_APM $CONFIG_PM +if [ "$CONFIG_APM" 
!= "n" ]; then + bool ' Ignore USER SUSPEND' CONFIG_APM_IGNORE_USER_SUSPEND + bool ' Enable PM at boot time' CONFIG_APM_DO_ENABLE + bool ' Make CPU Idle calls when idle' CONFIG_APM_CPU_IDLE + bool ' Enable console blanking using APM' CONFIG_APM_DISPLAY_BLANK + bool ' RTC stores time in GMT' CONFIG_APM_RTC_IS_GMT + bool ' Allow interrupts during APM BIOS calls' CONFIG_APM_ALLOW_INTS + bool ' Use real mode APM BIOS call to power off' CONFIG_APM_REAL_MODE_POWER_OFF +fi + +endmenu + +source drivers/mtd/Config.in + +source drivers/parport/Config.in + +source drivers/pnp/Config.in + +source drivers/block/Config.in + +source drivers/md/Config.in + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +source drivers/telephony/Config.in + +mainmenu_option next_comment +comment 'ATA/IDE/MFM/RLL support' + +tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE + +if [ "$CONFIG_IDE" != "n" ]; then + source drivers/ide/Config.in +else + define_bool CONFIG_BLK_DEV_IDE_MODES n + define_bool CONFIG_BLK_DEV_HD n +fi +endmenu + +mainmenu_option next_comment +comment 'SCSI support' + +tristate 'SCSI support' CONFIG_SCSI + +if [ "$CONFIG_SCSI" != "n" ]; then + source drivers/scsi/Config.in +fi +endmenu + +source drivers/message/fusion/Config.in + +source drivers/ieee1394/Config.in + +source drivers/message/i2o/Config.in + +if [ "$CONFIG_NET" = "y" ]; then + mainmenu_option next_comment + comment 'Network device support' + + bool 'Network device support' CONFIG_NETDEVICES + if [ "$CONFIG_NETDEVICES" = "y" ]; then + source drivers/net/Config.in + if [ "$CONFIG_ATM" = "y" ]; then + source drivers/atm/Config.in + fi + fi + endmenu +fi + +source net/ax25/Config.in + +source net/irda/Config.in + +mainmenu_option next_comment +comment 'ISDN subsystem' +if [ "$CONFIG_NET" != "n" ]; then + tristate 'ISDN support' CONFIG_ISDN + if [ "$CONFIG_ISDN" != "n" ]; then + source drivers/isdn/Config.in + fi +fi +endmenu + +mainmenu_option next_comment +comment 'Old CD-ROM drivers (not SCSI, not IDE)' 
+ +bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI +if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then + source drivers/cdrom/Config.in +fi +endmenu + +# +# input before char - char/joystick depends on it. As does USB. +# +source drivers/input/Config.in +source drivers/char/Config.in + +#source drivers/misc/Config.in + +source drivers/media/Config.in + +source fs/Config.in + +if [ "$CONFIG_VT" = "y" ]; then + mainmenu_option next_comment + comment 'Console drivers' + bool 'VGA text console' CONFIG_VGA_CONSOLE + bool 'Video mode selection support' CONFIG_VIDEO_SELECT + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE + source drivers/video/Config.in + fi + endmenu +fi + +mainmenu_option next_comment +comment 'Sound' + +tristate 'Sound card support' CONFIG_SOUND +if [ "$CONFIG_SOUND" != "n" ]; then + source drivers/sound/Config.in +fi +endmenu + +source drivers/usb/Config.in + +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + source net/bluetooth/Config.in +fi + +mainmenu_option next_comment +comment 'Kernel hacking' + +bool 'Kernel debugging' CONFIG_DEBUG_KERNEL +if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then + bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM + bool ' Debug memory allocations' CONFIG_DEBUG_SLAB + bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE +fi + +endmenu diff -urN linux-2.4.17/arch/i386/defconfig linux_umopenmosix/arch/i386/defconfig --- linux-2.4.17/arch/i386/defconfig Mon Nov 12 21:59:03 2001 +++ linux_umopenmosix/arch/i386/defconfig Wed Jun 26 23:45:14 2002 @@ -1,6 +1,26 @@ # # Automatically generated make config: don't edit # + +# +# MOSIX options +# + +CONFIG_MOSIX=y +# CONFIG_MOSIX_TOPOLOGY is not set +CONFIG_MOSIX_MAXTOPOLOGY=4 +# CONFIG_MOSIX_UDB is not set +# CONFIG_MOSIX_CHEAT_MIGSELF 
is not set +# CONFIG_MOSIX_DEBUG is not set +# CONFIG_MOSIX_WEEEEEEEEE is not set +CONFIG_MOSIX_DISCLOSURE=1 +# CONFIG_MOSIX_EXTMOSIX is not set +CONFIG_MOSIX_DIAG=y +CONFIG_MOSIX_SECUREPORTS=y +# CONFIG_MOSIX_DFSA is not set +# CONFIG_MOSIX_FS is not set +# CONFIG_MOSIX_PIPE_EXCEPTIONS is not set + CONFIG_X86=y CONFIG_ISA=y # CONFIG_SBUS is not set diff -urN linux-2.4.17/arch/i386/kernel/Makefile linux_umopenmosix/arch/i386/kernel/Makefile --- linux-2.4.17/arch/i386/kernel/Makefile Sat Nov 10 00:21:21 2001 +++ linux_umopenmosix/arch/i386/kernel/Makefile Wed Jun 26 23:45:14 2002 @@ -42,3 +42,14 @@ obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o include $(TOPDIR)/Rules.make + +ifdef CONFIG_MOSIX +entry.o: ./mosasm.H + +offset: offset.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h + $(HOSTCC) $(HOSTCFLAGS) -D__KERNEL__ -I$(TOPDIR)/include -o offset offset.c + +./mosasm.H: offset entry.S + ./offset < entry.S > mosasm.H + +endif diff -urN linux-2.4.17/arch/i386/kernel/entry.S linux_umopenmosix/arch/i386/kernel/entry.S --- linux-2.4.17/arch/i386/kernel/entry.S Sat Nov 3 03:18:49 2001 +++ linux_umopenmosix/arch/i386/kernel/entry.S Wed Jun 26 23:45:14 2002 @@ -46,6 +46,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include "mosasm.H" +#endif /* CONFIG_MOSIX */ + EBX = 0x00 ECX = 0x04 EDX = 0x08 @@ -179,10 +183,16 @@ pushl %ebx call SYMBOL_NAME(schedule_tail) addl $4, %esp +#ifdef CONFIG_MOSIX +ENTRY(ret_from_kickstart) + GET_CURRENT(%ebx) + jmp ret_from_sys_call +#else GET_CURRENT(%ebx) testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS jne tracesys_exit jmp ret_from_sys_call +#endif /* CONFIG_MOSIX */ /* * Return to user mode is not as complex as all this looks, @@ -197,16 +207,50 @@ GET_CURRENT(%ebx) testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS jne tracesys +/* conflict resolution - Qlusters */ +#ifdef CONFIG_MOSIX_UDB + pushl %eax + 
call SYMBOL_NAME(sys_call_trace) # display syscalls for debugging + popl %eax +#endif /* CONFIG_MOSIX_UDB */ cmpl $(NR_syscalls),%eax jae badsys +#ifdef CONFIG_MOSIX + testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) + jne adjust_trace_before_syscall +adjusted_trace: + testb $DREMOTE,DFLAGS(%ebx) + je local_syscall +on_remote: + pushl %eax + call *SYMBOL_NAME(remote_sys_call_table)(,%eax,4) + addl $4,%esp + movl %eax,EAX(%esp) + jmp ret_from_sys_call +local_syscall: +#endif /* CONFIG_MOSIX */ call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value +#ifdef CONFIG_MOSIX + call SYMBOL_NAME(mosix_local_syscall) +#endif /* CONFIG_MOSIX */ ENTRY(ret_from_sys_call) +#ifdef CONFIG_MOSIX + testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) + jne adjust_trace_before_syscall +ret_check_reschedule: +#endif /* CONFIG_MOSIX */ cli # need_resched and signals atomic test cmpl $0,need_resched(%ebx) jne reschedule cmpl $0,sigpending(%ebx) jne signal_return +#ifdef CONFIG_MOSIX +straight_to_mosix: + call SYMBOL_NAME(mosix_pre_usermode_actions) + testl %eax,%eax + jne ret_from_sys_call +#endif /* CONFIG_MOSIX */ restore_all: RESTORE_ALL @@ -218,7 +262,11 @@ jne v86_signal_return xorl %edx,%edx call SYMBOL_NAME(do_signal) +#ifdef CONFIG_MOSIX + jmp straight_to_mosix +#else jmp restore_all +#endif /* CONFIG_MOSIX */ ALIGN v86_signal_return: @@ -226,18 +274,41 @@ movl %eax,%esp xorl %edx,%edx call SYMBOL_NAME(do_signal) +#ifdef CONFIG_MOSIX + jmp straight_to_mosix +#else jmp restore_all +#endif /* CONFIG_MOSIX */ ALIGN tracesys: movl $-ENOSYS,EAX(%esp) call SYMBOL_NAME(syscall_trace) +#ifdef CONFIG_MOSIX +adjust_trace_before_syscall: # only arrive here with DTRACESYS(1|2) + testl $DDEPUTY,DFLAGS(%ebx) + jne straight_to_mosix # no mess with signals/syscalls/tracesys + testl $DREMOTE,DFLAGS(%ebx) + je no_need_to_unsync + call wait_for_permission_to_continue +no_need_to_unsync: + testl $DTRACESYS2,DFLAGS(%ebx) + jne second_tracesys # skipping system-call + orl 
$DTRACESYS2,DFLAGS(%ebx) # next time we skip the system-call + movl $-ENOSYS,EAX(%esp) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae second_tracesys # prevent system-call out of range trick + jmp adjusted_trace # now do the system-call +second_tracesys: # note: "syscall_trace" clears the flags +#else movl ORIG_EAX(%esp),%eax cmpl $(NR_syscalls),%eax jae tracesys_exit call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value tracesys_exit: +#endif /* CONFIG_MOSIX */ call SYMBOL_NAME(syscall_trace) jmp ret_from_sys_call badsys: @@ -251,7 +322,11 @@ movl EFLAGS(%esp),%eax # mix EFLAGS and CS movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? +#ifdef CONFIG_MOSIX + jne ret_check_reschedule +#else jne ret_from_sys_call +#endif /* CONFIG_MOSIX */ jmp restore_all ALIGN @@ -259,6 +334,73 @@ call SYMBOL_NAME(schedule) # test jmp ret_from_sys_call +#ifdef CONFIG_MOSIX +/* + * call_with_regs(caddr_t routine, pt_regs *before, pt_regs *after) + * pushes the "before" regs on the stack and calls routine, + * then places the possibly-modified registers in "after" + * (which may possibly equal "before"). + * Also, set "current->altregs" to the pushed registers, then restores it. 
+ */ +ENTRY(call_with_regs) + pushl %ebx + GET_CURRENT(%ebx) + pushl ALTREGS(%ebx) + movl 16(%esp),%eax + pushl 56(%eax) + pushl 52(%eax) + pushl 48(%eax) + pushl 44(%eax) + pushl 40(%eax) + pushl 36(%eax) + pushl 32(%eax) + pushl 28(%eax) + pushl 24(%eax) + pushl 20(%eax) + pushl 16(%eax) + pushl 12(%eax) + pushl 8(%eax) + pushl 4(%eax) + pushl 0(%eax) + movl %esp,ALTREGS(%ebx) + movl 72(%esp),%eax + call *%eax + movl 80(%esp),%edx + popl %ecx + movl %ecx,0(%edx) + popl %ecx + movl %ecx,4(%edx) + popl %ecx + movl %ecx,8(%edx) + popl %ecx + movl %ecx,12(%edx) + popl %ecx + movl %ecx,16(%edx) + popl %ecx + movl %ecx,20(%edx) + popl %ecx + movl %ecx,24(%edx) + popl %ecx + movl %ecx,28(%edx) + popl %ecx + movl %ecx,32(%edx) + popl %ecx + movl %ecx,36(%edx) + popl %ecx + movl %ecx,40(%edx) + popl %ecx + movl %ecx,44(%edx) + popl %ecx + movl %ecx,48(%edx) + popl %ecx + movl %ecx,52(%edx) + popl %ecx + movl %ecx,56(%edx) + popl ALTREGS(%ebx) + popl %ebx + ret +#endif /* CONFIG_MOSIX */ + ENTRY(divide_error) pushl $0 # no error code pushl $ SYMBOL_NAME(do_divide_error) @@ -402,7 +544,11 @@ .long SYMBOL_NAME(sys_read) .long SYMBOL_NAME(sys_write) .long SYMBOL_NAME(sys_open) /* 5 */ +#ifdef CONFIG_MOSIX_DFSA + .long SYMBOL_NAME(sys_close_syscall) +#else .long SYMBOL_NAME(sys_close) +#endif /* CONFIG_MOSIX_DFSA */ .long SYMBOL_NAME(sys_waitpid) .long SYMBOL_NAME(sys_creat) .long SYMBOL_NAME(sys_link) diff -urN linux-2.4.17/arch/i386/kernel/i387.c linux_umopenmosix/arch/i386/kernel/i387.c --- linux-2.4.17/arch/i386/kernel/i387.c Fri Feb 23 20:09:08 2001 +++ linux_umopenmosix/arch/i386/kernel/i387.c Wed Jun 26 23:45:14 2002 @@ -520,3 +520,66 @@ return fpvalid; } + +#ifdef CONFIG_MOSIX + +int +has_fxsr(void) +{ + return(cpu_has_fxsr); +} + +void +fsave_to_fxsave(union i387_union *from, union i387_union *to) +{ + int i; + long *fcp, *tcp; + + to->fxsave.cwd = from->fsave.cwd; + to->fxsave.swd = from->fsave.swd; + to->fxsave.twd = twd_i387_to_fxsr(from->fsave.twd); + 
to->fxsave.fop = from->fxsave.padding[0]; + to->fxsave.fip = from->fsave.fip; + to->fxsave.fcs = from->fsave.fcs; + to->fxsave.foo = from->fsave.foo; + to->fxsave.mxcsr = from->fxsave.padding[1]; + to->fxsave.fos = from->fsave.fos; + for(fcp = from->fsave.st_space , tcp = to->fxsave.st_space , + i = 0 ; i < 8 ; i++) + { + *tcp++ = *fcp++; + *tcp++ = *fcp++; + *tcp = *((unsigned short *)fcp)++; + tcp += 2; + } + memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space, + sizeof(from->fxsave.xmm_space)); +} + +void +fxsave_to_fsave(union i387_union *from, union i387_union *to) +{ + int i; + long *fcp, *tcp; + + to->fsave.cwd = from->fxsave.cwd; + to->fsave.swd = from->fxsave.swd; + to->fsave.twd = twd_fxsr_to_i387(&from->fxsave); + to->fsave.fip = from->fxsave.fip; + to->fsave.fcs = from->fxsave.fcs; + to->fsave.foo = from->fxsave.foo; + to->fsave.fos = from->fxsave.fos; + to->fxsave.padding[0] = from->fxsave.fop; + to->fxsave.padding[1] = from->fxsave.mxcsr; + for(fcp = from->fxsave.st_space , tcp = to->fsave.st_space , + i = 0 ; i < 8 ; i++) + { + *tcp++ = *fcp++; + *tcp++ = *fcp++; + *((unsigned short *)tcp)++ = *fcp; + fcp += 2; + } + memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space, + sizeof(to->fxsave.xmm_space)); +} +#endif /* CONFIG_MOSIX */ diff -urN linux-2.4.17/arch/i386/kernel/ioport.c linux_umopenmosix/arch/i386/kernel/ioport.c --- linux-2.4.17/arch/i386/kernel/ioport.c Tue Jul 20 01:22:48 1999 +++ linux_umopenmosix/arch/i386/kernel/ioport.c Wed Jun 26 23:45:14 2002 @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. 
*/ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) { @@ -61,6 +65,10 @@ return -EINVAL; if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; +#ifdef CONFIG_MOSIX + if(turn_on && !mosix_go_home_for_reason(1, DSTAY_FOR_IOPL)) + return(-ENOMEM); +#endif /* CONFIG_MOSIX */ /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -111,6 +119,11 @@ if (!capable(CAP_SYS_RAWIO)) return -EPERM; } +#ifdef CONFIG_MOSIX + if(!mosix_go_home_for_reason(1, DSTAY_FOR_IOPL)) + return(-ENOMEM); + regs = mos_to_regs(&current->mosix); +#endif /* CONFIG_MOSIX */ regs->eflags = (regs->eflags & 0xffffcfff) | (level << 12); return 0; } diff -urN linux-2.4.17/arch/i386/kernel/irq.c linux_umopenmosix/arch/i386/kernel/irq.c --- linux-2.4.17/arch/i386/kernel/irq.c Thu Oct 25 22:53:46 2001 +++ linux_umopenmosix/arch/i386/kernel/irq.c Wed Jun 26 23:45:14 2002 @@ -279,6 +279,10 @@ clear_bit(0,&global_irq_lock); for (;;) { +#ifdef CONFIG_MOSIX_UDB + extern int nmi_debugger; + if(!nmi_debugger) +#endif /* CONFIG_MOSIX_UDB */ if (!--count) { show("wait_on_irq"); count = ~0; diff -urN linux-2.4.17/arch/i386/kernel/nmi.c linux_umopenmosix/arch/i386/kernel/nmi.c --- linux-2.4.17/arch/i386/kernel/nmi.c Fri Sep 21 06:55:24 2001 +++ linux_umopenmosix/arch/i386/kernel/nmi.c Wed Jun 26 23:45:14 2002 @@ -25,6 +25,9 @@ #include unsigned int nmi_watchdog = NMI_NONE; +#ifdef CONFIG_MOSIX_UDB +#include +#endif /* CONFIG_MOSIX_UDB */ static unsigned int nmi_hz = HZ; unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ extern void show_registers(struct pt_regs *regs); @@ -268,6 +271,10 @@ */ int sum, cpu = smp_processor_id(); +#ifdef CONFIG_MOSIX_UDB + if(nmi_debugger) + return; +#endif /* CONFIG_MOSIX_UDB */ sum = apic_timer_irqs[cpu]; if (last_irq_sums[cpu] == sum) { @@ -285,6 +292,10 @@ bust_spinlocks(1); printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); 
show_registers(regs); +#ifdef CONFIG_MOSIX_UDB + mosix_debugger("Watchdog"); + return; +#endif /* CONFIG_MOSIX_UDB */ printk("console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); diff -urN linux-2.4.17/arch/i386/kernel/offset.c linux_umopenmosix/arch/i386/kernel/offset.c --- linux-2.4.17/arch/i386/kernel/offset.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/i386/kernel/offset.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Moshe Bar + */ + +/* + * Produce an include-file for "entry.S", with: + * 1. constant-offsets of some required MOSIX-members of "task_struct" + * 2. some bits to test "current->dflags" + * 3. a duplicate system-call table (remote_sys_call_table), with all + * system-calls names preceded by a "remote_". 
+ */ + +#include +#include +#include +#include +#include +#include + +struct file; +#define FILE struct file + +extern int printf(char *, ...); +extern int fgets(char *, int, FILE *); +extern int perror(char *); +extern FILE *stdin; + +char line[2048]; + +int +main(void) +{ + register char *c; + register int i; + long t, time(long *); + char *ctime(long *); + + time(&t); + printf("/* Please do not edit -- this file is created automatically */\n"); + printf("/* %.24s */\n\n", ctime(&t)); + printf("ALTREGS\t\t= 0x%X\n", + (int)&(((struct task_struct *)0)->mosix.altregs)); + printf("DFLAGS\t\t= 0x%X\n", + (int)&(((struct task_struct *)0)->mosix.dflags)); + printf("DDEPUTY\t\t= 0x%X\n", DDEPUTY); + printf("DREMOTE\t\t= 0x%X\n", DREMOTE); + printf("DTRACESYS1\t\t= 0x%X\n", DTRACESYS1); + printf("DTRACESYS2\t\t= 0x%X\n", DTRACESYS2); + printf(".data\n"); + printf("\nENTRY(remote_sys_call_table)\n"); + while(fgets(line, 2048, stdin) && + strcmp(line, "ENTRY(sys_call_table)\n")) + ; + for(i = 0 ; i < NR_syscalls && fgets(line, 2048, stdin) ; i++) + if(!strncmp(line, "\t.long SYMBOL_NAME(", 19)) + { + for(c = &line[19] ; *c && *c != ')' ; c++); + *c = '\0'; + if(!strcmp(&line[19], "sys_ni_syscall")) + { + printf("\t.long SYMBOL_NAME(sys_ni_syscall)\n"); + continue; + } + printf("\t.long SYMBOL_NAME(remote_%s)\n", &line[19]); + } + else if(!strcmp(line, "\t.rept NR_syscalls-(.-sys_call_table)/4\n")) + printf("\t.rept NR_syscalls-(.-remote_sys_call_table)/4\n"); + else + { + printf("%s", line); + if(!strncmp(line, "\t.endr", 5)) + break; + } + printf(".text\n"); + return(0); +} diff -urN linux-2.4.17/arch/i386/kernel/process.c linux_umopenmosix/arch/i386/kernel/process.c --- linux-2.4.17/arch/i386/kernel/process.c Fri Oct 5 03:42:54 2001 +++ linux_umopenmosix/arch/i386/kernel/process.c Wed Jun 26 23:45:14 2002 @@ -49,6 +49,10 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int hlt_counter; 
@@ -160,6 +164,14 @@ /* shamelessly grabbed from lib/vsprintf.c for readability */ #define is_digit(c) ((c) >= '0' && (c) <= '9') #endif + +#ifdef CONFIG_MOSIX_UDB +void set_fastest_reboot(void) +{ + reboot_mode = 0x1234; +} +#endif /* CONFIG_MOSIX_UDB */ + static int __init reboot_setup(char *str) { while(1) { @@ -510,6 +522,43 @@ return retval; } +#ifdef CONFIG_MOSIX +/* + * Create a thread that starts as kernel, but will eventually run in user-mode: + * The slight differences from "kernel_thread" are: + * 1) not using CLONE_VM. + * 2) adding SIGCHLD to the flags. + * 3) leaving space on the stack for the user-registers. + */ +int user_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? */ + "je 1f\n\t" /* parent - jump */ + "subl %7,%%esp\n\t" /* space for user-registers */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. */ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | SIGCHLD), + "i" (sizeof(struct pt_regs)) + : "memory"); + return retval; +} +#endif /* CONFIG_MOSIX */ + /* * Free current thread data structures etc.. 
*/ @@ -583,6 +632,10 @@ childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; struct_cpy(childregs, regs); +#ifdef CONFIG_MOSIX + p->mosix.altregs = (uint32_t *)childregs; + p->mosix.deputy_regs = current->mosix.deputy_regs; +#endif /* CONFIG_MOSIX */ childregs->eax = 0; childregs->esp = esp; @@ -734,10 +787,32 @@ */ tss->bitmap = INVALID_IO_BITMAP_OFFSET; } +#ifdef CONFIG_MOSIX + if(test_bit(X86_FEATURE_TSC, &boot_cpu_data.x86_capability)) + { + /* REMOTE emulates the RDTSC instruction */ + if(next_p->mosix.dflags & DREMOTE) + { + if(!(prev_p->mosix.dflags & DREMOTE)) + __asm__ __volatile__("movl %%cr4,%%eax\n\t" \ + "orl $4,%%eax\n\t" \ + "movl %%eax,%%cr4\n" \ + : : :"ax"); + } + else if(prev_p->mosix.dflags & DREMOTE) + __asm__ __volatile__("movl %%cr4,%%eax\n\t" \ + "andl $0xfffffffb,%%eax\n\t" \ + "movl %%eax,%%cr4\n" \ + : : :"ax"); + } +#endif /* CONFIG_MOSIX */ } asmlinkage int sys_fork(struct pt_regs regs) { +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)); +#endif /* CONFIG_MOSIX */ return do_fork(SIGCHLD, regs.esp, &regs, 0); } @@ -745,12 +820,27 @@ { unsigned long clone_flags; unsigned long newsp; +#ifdef CONFIG_MOSIX + int retval; +#endif /* CONFIG_MOSIX */ clone_flags = regs.ebx; newsp = regs.ecx; if (!newsp) +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)) , +#endif /* CONFIG_MOSIX */ newsp = regs.esp; +#ifdef CONFIG_MOSIX + if((clone_flags & CLONE_VM) && (retval = mosix_pre_clone())) + return(retval); + retval = do_fork(clone_flags, newsp, &regs, 0); + if(clone_flags & CLONE_VM) + mosix_post_clone(); + return(retval); +#else return do_fork(clone_flags, newsp, &regs, 0); +#endif /* CONFIG_MOSIX */ } /* @@ -765,7 +855,18 @@ */ asmlinkage int sys_vfork(struct pt_regs regs) { +#ifdef CONFIG_MOSIX + int retval; + + mosix_obtain_registers(BIT_OF_REGISTER(esp)); + if((retval = mosix_pre_clone())) + return(retval); + retval = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0); + mosix_post_clone(); + 
return(retval); +#else return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0); +#endif /* CONFIG_MOSIX */ } /* @@ -801,23 +902,50 @@ unsigned long ebp, esp, eip; unsigned long stack_page; int count = 0; +#ifdef CONFIG_MOSIX + unsigned long result = 0; + if (!p || p == current || LOGICAL_STATE(p) == TASK_RUNNING) +#else if (!p || p == current || p->state == TASK_RUNNING) +#endif /* CONFIG_MOSIX */ return 0; stack_page = (unsigned long)p; esp = p->thread.esp; if (!stack_page || esp < stack_page || esp > 8188+stack_page) +#ifdef CONFIG_MOSIX + return(result); +#else return 0; +#endif /* CONFIG_MOSIX */ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ ebp = *(unsigned long *) esp; do { if (ebp < stack_page || ebp > 8184+stack_page) +#ifdef CONFIG_MOSIX + return(result); +#else return 0; +#endif /* CONFIG_MOSIX */ eip = *(unsigned long *) (ebp+4); if (eip < first_sched || eip >= last_sched) +#ifdef CONFIG_MOSIX + { + if(!result) + return eip; + } + else + result = 0; +#else return eip; +#endif /* CONFIG_MOSIX */ ebp = *(unsigned long *) ebp; +#ifdef CONFIG_MOSIX + } while (count++ < 64); + return(result); +#else } while (count++ < 16); return 0; +#endif /* CONFIG_MOSIX */ } #undef last_sched #undef first_sched diff -urN linux-2.4.17/arch/i386/kernel/ptrace.c linux_umopenmosix/arch/i386/kernel/ptrace.c --- linux-2.4.17/arch/i386/kernel/ptrace.c Wed Nov 21 20:42:41 2001 +++ linux_umopenmosix/arch/i386/kernel/ptrace.c Wed Jun 26 23:45:14 2002 @@ -21,6 +21,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -44,13 +48,47 @@ * this routine assumes that all the privileged stacks are in our * data space. 
*/ +#ifdef CONFIG_MOSIX +inline int get_stack_long(struct task_struct *task, int offset) +#else static inline int get_stack_long(struct task_struct *task, int offset) +#endif /* CONFIG_MOSIX */ { unsigned char *stack; +#ifdef CONFIG_MOSIX + if(task != current) + lock_mosix(); + if(task->mosix.dflags & DDEPUTY) + { + if(task != current) + unlock_mosix(); + return(request_process(task, NULL, PR_PTRACE_GET_STACK_LONG, + offset)); + } + if(task == current) + mosix_obtain_registers(ALL_REGISTERS); + else + task_lock(task); + if(!task->thread.saved_esp0) /* unless in VM86 mode */ + stack = (char *)(mos_to_regs(&task->mosix) + 1); + else +#endif /* CONFIG_MOSIX */ stack = (unsigned char *)task->thread.esp0; stack += offset; +#ifdef CONFIG_MOSIX + { + int res = *((int *)stack); + if(task != current) + { + task_unlock(task); + unlock_mosix(); + } + return(res); + } +#else return (*((int *)stack)); +#endif /* CONFIG_MOSIX */ } /* @@ -59,14 +97,43 @@ * this routine assumes that all the privileged stacks are in our * data space. 
*/ +#ifdef CONFIG_MOSIX +inline int put_stack_long(struct task_struct *task, int offset, +#else static inline int put_stack_long(struct task_struct *task, int offset, +#endif /* CONFIG_MOSIX */ unsigned long data) { unsigned char * stack; +#ifdef CONFIG_MOSIX + if(task != current) + lock_mosix(); + if(task->mosix.dflags & DDEPUTY) + { + if(task != current) + unlock_mosix(); + return(request_process_arg2(task, NULL, + PR_PTRACE_PUT_STACK_LONG, offset, data)); + } + if(task == current) + mosix_obtain_registers(ALL_REGISTERS); + else + task_lock(task); + if(!task->thread.saved_esp0) /* unless in VM86 mode */ + stack = (char *)(mos_to_regs(&task->mosix) + 1); + else +#endif /* CONFIG_MOSIX */ stack = (unsigned char *) task->thread.esp0; stack += offset; *(unsigned long *) stack = data; +#ifdef CONFIG_MOSIX + if(task != current) + { + task_unlock(task); + unlock_mosix(); + } +#endif /* CONFIG_MOSIX */ return 0; } @@ -107,6 +174,17 @@ return 0; } +#ifdef CONFIG_MOSIX +void +ptrace_putregs(unsigned long *data) +{ + register int i; + + for (i = 0; i < FRAME_SIZE ; i++) + putreg(current, i << 2, data[i]); +} +#endif /* CONFIG_MOSIX */ + static unsigned long getreg(struct task_struct *child, unsigned long regno) { @@ -147,6 +225,17 @@ put_stack_long(child, EFL_OFFSET, tmp); } +#ifdef CONFIG_MOSIX +void +ptrace_getregs(unsigned long *data) +{ + register int i; + + for (i = 0; i < FRAME_SIZE ; i++) + data[i] = getreg(current, i << 2); +} +#endif /* CONFIG_MOSIX */ + asmlinkage int sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -161,6 +250,9 @@ goto out; /* set the ptrace bit in the process flags. 
*/ current->ptrace |= PT_PTRACED; +#ifdef CONFIG_MOSIX_DFSA + tell_process(current, DREQ_NOTUPTODATE); +#endif /* CONFIG_MOSIX_DFSA */ ret = 0; goto out; } @@ -210,6 +302,17 @@ addr > sizeof(struct user) - 3) break; +#ifdef CONFIG_MOSIX + lock_mosix(); + if(child->mosix.dflags & DDEPUTY) + { + unlock_mosix(); + if(request_process(child, &tmp, PR_PTRACE_PEEKUSER, addr)) + goto out; + } + else + { +#endif /* CONFIG_MOSIX */ tmp = 0; /* Default return condition */ if(addr < FRAME_SIZE*sizeof(long)) tmp = getreg(child, addr); @@ -219,6 +322,10 @@ addr = addr >> 2; tmp = child->thread.debugreg[addr]; } +#ifdef CONFIG_MOSIX + unlock_mosix(); + } +#endif /* CONFIG_MOSIX */ ret = put_user(tmp,(unsigned long *) data); break; } @@ -265,7 +372,23 @@ addr -= (long) &dummy->u_debugreg; addr = addr >> 2; +#ifdef CONFIG_MOSIX + lock_mosix(); + if(child->mosix.dflags & DDEPUTY) + { + unlock_mosix(); + if(request_process_arg2(child, NULL, + PR_PTRACE_POKEUSER, addr, data)) + goto out; + } + else + { +#endif /* CONFIG_MOSIX */ child->thread.debugreg[addr] = data; +#ifdef CONFIG_MOSIX + unlock_mosix(); + } +#endif /* CONFIG_MOSIX */ ret = 0; } break; @@ -282,9 +405,22 @@ else child->ptrace &= ~PT_TRACESYS; child->exit_code = data; +#ifdef CONFIG_MOSIX + if(child->mosix.dflags & DDEPUTY) + { + if(request_process(child, NULL, PR_PTRACE_CONT, + request)) + goto out; + } + else + { +#endif /* CONFIG_MOSIX */ /* make sure the single step bit is not set. */ tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; put_stack_long(child, EFL_OFFSET,tmp); +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ wake_up_process(child); ret = 0; break; @@ -299,7 +435,11 @@ long tmp; ret = 0; +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(child) == TASK_ZOMBIE) /* already dead */ +#else if (child->state == TASK_ZOMBIE) /* already dead */ +#endif /* CONFIG_MOSIX */ break; child->exit_code = SIGKILL; /* make sure the single step bit is not set. 
*/ @@ -320,8 +460,21 @@ /* Spurious delayed TF traps may occur */ child->ptrace |= PT_DTRACE; } +#ifdef CONFIG_MOSIX + if(child->mosix.dflags & DDEPUTY) + { + if(request_process(child, NULL, PR_PTRACE_SINGLE_STEP, + 0)) + goto out; + } + else + { +#endif /* CONFIG_MOSIX */ tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; put_stack_long(child, EFL_OFFSET, tmp); +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ child->exit_code = data; /* give it a chance to run. */ wake_up_process(child); @@ -339,26 +492,57 @@ ret = -EIO; break; } +#ifdef CONFIG_MOSIX + /* NOTE: even when child does not look like a DEPUTY, + * it could easily become one while we wait for a page + * on __put_user ... therefore, we do this ALWAYS: + */ + { + unsigned long t[FRAME_SIZE]; + + if(request_process(child, &t, PR_PTRACE_GETREGS, 0)) + ret = -EIO; + else + copy_to_user((void *)data, t, sizeof(t)); + } +#else for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { __put_user(getreg(child, i),(unsigned long *) data); data += sizeof(long); } +#endif /* CONFIG_MOSIX */ ret = 0; break; } case PTRACE_SETREGS: { /* Set all gp regs in the child. */ +#ifndef CONFIG_MOSIX unsigned long tmp; +#endif /* CONFIG_MOSIX */ if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) { ret = -EIO; break; } +#ifdef CONFIG_MOSIX + /* NOTE: even when child does not look like a DEPUTY, + * it could easily become one while we wait for a page + * on __get_user ... therefore, we do this ALWAYS: + */ + { + unsigned long t[FRAME_SIZE]; + + copy_from_user(t, (void *)data, sizeof(t)); + ret = request_process(child, &t, PR_PTRACE_SETREGS, 0) + ? 
-EIO : 0; + } +#else for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { __get_user(tmp, (unsigned long *) data); putreg(child, i, tmp); data += sizeof(long); } ret = 0; +#endif /* CONFIG_MOSIX */ break; } @@ -369,6 +553,23 @@ break; } ret = 0; +#ifdef CONFIG_MOSIX + /* DEPUTY or NOT - the child-process may become DEPUTY + * while we page-fault on "__copy_to_user", + * so we always do it the long way: + */ + { + struct user_i387_struct f; + + if(request_process(child, &f, PR_PTRACE_GETFPREGS, 0)) + { + ret = -EIO; + goto out; + } + __copy_to_user((void *)data, &f, + sizeof(struct user_i387_struct)); + } +#else if ( !child->used_math ) { /* Simulate an empty FPU. */ set_fpu_cwd(child, 0x037f); @@ -376,6 +577,7 @@ set_fpu_twd(child, 0xffff); } get_fpregs((struct user_i387_struct *)data, child); +#endif /* CONFIG_MOSIX */ break; } @@ -385,9 +587,23 @@ ret = -EIO; break; } +#ifdef CONFIG_MOSIX + /* DEPUTY or NOT - the child-process may become DEPUTY + * while we page-fault on "__copy_from_user", + * so we always do it the long way: + */ + { + struct user_i387_struct f; + + __copy_from_user(&f, (void *)data, sizeof(f)); + ret = request_process(child, &f, PR_PTRACE_SETFPREGS, 0) + ? -EIO : 0; + } +#else child->used_math = 1; set_fpregs(child, (struct user_i387_struct *)data); ret = 0; +#endif /* CONFIG_MOSIX */ break; } @@ -397,6 +613,24 @@ ret = -EIO; break; } +#ifdef CONFIG_MOSIX + ret = 0; + /* DEPUTY or NOT - the child-process may become DEPUTY + * while we page-fault on "__copy_to_user", + * so we always do it the long way: + */ + { + struct user_fxsr_struct f; + + if(request_process(child, &f, PR_PTRACE_GETFPXREGS, 0)) + { + ret = -EIO; + goto out; + } + __copy_to_user((void *)data, &f, + sizeof(struct user_fxsr_struct)); + } +#else if ( !child->used_math ) { /* Simulate an empty FPU. 
*/ set_fpu_cwd(child, 0x037f); @@ -405,6 +639,7 @@ set_fpu_mxcsr(child, 0x1f80); } ret = get_fpxregs((struct user_fxsr_struct *)data, child); +#endif /* CONFIG_MOSIX */ break; } @@ -414,8 +649,22 @@ ret = -EIO; break; } +#ifdef CONFIG_MOSIX + /* DEPUTY or NOT - the child-process may become DEPUTY + * while we page-fault on "__copy_from_user", + * so we always do it the long way: + */ + { + struct user_fxsr_struct f; + + __copy_from_user(&f, (void *)data, sizeof(f)); + ret = request_process(child, &f, PR_PTRACE_SETFPXREGS, + 0) ? -EIO : 0; + } +#else child->used_math = 1; ret = set_fpxregs(child, (struct user_fxsr_struct *)data); +#endif /* CONFIG_MOSIX */ break; } @@ -439,7 +688,174 @@ return ret; } +#ifdef CONFIG_MOSIX +/* + * The following routines are parts of sys_ptrace performed by the + * child process on itself. + * Should the relevant code in "sys_ptrace" change, the following routines + * must also be modified accordingly. + */ +unsigned long +ptrace_peekuser(long addr) +{ + struct user * dummy = NULL; + unsigned long tmp; + + tmp = 0; /* Default return condition */ + if(addr < FRAME_SIZE*sizeof(long)) + tmp = getreg(current, addr); + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + addr -= (long) &dummy->u_debugreg[0]; + addr = addr >> 2; + tmp = current->thread.debugreg[addr]; + }; + return(tmp); +} + +void +ptrace_pokeuser(long addr, long data) +{ + current->thread.debugreg[addr] = data; +#define loaddebug(tsk,register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (tsk->thread.debugreg[register])) + /* unlike the original ptrace code, we are doing this for ourselves, + * and there may be no "switch_to" before we go to user-mode again, + * so here we go: + */ + if(current->thread.debugreg[7]) + { + loaddebug(current, 0); + loaddebug(current, 1); + loaddebug(current, 2); + loaddebug(current, 3); + loaddebug(current, 6); + loaddebug(current, 7); + } +} + +void +ptrace_cont(int request) +{ + 
unsigned long tmp; + struct task_struct *tsk = current; + + tmp = get_stack_long(tsk, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(tsk, EFL_OFFSET, tmp); + if (request == PTRACE_SYSCALL) + tsk->ptrace |= PT_TRACESYS; + else + tsk->ptrace &= ~PT_TRACESYS; + if (request == PTRACE_DETACH) + tsk->ptrace &= ~PT_PTRACED; +} + +void +ptrace_single_step(void) +{ + unsigned long tmp; + + tmp = get_stack_long(current, EFL_OFFSET) | TRAP_FLAG; + put_stack_long(current, EFL_OFFSET, tmp); + current->ptrace |= PT_PTRACED | PT_DTRACE; +#ifdef CONFIG_MOSIX_DFSA + tell_process(current, DREQ_NOTUPTODATE); +#endif /* CONFIG_MOSIX_DFSA */ +} + +void +ptrace_getfpregs(struct user_i387_struct *to) +{ + struct task_struct *p = current; + + unlazy_fpu(p); + if (!p->used_math) + { + /* Simulate an empty FPU. */ + set_fpu_cwd(p, 0x037f); + set_fpu_swd(p, 0x0000); + set_fpu_twd(p, 0xffff); + } + get_fpregs(to, p); +} + +void +ptrace_getfpxregs(struct user_fxsr_struct *to) +{ + struct task_struct *p = current; + + unlazy_fpu(p); + if (!p->used_math) + { + /* Simulate an empty FPU. */ + set_fpu_cwd(p, 0x037f); + set_fpu_swd(p, 0x0000); + set_fpu_twd(p, 0xffff); + set_fpu_mxcsr(p, 0x1f80); + } + get_fpxregs(to, p); +} + +void +ptrace_setfpregs(struct user_i387_struct *from) +{ + struct task_struct *p = current; + + clear_fpu(p); + p->used_math = 1; + set_fpregs(p, from); +} + +void +ptrace_setfpxregs(struct user_fxsr_struct *from) +{ + struct task_struct *p = current; + + clear_fpu(p); + p->used_math = 1; + set_fpxregs(p, from); +} +#endif /* CONFIG_MOSIX */ + +#ifdef CONFIG_MOSIX +/* + * When a process starts a system call in PT_TRACESYS mode, it must run + * the full sequence: A=syscall_trace, B=the_system_call, C=syscall_trace. + * Since our process can migrate at any of those stages, we maintain 2 flags + * that tell us where to resume in the sequence. + * In the particular case of DTRACESYS1, the trace was already complete + * before the migration. 
In the case of DTRACESYS2, the system-call was already + * complete before the migration and the code in "entry.S" prevents it from + * re-running. + */ +asmlinkage void +syscall_trace(void) +{ + extern asmlinkage void do_syscall_trace(void); + + switch(current->mosix.dflags & (DTRACESYS1|DTRACESYS2)) + { + case 0: + current->mosix.dflags |= DTRACESYS1; + break; + case DTRACESYS1: + return; + case DTRACESYS2: + case DTRACESYS1|DTRACESYS2: + current->mosix.dflags &= ~(DTRACESYS1|DTRACESYS2); + } + if(current->mosix.dflags & DREMOTE) + mosix_remote_syscall_trace(); + else + do_syscall_trace(); +} + +asmlinkage void do_syscall_trace(void) +#else asmlinkage void syscall_trace(void) +#endif /* CONFIG_MOSIX */ { if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) != (PT_PTRACED|PT_TRACESYS)) diff -urN linux-2.4.17/arch/i386/kernel/signal.c linux_umopenmosix/arch/i386/kernel/signal.c --- linux-2.4.17/arch/i386/kernel/signal.c Sat Sep 15 00:15:40 2001 +++ linux_umopenmosix/arch/i386/kernel/signal.c Wed Jun 26 23:45:14 2002 @@ -24,6 +24,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + #define DEBUG_SIG 0 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -37,6 +41,29 @@ if (from->si_code < 0) return __copy_to_user(to, from, sizeof(siginfo_t)); else { +#ifdef CONFIG_MOSIX + /* it is unreasonable to send a separate request per word, + * so find the limit and send them all together. 
+ */ + int sz = offsetof(struct siginfo, _sifields); + + switch(from->si_code >> 16) + { + case __SI_FAULT >> 16: + sz += sizeof(to->_sifields._sigfault); + break; + case __SI_CHLD >> 16: + sz += sizeof(to->_sifields._sigchld); + break; + case __SI_MIGRATION >> 16: + sz += sizeof(to->_sifields._sigmig); + break; + default: + sz += sizeof(to->_sifields._kill); + break; + } + return(__copy_to_user(to, from, sz)); +#else int err; /* If you change siginfo_t structure, please be sure @@ -62,6 +89,7 @@ /* case __SI_RT: This is not generated by the kernel as of now. */ } return err; +#endif /* CONFIG_MOSIX */ } } @@ -81,6 +109,10 @@ recalc_sigpending(current); spin_unlock_irq(&current->sigmask_lock); +#ifdef CONFIG_MOSIX + mosix_obtain_registers( + BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); +#endif /* CONFIG_MOSIX */ regs->eax = -EINTR; while (1) { current->state = TASK_INTERRUPTIBLE; @@ -110,6 +142,10 @@ recalc_sigpending(current); spin_unlock_irq(&current->sigmask_lock); +#ifdef CONFIG_MOSIX + mosix_obtain_registers( + BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); +#endif /* CONFIG_MOSIX */ regs->eax = -EINTR; while (1) { current->state = TASK_INTERRUPTIBLE; @@ -155,6 +191,10 @@ sys_sigaltstack(const stack_t *uss, stack_t *uoss) { struct pt_regs *regs = (struct pt_regs *) &uss; + +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)); +#endif /* CONFIG_MOSIX */ return do_sigaltstack(uss, uoss, regs->esp); } @@ -185,11 +225,19 @@ char retcode[8]; }; +#ifdef CONFIG_MOSIX +int +#else static int +#endif /* CONFIG_MOSIX */ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) { unsigned int err = 0; +#ifdef CONFIG_MOSIX + mosix_obtain_registers(ALL_REGISTERS); +#endif /* CONFIG_MOSIX */ + #define COPY(x) err |= __get_user(regs->x, &sc->x) #define COPY_SEG(seg) \ @@ -249,10 +297,18 @@ asmlinkage int sys_sigreturn(unsigned long __unused) { struct pt_regs *regs = (struct pt_regs *) &__unused; +#ifdef 
CONFIG_MOSIX + struct sigframe *frame; +#else struct sigframe *frame = (struct sigframe *)(regs->esp - 8); +#endif /* CONFIG_MOSIX */ sigset_t set; int eax; +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)); + frame = (struct sigframe *)(regs->esp - 8); +#endif /* CONFIG_MOSIX */ if (verify_area(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -267,6 +323,14 @@ recalc_sigpending(current); spin_unlock_irq(&current->sigmask_lock); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + if (mosix_deputy_restore_sigcontext(&frame->sc, &eax)) + goto badframe; + } + else +#endif /* CONFIG_MOSIX */ if (restore_sigcontext(regs, &frame->sc, &eax)) goto badframe; return eax; @@ -279,11 +343,19 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused) { struct pt_regs *regs = (struct pt_regs *) &__unused; +#ifdef CONFIG_MOSIX + struct rt_sigframe *frame; +#else struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); +#endif /* CONFIG_MOSIX */ sigset_t set; stack_t st; int eax; +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)); + frame = (struct rt_sigframe *)(regs->esp - 4); +#endif /* CONFIG_MOSIX */ if (verify_area(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) @@ -295,6 +367,14 @@ recalc_sigpending(current); spin_unlock_irq(&current->sigmask_lock); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + if(mosix_deputy_restore_sigcontext(&frame->uc.uc_mcontext,&eax)) + goto badframe; + } + else +#endif /* CONFIG_MOSIX */ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) goto badframe; @@ -302,6 +382,9 @@ goto badframe; /* It is more difficult to avoid calling this function than to call it and ignore errors. 
*/ +#ifdef CONFIG_MOSIX + mosix_obtain_registers(BIT_OF_REGISTER(esp)); +#endif /* CONFIG_MOSIX */ do_sigaltstack(&st, NULL, regs->esp); return eax; @@ -321,6 +404,9 @@ { int tmp, err = 0; +#ifdef CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID + mosix_obtain_registers(ALL_REGISTERS); +#endif /* CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID */ tmp = 0; __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); err |= __put_user(tmp, (unsigned int *)&sc->gs); @@ -385,12 +471,19 @@ return (void *)((esp - frame_size) & -8ul); } +#ifdef CONFIG_MOSIX +void setup_frame(int sig, struct k_sigaction *ka, +#else static void setup_frame(int sig, struct k_sigaction *ka, +#endif /* CONFIG_MOSIX */ sigset_t *set, struct pt_regs * regs) { struct sigframe *frame; int err = 0; +#ifdef CONFIG_MOSIX + mosix_obtain_registers(ALL_REGISTERS); +#endif /* CONFIG_MOSIX */ frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -455,12 +548,19 @@ force_sig(SIGSEGV, current); } +#ifdef CONFIG_MOSIX +void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +#else static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +#endif /* CONFIG_MOSIX */ sigset_t *set, struct pt_regs * regs) { struct rt_sigframe *frame; int err = 0; +#ifdef CONFIG_MOSIX + mosix_obtain_registers(ALL_REGISTERS); +#endif /* CONFIG_MOSIX */ frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -538,6 +638,10 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { +#ifdef CONFIG_MOSIX + mosix_obtain_registers( + BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); +#endif /* CONFIG_MOSIX */ /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. 
*/ @@ -559,6 +663,11 @@ } /* Set up the stack frame */ +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_setup_frame(sig, ka, *info, oldset); + else +#endif /* CONFIG_MOSIX */ if (ka->sa.sa_flags & SA_SIGINFO) setup_rt_frame(sig, ka, info, oldset, regs); else @@ -586,6 +695,11 @@ siginfo_t info; struct k_sigaction *ka; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DREMOTE) + return(0); +#endif /* CONFIG_MOSIX */ + /* * We want the common case to go fast, which * is why we may in certain cases get here from @@ -611,6 +725,10 @@ if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { /* Let the debugger run. */ current->exit_code = signr; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_rusage(0); +#endif /* CONFIG_MOSIX */ current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); @@ -668,6 +786,10 @@ case SIGSTOP: { struct signal_struct *sig; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_rusage(0); +#endif /* CONFIG_MOSIX */ current->state = TASK_STOPPED; current->exit_code = signr; sig = current->p_pptr->sig; @@ -706,6 +828,10 @@ } /* Did we come from a system call? */ +#ifdef CONFIG_MOSIX + mosix_obtain_registers( + BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); +#endif /* CONFIG_MOSIX */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ if (regs->eax == -ERESTARTNOHAND || diff -urN linux-2.4.17/arch/i386/kernel/sys_i386.c linux_umopenmosix/arch/i386/kernel/sys_i386.c --- linux-2.4.17/arch/i386/kernel/sys_i386.c Mon Mar 19 22:35:09 2001 +++ linux_umopenmosix/arch/i386/kernel/sys_i386.c Wed Jun 26 23:45:14 2002 @@ -22,6 +22,10 @@ #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + /* * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. 
@@ -55,12 +59,20 @@ goto out; } +#ifdef CONFIG_MOSIX + error = do_mmap_pgoff_down(file, addr, len, prot, flags, pgoff); +#else down_write(&current->mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up_write(&current->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ if (file) fput(file); +#ifdef CONFIG_MOSIX_DFSA + if(file && file_count(file) > 1) + dfsa_touch_file(fd); +#endif /* CONFIG_MOSIX_DFSA */ out: return error; } diff -urN linux-2.4.17/arch/i386/kernel/traps.c linux_umopenmosix/arch/i386/kernel/traps.c --- linux-2.4.17/arch/i386/kernel/traps.c Sun Sep 30 21:26:08 2001 +++ linux_umopenmosix/arch/i386/kernel/traps.c Wed Jun 26 23:45:14 2002 @@ -50,6 +50,11 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#include +#endif /* CONFIG_MOSIX */ + asmlinkage int system_call(void); asmlinkage void lcall7(void); asmlinkage void lcall27(void); @@ -248,6 +253,12 @@ show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); +#ifdef CONFIG_MOSIX_UDB + { + extern void mosix_debugger(char *); + mosix_debugger("die"); + } +#endif /* CONFIG_MOSIX_UDB */ do_exit(SIGSEGV); } @@ -336,10 +347,16 @@ } DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +#ifndef CONFIG_MOSIX_UDB DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) +#endif /* CONFIG_MOSIX_UDB */ DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) +#ifdef CONFIG_MOSIX +DO_ERROR_INFO( 6, SIGILL, "invalid operand", real_invalid_op, ILL_ILLOPN, regs->eip) +#else DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +#endif /* CONFIG_MOSIX */ DO_VM86_ERROR( 7, SIGSEGV, "device not available", device_not_available) DO_ERROR( 8, SIGSEGV, "double fault", double_fault) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) @@ -356,6 +373,24 @@ if (!(regs->xcs & 3)) goto gp_in_kernel; +#ifdef CONFIG_MOSIX + if(error_code == 0 && (current->mosix.dflags & DREMOTE)) + { + short code; + u64 clk; + 
+ if(!get_user(code, (short *)regs->eip) && + code == 0x310F) /* RDTSC */ + { + clk = mosix_remote_tsc(); + regs->eax = clk & 0xffffffff; + regs->edx = clk >> 32; + regs->eip += 2; + return; + } + mosix_go_home(0); + } +#endif /* CONFIG_MOSIX */ current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -452,6 +487,41 @@ inb(0x71); /* dummy */ } +#ifdef CONFIG_MOSIX_UDB +asmlinkage void do_int3(struct pt_regs * regs, long error_code) +{ + extern int udb_breakpoint(struct pt_regs *regs); + + if (!(regs->eflags & VM_MASK) && (regs->xcs & 3) != 3 && + udb_breakpoint(regs)) + return; + do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); +} +#endif /* CONFIG_MOSIX_UDB */ + +#ifdef CONFIG_MOSIX +static __u32 user_features[NCAPINTS] = USER_MODE_FEATURES; + +asmlinkage void +do_invalid_op(struct pt_regs * regs, long error_code) +{ + if((current->mosix.dflags & DREMOTE) && !(regs->eflags & VM_MASK) && + (regs->xcs & 3) == 3) + { + int i; + + for(i = 0 ; i < NCAPINTS ; i++) + if(current->mosix.features[i] & user_features[i] & + ~boot_cpu_data.x86_capability[i]) + { + mosix_go_home(0); /* no return if successful */ + break; + } + } + do_real_invalid_op(regs, error_code); +} +#endif /* CONFIG_MOSIX */ + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. 
Therefore diff -urN linux-2.4.17/arch/i386/kernel/vm86.c linux_umopenmosix/arch/i386/kernel/vm86.c --- linux-2.4.17/arch/i386/kernel/vm86.c Sat Jul 7 03:05:07 2001 +++ linux_umopenmosix/arch/i386/kernel/vm86.c Thu Jun 27 22:49:21 2002 @@ -17,6 +17,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* * Known problems: * @@ -69,6 +73,10 @@ struct pt_regs *ret; unsigned long tmp; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DREMOTE) + panic("remote save_v86"); +#endif /* CONFIG_MOSIX */ if (!current->thread.vm86_info) { printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); @@ -83,9 +91,18 @@ do_exit(SIGSEGV); } tss = init_tss + smp_processor_id(); +#ifdef CONFIG_MOSIX + lock_mosix(); /* ptrace checks saved_esp0 under the mosix-lock */ +#endif /* CONFIG_MOSIX */ tss->esp0 = current->thread.esp0 = current->thread.saved_esp0; current->thread.saved_esp0 = 0; ret = KVM86->regs32; +#ifdef CONFIG_MOSIX + unlock_mosix(); + task_lock(current); + current->mosix.stay &= ~DSTAY_FOR_86; + task_unlock(current); +#endif /* CONFIG_MOSIX */ return ret; } @@ -136,6 +153,13 @@ struct task_struct *tsk; int tmp, ret = -EPERM; +#ifdef CONFIG_MOSIX + if(!mosix_go_home_for_reason(1, DSTAY_FOR_86)) + { + ret = -ENOMEM; + goto out; + } +#endif /* CONFIG_MOSIX */ tsk = current; if (tsk->thread.saved_esp0) goto out; @@ -193,6 +217,13 @@ ret = -EFAULT; if (tmp) goto out; +#ifdef CONFIG_MOSIX + if(!mosix_go_home_for_reason(1, DSTAY_FOR_86)) + { + ret = -ENOMEM; + goto out; + } +#endif /* CONFIG_MOSIX */ info.regs32 = (struct pt_regs *) &subfunction; info.vm86plus.is_vm86pus = 1; tsk->thread.vm86_info = (struct vm86_struct *)v86; @@ -245,9 +276,15 @@ * Save old state, set default return value (%eax) to 0 */ info->regs32->eax = 0; +#ifdef CONFIG_MOSIX + lock_mosix(); +#endif /* CONFIG_MOSIX */ tsk->thread.saved_esp0 = tsk->thread.esp0; tss = init_tss + smp_processor_id(); tss->esp0 = tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; +#ifdef CONFIG_MOSIX + 
unlock_mosix(); +#endif /* CONFIG_MOSIX */ tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) @@ -267,6 +304,11 @@ regs32 = save_v86_state(regs16); regs32->eax = retval; +#ifdef CONFIG_MOSIX + task_lock(current); + current->mosix.stay &= ~DSTAY_FOR_86; + task_unlock(current); +#endif /* CONFIG_MOSIX */ __asm__ __volatile__("movl %0,%%esp\n\t" "jmp ret_from_sys_call" : : "r" (regs32), "b" (current)); @@ -602,7 +644,11 @@ int ret = 0; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if ((p == tsk) && (p->sig)) { ret = 1; break; diff -urN linux-2.4.17/arch/i386/lib/usercopy.c linux_umopenmosix/arch/i386/lib/usercopy.c --- linux-2.4.17/arch/i386/lib/usercopy.c Fri Nov 9 23:58:02 2001 +++ linux_umopenmosix/arch/i386/lib/usercopy.c Wed Jun 26 23:45:14 2002 @@ -99,6 +99,11 @@ __strncpy_from_user(char *dst, const char *src, long count) { long res; + +#ifdef CONFIG_MOSIX + if(USER_IS_REMOTE) + return(deputy_strncpy_from_user(dst, (char *)src, count, 0)); +#endif /* CONFIG_MOSIX */ __do_strncpy_from_user(dst, src, count, res); return res; } @@ -107,6 +112,11 @@ strncpy_from_user(char *dst, const char *src, long count) { long res = -EFAULT; + +#ifdef CONFIG_MOSIX + if(USER_IS_REMOTE) + return(deputy_strncpy_from_user(dst, (char *)src, count, 1)); +#endif /* CONFIG_MOSIX */ if (access_ok(VERIFY_READ, src, 1)) __do_strncpy_from_user(dst, src, count, res); return res; @@ -141,6 +151,10 @@ unsigned long clear_user(void *to, unsigned long n) { +#ifdef CONFIG_MOSIX + if (USER_IS_REMOTE) + return(deputy_clear_user(to, n, 1)); +#endif /* CONFIG_MOSIX */ if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -149,6 +163,10 @@ unsigned long __clear_user(void *to, unsigned long n) { +#ifdef CONFIG_MOSIX + if (USER_IS_REMOTE) + return(deputy_clear_user(to, n, 0)); +#endif /* CONFIG_MOSIX */ __do_clear_user(to, n); return n; } @@ -164,6 +182,10 @@ unsigned 
long mask = -__addr_ok(s); unsigned long res, tmp; +#ifdef CONFIG_MOSIX + if(USER_IS_REMOTE) + return(deputy_strnlen_user((char *)s, n)); +#endif /* CONFIG_MOSIX */ __asm__ __volatile__( " testl %0, %0\n" " jz 3f\n" diff -urN linux-2.4.17/arch/i386/mm/fault.c linux_umopenmosix/arch/i386/mm/fault.c --- linux-2.4.17/arch/i386/mm/fault.c Wed Oct 10 00:13:03 2001 +++ linux_umopenmosix/arch/i386/mm/fault.c Wed Jun 26 23:45:14 2002 @@ -25,6 +25,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + extern void die(const char *,struct pt_regs *,long); extern int console_loglevel; @@ -40,6 +44,10 @@ if (!size) return 1; +#ifdef CONFIG_MOSIX + if(USER_IS_REMOTE) + return(deputy_verify_write((void *)addr, size)); +#endif /* CONFIG_MOSIX */ vma = find_vma(current->mm, start); if (!vma) goto bad_area; @@ -136,6 +144,10 @@ asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); extern unsigned long idt; +#ifdef CONFIG_MOSIX_UDB +int debug_fixup = 0; +#endif /* CONFIG_MOSIX_UDB */ + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -263,6 +275,10 @@ */ if (regs->eflags & VM_MASK) { unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; +#ifdef CONFIG_MOSIX_DIAG + if(!(current->mosix.stay & DSTAY_FOR_86)) + panic("VM_MASK without STAY"); +#endif /* CONFIG_MOSIX_DIAG */ if (bit < 32) tsk->thread.screen_bitmap |= 1 << bit; } @@ -307,6 +323,10 @@ /* Are we prepared to handle this kernel fault? 
*/ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; +#ifdef CONFIG_MOSIX_UDB + if(debug_fixup) + mosix_debugger("fixup"); +#endif /* CONFIG_MOSIX_UDB */ return; } diff -urN linux-2.4.17/arch/um/Makefile linux_umopenmosix/arch/um/Makefile --- linux-2.4.17/arch/um/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/Makefile Wed Jun 26 23:45:14 2002 @@ -0,0 +1,110 @@ +include arch/$(ARCH)/Makefile-$(SUBARCH) + +EXTRAVERSION := $(EXTRAVERSION)-14um +include/linux/version.h: arch/$(ARCH)/Makefile + +# Recalculate MODLIB to reflect the EXTRAVERSION changes (via KERNELRELEASE) +# The way the toplevel Makefile is written EXTRAVERSION is not supposed +# to be changed outside the toplevel Makefile, but recalculating MODLIB is +# a sufficient workaround until we no longer need architecture dependent +# EXTRAVERSION... +MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) + +ARCH_DIR = arch/um + +MAKEBOOT = $(MAKE) -C $(ARCH_DIR)/boot + +ifeq ($(CONFIG_DEBUGSYM),y) +DEBUG = -g +CFLAGS := $(subst -fomit-frame-pointer,,$(CFLAGS)) +endif + +ifeq ($(CONFIG_GCOV),y) +CFLAGS += -fprofile-arcs -ftest-coverage +endif + +ifeq ($(CONFIG_GPROF), y) +PROFILE += -pg -DPROFILING +LINK_PROFILE = $(PROFILE) -Wl,--wrap,__monstartup +endif + +SUBDIRS += $(ARCH_DIR)/fs $(ARCH_DIR)/drivers $(ARCH_DIR)/kernel \ + $(ARCH_DIR)/sys-$(SUBARCH) + +LIBS += $(shell [ -e $(ARCH_DIR)/fs/fs.o ] && echo $(ARCH_DIR)/fs/fs.o) \ + $(ARCH_DIR)/kernel/um.o $(ARCH_DIR)/drivers/um_drivers.o \ + $(ARCH_DIR)/sys-$(SUBARCH)/sys.o + +ifeq ($(CONFIG_PT_PROXY), y) +SUBDIRS += $(ARCH_DIR)/ptproxy +LIBS += $(ARCH_DIR)/ptproxy/ptproxy.a +endif + +NESTING = 0 + +ARCH_INCLUDE = $(TOPDIR)/$(ARCH_DIR)/include + +# -Derrno=kernel_errno - This turns all kernel references to errno into +# kernel_errno to separate them from the libc errno. This allows -fno-common +# in CFLAGS. Otherwise, it would cause ld to complain about the two different +# errnos. 
+ +CFLAGS += $(DEBUG) $(PROFILE) $(ARCH_CFLAGS) -D__arch_um__ \ + -DSUBARCH=\"$(SUBARCH)\" -DNESTING=$(NESTING) -D_LARGEFILE64_SOURCE \ + -I$(ARCH_INCLUDE) -Derrno=kernel_errno + +LINKFLAGS += -r + +LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc + +$(ARCH_DIR)/link.ld: $(ARCH_DIR)/link.ld.in + m4 -DSTART=$(START_ADDR) -DSUBARCH=$(SUBARCH) \ + -DELF_SUBARCH=$(ELF_SUBARCH) $< > $@ + +SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \ + include/asm-um/sigcontext.h include/asm-um/processor.h + +ARCH_SYMLINKS = include/asm-um/arch arch/um/include/sysdep $(SYMLINK_HEADERS) + +linux: $(ARCH_SYMLINKS) $(ARCH_DIR)/main.o vmlinux $(ARCH_DIR)/link.ld + mv vmlinux vmlinux.o + $(CC) -Wl,-T,$(ARCH_DIR)/link.ld $(LINK_PROFILE) $(LINK_WRAPS) \ + -o linux -static $(ARCH_DIR)/main.o vmlinux.o -L/usr/lib + +USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) +USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS)) +USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) -I$(ARCH_INCLUDE) + +# To get a definition of F_SETSIG +USER_CFLAGS += -D_GNU_SOURCE + +$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +archmrproper: + $(MAKE) -C $(ARCH_DIR)/sys-$(SUBARCH) archmrproper + rm -f $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \ + $(ARCH_DIR)/link.ld \ + $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) + +archclean: + find . 
\( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ + -o -name '*.gcov' \) -type f -print | xargs rm -f + rm -f linux x.i gmon.out $(ARCH_DIR)/link.ld + @$(MAKEBOOT) clean + +archdep: $(ARCH_SYMLINKS) + @$(MAKEBOOT) dep + +$(SYMLINK_HEADERS): + echo $@ + cd $(TOPDIR)/$(dir $@) ; \ + ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) + +include/asm-um/arch: + cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch + +arch/um/include/sysdep: + cd $(TOPDIR)/arch/um/include && ln -sf sysdep-$(SUBARCH) sysdep + +export SUBARCH USER_CFLAGS diff -urN linux-2.4.17/arch/um/Makefile-i386 linux_umopenmosix/arch/um/Makefile-i386 --- linux-2.4.17/arch/um/Makefile-i386 Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/Makefile-i386 Wed Jun 26 23:45:14 2002 @@ -0,0 +1,8 @@ +ifeq ($(CONFIG_HOST_2G_2G), y) +START_ADDR = 0x60000000 +else +START_ADDR = 0xa0000000 +endif + +ARCH_CFLAGS = -U__$(SUBARCH)__ -U$(SUBARCH) -DUM_FASTCALL +ELF_SUBARCH = $(SUBARCH) diff -urN linux-2.4.17/arch/um/Makefile-ia64 linux_umopenmosix/arch/um/Makefile-ia64 --- linux-2.4.17/arch/um/Makefile-ia64 Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/Makefile-ia64 Wed Jun 26 23:45:14 2002 @@ -0,0 +1 @@ +START_ADDR = 0x1000000000000000 diff -urN linux-2.4.17/arch/um/Makefile-ppc linux_umopenmosix/arch/um/Makefile-ppc --- linux-2.4.17/arch/um/Makefile-ppc Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/Makefile-ppc Wed Jun 26 23:45:14 2002 @@ -0,0 +1,9 @@ +ifeq ($(CONFIG_HOST_2G_2G), y) +START_ADDR = 0x60000000 +else +START_ADDR = 0xa0000000 +endif +ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__ + +# The arch is ppc, but the elf32 name is powerpc +ELF_SUBARCH = powerpc diff -urN linux-2.4.17/arch/um/boot/Makefile linux_umopenmosix/arch/um/boot/Makefile --- linux-2.4.17/arch/um/boot/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/boot/Makefile Wed Jun 26 23:45:14 2002 @@ -0,0 +1,3 @@ +dep: + +clean: diff -urN linux-2.4.17/arch/um/config.in 
linux_umopenmosix/arch/um/config.in --- linux-2.4.17/arch/um/config.in Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/config.in Wed Jun 26 23:45:14 2002 @@ -0,0 +1,144 @@ +define_bool CONFIG_USERMODE y + +mainmenu_name "Linux/Usermode Kernel Configuration" + +define_bool CONFIG_ISA n +define_bool CONFIG_SBUS n +define_bool CONFIG_PCI n + +define_bool CONFIG_UID16 y + +define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'openMosix' +bool 'openMosix process migration support' CONFIG_MOSIX +if [ "$CONFIG_MOSIX" = "y" ]; then + bool 'Support clusters with a complex network topology' CONFIG_MOSIX_TOPOLOGY + if [ "$CONFIG_MOSIX_TOPOLOGY" = "y" ]; then + int 'Maximum network-topology complexity to support (2-10)' CONFIG_MOSIX_MAXTOPOLOGY 4 + fi + + bool 'Stricter security on openMosix ports' CONFIG_MOSIX_SECUREPORTS + int 'Level of process-identity disclosure (0-3)' CONFIG_MOSIX_DISCLOSURE 1 + bool 'Create the kernel with a "-openmosix" extension' CONFIG_MOSIX_EXTMOSIX +# bool 'Direct File-System Access' CONFIG_MOSIX_DFSA + bool 'openMosix File-System' CONFIG_MOSIX_FS +# DFSA is not prompted for; it is switched on whenever the openMosix File-System is selected + if [ "$CONFIG_MOSIX_FS" = "y" ]; then + define_bool CONFIG_MOSIX_DFSA y + fi + bool 'Poll/Select exceptions on pipes' CONFIG_MOSIX_PIPE_EXCEPTIONS +fi +endmenu + +mainmenu_option next_comment +comment 'General Setup' +define_bool CONFIG_STDIO_CONSOLE y +bool 'Networking support' CONFIG_NET +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC +bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS +if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then + int 'Maximum number of 
Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 +fi +bool 'Virtual serial line' CONFIG_SSL +tristate 'Host filesystem' CONFIG_HOSTFS +bool 'Management console' CONFIG_MCONSOLE +dep_bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_MCONSOLE +bool '2G/2G host address space split' CONFIG_HOST_2G_2G +bool 'Symmetric multi-processing support' CONFIG_UML_SMP +define_bool CONFIG_SMP $CONFIG_UML_SMP +string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \ + "fd:0,fd:1" +string 'Default console channel initialization' CONFIG_CON_CHAN "xterm" +string 'Default serial line channel initialization' CONFIG_SSL_CHAN "pty" +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then +# MODVERSIONS does not yet work in this architecture +# bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Devices' +define_bool CONFIG_BLK_DEV_UBD y +bool 'Always do synchronous disk IO for UBD' CONFIG_BLK_DEV_UBD_SYNC +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM +tristate 'Example IO memory driver' CONFIG_MMAPPER + +tristate 'Sound support' CONFIG_UML_SOUND +define_tristate CONFIG_SOUND $CONFIG_UML_SOUND +define_tristate CONFIG_HOSTAUDIO $CONFIG_UML_SOUND + +bool 'file descriptor channel support' CONFIG_FD_CHAN +bool 'port channel support' CONFIG_PORT_CHAN +bool 'pty channel support' CONFIG_PTY_CHAN +bool 'tty channel support' CONFIG_TTY_CHAN +bool 'xterm channel support' CONFIG_XTERM_CHAN + 
+endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +if [ "$CONFIG_NET" = "y" ]; then + mainmenu_option next_comment + comment 'Network device support' + + bool 'Virtual network device support' CONFIG_UML_NET + if [ "$CONFIG_UML_NET" != "n" ]; then + bool ' Ethertap transport' CONFIG_UML_NET_ETHERTAP + bool ' TUN/TAP transport' CONFIG_UML_NET_TUNTAP + bool ' SLIP transport' CONFIG_UML_NET_SLIP + bool ' Daemon transport' CONFIG_UML_NET_DAEMON + bool ' Multicast transport' CONFIG_UML_NET_MCAST + fi + + bool 'Software network device support' CONFIG_NETDEVICES + if [ "$CONFIG_NETDEVICES" = "y" ]; then + source drivers/net/Config.in + fi + + endmenu +fi + +source fs/Config.in + +source drivers/md/Config.in + +source drivers/mtd/Config.in + +mainmenu_option next_comment +comment 'Kernel hacking' +bool 'Debug memory allocations' CONFIG_DEBUG_SLAB +bool 'Enable kernel debugging symbols' CONFIG_DEBUGSYM +if [ "$CONFIG_XTERM_CHAN" = "y" ]; then + dep_bool 'Enable ptrace proxy' CONFIG_PT_PROXY $CONFIG_DEBUGSYM +else + define_bool CONFIG_PT_PROXY n +fi +dep_bool 'Enable gprof support' CONFIG_GPROF $CONFIG_DEBUGSYM +dep_bool 'Enable gcov support' CONFIG_GCOV $CONFIG_DEBUGSYM +endmenu diff -urN linux-2.4.17/arch/um/config.in.org linux_umopenmosix/arch/um/config.in.org --- linux-2.4.17/arch/um/config.in.org Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/config.in.org Wed Jun 26 23:45:14 2002 @@ -0,0 +1,122 @@ +define_bool CONFIG_USERMODE y + +mainmenu_name "Linux/Usermode Kernel Configuration" + +define_bool CONFIG_ISA n +define_bool CONFIG_SBUS n +define_bool CONFIG_PCI n + +define_bool CONFIG_UID16 y + +define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'General Setup' +define_bool CONFIG_STDIO_CONSOLE y +bool 'Networking support' CONFIG_NET +bool 'System 
V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC +bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS +if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then + int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 +fi +bool 'Virtual serial line' CONFIG_SSL +tristate 'Host filesystem' CONFIG_HOSTFS +bool 'Management console' CONFIG_MCONSOLE +dep_bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_MCONSOLE +bool '2G/2G host address space split' CONFIG_HOST_2G_2G +bool 'Symmetric multi-processing support' CONFIG_UML_SMP +define_bool CONFIG_SMP $CONFIG_UML_SMP +string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \ + "fd:0,fd:1" +string 'Default console channel initialization' CONFIG_CON_CHAN "xterm" +string 'Default serial line channel initialization' CONFIG_SSL_CHAN "pty" +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then +# MODVERSIONS does not yet work in this architecture +# bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Devices' +define_bool CONFIG_BLK_DEV_UBD y +bool 'Always do synchronous disk IO for UBD' CONFIG_BLK_DEV_UBD_SYNC +tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP +dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET +tristate 'RAM disk support' CONFIG_BLK_DEV_RAM +if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then + int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 +fi +dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM +tristate 'Example IO 
memory driver' CONFIG_MMAPPER + +tristate 'Sound support' CONFIG_UML_SOUND +define_tristate CONFIG_SOUND $CONFIG_UML_SOUND +define_tristate CONFIG_HOSTAUDIO $CONFIG_UML_SOUND + +bool 'file descriptor channel support' CONFIG_FD_CHAN +bool 'port channel support' CONFIG_PORT_CHAN +bool 'pty channel support' CONFIG_PTY_CHAN +bool 'tty channel support' CONFIG_TTY_CHAN +bool 'xterm channel support' CONFIG_XTERM_CHAN + +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +if [ "$CONFIG_NET" = "y" ]; then + mainmenu_option next_comment + comment 'Network device support' + + bool 'Virtual network device support' CONFIG_UML_NET + if [ "$CONFIG_UML_NET" != "n" ]; then + bool ' Ethertap transport' CONFIG_UML_NET_ETHERTAP + bool ' TUN/TAP transport' CONFIG_UML_NET_TUNTAP + bool ' SLIP transport' CONFIG_UML_NET_SLIP + bool ' Daemon transport' CONFIG_UML_NET_DAEMON + bool ' Multicast transport' CONFIG_UML_NET_MCAST + fi + + bool 'Software network device support' CONFIG_NETDEVICES + if [ "$CONFIG_NETDEVICES" = "y" ]; then + source drivers/net/Config.in + fi + + endmenu +fi + +source fs/Config.in + +source drivers/md/Config.in + +source drivers/mtd/Config.in + +mainmenu_option next_comment +comment 'Kernel hacking' +bool 'Debug memory allocations' CONFIG_DEBUG_SLAB +bool 'Enable kernel debugging symbols' CONFIG_DEBUGSYM +if [ "$CONFIG_XTERM_CHAN" = "y" ]; then + dep_bool 'Enable ptrace proxy' CONFIG_PT_PROXY $CONFIG_DEBUGSYM +else + define_bool CONFIG_PT_PROXY n +fi +dep_bool 'Enable gprof support' CONFIG_GPROF $CONFIG_DEBUGSYM +dep_bool 'Enable gcov support' CONFIG_GCOV $CONFIG_DEBUGSYM +endmenu diff -urN linux-2.4.17/arch/um/config.release linux_umopenmosix/arch/um/config.release --- linux-2.4.17/arch/um/config.release Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/config.release Wed Jun 26 23:45:14 2002 @@ -0,0 +1,300 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_USERMODE=y +# CONFIG_ISA is not set +# CONFIG_SBUS is not set +# 
CONFIG_PCI is not set +CONFIG_UID16=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# General Setup +# +CONFIG_STDIO_CONSOLE=y +CONFIG_NET=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_SSL=y +CONFIG_HOSTFS=y +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_HOST_2G_2G is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_KMOD=y + +# +# Devices +# +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +# CONFIG_BLK_DEV_INITRD is not set +# CONFIG_MMAPPER is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +# CONFIG_FILTER is not set +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_IPV6 is not set +# CONFIG_KHTTPD is not set +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network device support +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# 
CONFIG_ARCNET is not set +CONFIG_DUMMY=y +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=y +# CONFIG_ETHERTAP is not set + +# +# Ethernet (10 or 100Mbit) +# +# CONFIG_NET_ETHERNET is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_MYRI_SBUS is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_SK98LIN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PLIP=m +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +# CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPPOE=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +# CONFIG_SLIP_MODE_SLIP6 is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set +# CONFIG_RCPCI is not set +CONFIG_SHAPER=m + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# File systems +# +CONFIG_QUOTA=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m +CONFIG_HFS_FS=m +CONFIG_BFS_FS=m +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_JBD_DEBUG is not set +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_UMSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_EFS_FS=m +CONFIG_JFFS_FS=m +CONFIG_JFFS_FS_VERBOSE=0 +# CONFIG_JFFS_PROC_FS is not set +# CONFIG_JFFS2_FS is not set +CONFIG_CRAMFS=m +CONFIG_TMPFS=y +CONFIG_RAMFS=m +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_MINIX_FS=m +CONFIG_VXFS_FS=m +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +CONFIG_HPFS_FS=m +CONFIG_PROC_FS=y +CONFIG_DEVFS_FS=y +CONFIG_DEVFS_MOUNT=y +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +CONFIG_QNX4FS_FS=m +# CONFIG_QNX4FS_RW is not set 
+CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +CONFIG_SYSV_FS=m +CONFIG_UDF_FS=m +# CONFIG_UDF_RW is not set +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +# CONFIG_NFS_FS is not set +# CONFIG_NFS_V3 is not set +# CONFIG_ROOT_NFS is not set +# CONFIG_NFSD is not set +# CONFIG_NFSD_V3 is not set +# CONFIG_SUNRPC is not set +# CONFIG_LOCKD is not set +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +# CONFIG_ZISOFS_FS is not set +CONFIG_ZLIB_FS_INFLATE=m + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# 
CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Kernel hacking +# +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUGSYM is not set +# CONFIG_PT_PROXY is not set +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set diff -urN linux-2.4.17/arch/um/defconfig linux_umopenmosix/arch/um/defconfig --- linux-2.4.17/arch/um/defconfig Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/defconfig Wed Jun 26 23:45:14 2002 @@ -0,0 +1,344 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_USERMODE=y +# CONFIG_ISA is not set +# CONFIG_SBUS is not set +# CONFIG_PCI is not set +CONFIG_UID16=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# General Setup +# +CONFIG_STDIO_CONSOLE=y +CONFIG_NET=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_SSL=y +CONFIG_HOSTFS=m +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_HOST_2G_2G is not set +# CONFIG_UML_SMP is not set +# CONFIG_SMP is not set +CONFIG_CON_ZERO_CHAN="fd:0,fd:1" +CONFIG_CON_CHAN="xterm" +CONFIG_SSL_CHAN="pty" + +# +# Loadable module support +# +CONFIG_MODULES=y +# CONFIG_KMOD is not set + +# +# Devices +# +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_MMAPPER is not set +CONFIG_UML_SOUND=y +CONFIG_SOUND=y +CONFIG_HOSTAUDIO=y +CONFIG_FD_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y + +# +# Networking options 
+# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +# CONFIG_FILTER is not set +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_IPV6 is not set +# CONFIG_KHTTPD is not set +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network device support +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +CONFIG_ETHERTAP=y + +# +# Ethernet (10 or 100Mbit) +# +# CONFIG_NET_ETHERNET is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC_OMIT_TIGON_I is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +CONFIG_SLIP=y +# CONFIG_SLIP_COMPRESSED is not set +# CONFIG_SLIP_SMART is not set +# CONFIG_SLIP_MODE_SLIP6 is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# 
CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# File systems +# +CONFIG_QUOTA=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +# CONFIG_EFS_FS is not set +CONFIG_JFFS_FS=y +CONFIG_JFFS_FS_VERBOSE=0 +CONFIG_JFFS_PROC_FS=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +# CONFIG_CRAMFS is not set +# CONFIG_TMPFS is not set +# CONFIG_RAMFS is not set +CONFIG_ISO9660_FS=m +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_MINIX_FS=m +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +CONFIG_DEVFS_FS=y +CONFIG_DEVFS_MOUNT=y +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SUNRPC is not set +# CONFIG_LOCKD is not set +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not 
set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_PARTITIONS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +# CONFIG_MTD_GEN_PROBE is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_OBSOLETE_CHIPS is not set + +# +# Mapping drivers for chip access +# + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_MTDRAM is not set +CONFIG_MTD_BLKMTD=m + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC1000 is not set +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOCPROBE is not set + 
+# +# NAND Flash Device Drivers +# +# CONFIG_MTD_NAND is not set + +# +# Kernel hacking +# +# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUGSYM=y +CONFIG_PT_PROXY=y +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set diff -urN linux-2.4.17/arch/um/drivers/Makefile linux_umopenmosix/arch/um/drivers/Makefile --- linux-2.4.17/arch/um/drivers/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/Makefile Wed Jun 26 23:45:14 2002 @@ -0,0 +1,48 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +OBJ := um_drivers.o + +CHAN_OBJS = chan_kern.o chan_user.o line.o + +OBJS-y = +OBJS-$(CONFIG_SSL) += ssl.o +OBJS-$(CONFIG_UML_NET_SLIP) += slip_kern.o slip_user.o +OBJS-$(CONFIG_UML_NET_ETHERTAP) += ethertap_kern.o ethertap_user.o +OBJS-$(CONFIG_UML_NET_TUNTAP) += tuntap_kern.o tuntap_user.o +OBJS-$(CONFIG_UML_NET_DAEMON) += daemon_kern.o daemon_user.o +OBJS-$(CONFIG_UML_NET_MCAST) += mcast_user.o mcast_kern.o +OBJS-$(CONFIG_UML_NET) += net_kern.o net_user.o +OBJS-$(CONFIG_MCONSOLE) += mconsole_kern.o mconsole_user.o +OBJS-$(CONFIG_MMAPPER) += mmapper_kern.o +OBJS-$(CONFIG_BLK_DEV_UBD) += ubd.o ubd_user.o +OBJS-$(CONFIG_HOSTAUDIO) += hostaudio_kern.o hostaudio_user.o +OBJS-$(CONFIG_FD_CHAN) += fd.o +OBJS-$(CONFIG_PORT_CHAN) += port.o port_kern.o +OBJS-$(CONFIG_PTY_CHAN) += pty.o +OBJS-$(CONFIG_TTY_CHAN) += tty.o +OBJS-$(CONFIG_XTERM_CHAN) += xterm.o + +OBJS = stdio_console.o $(OBJS-y) $(CHAN_OBJS) + +USER_OBJS = $(filter %_user.o,$(OBJS)) fd.o pty.o socket.o tty.o xterm.o + +all : $(OBJ) + +$(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +clean: + rm -f $(OBJS) $(export-objs) + +modules: + +fastdep: + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/arch/um/drivers/chan_kern.c linux_umopenmosix/arch/um/drivers/chan_kern.c --- linux-2.4.17/arch/um/drivers/chan_kern.c Thu Jan 1 02:00:00 1970 +++ 
linux_umopenmosix/arch/um/drivers/chan_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include "chan_kern.h" +#include "user_util.h" +#include "kern.h" +#include "irq_user.h" + +static void tty_receive_char(struct tty_struct *tty, char ch) +{ + if(tty == NULL) return; + + if(I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) { + if(ch == STOP_CHAR(tty)){ + stop_tty(tty); + return; + } + else if(ch == START_CHAR(tty)){ + start_tty(tty); + return; + } + } + + if((tty->flip.flag_buf_ptr == NULL) || + (tty->flip.char_buf_ptr == NULL)) + return; + tty_insert_flip_char(tty, ch, TTY_NORMAL); +} + +static int open_one_chan(struct chan *chan, int input, int output) +{ + int fd; + + if(chan->opened) return(0); + fd = (*chan->ops->open)(input, output, chan->data); + if(fd < 0) return(-fd); + chan->fd = fd; + + chan->opened = 1; + return(0); +} + +int open_chan(struct list_head *chans) +{ + struct list_head *ele; + struct chan *chan; + int ret, err = 0; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + ret = open_one_chan(chan, chan->input, chan->output); + if(chan->primary) err = ret; + } + return(err); +} + +void enable_chan(struct list_head *chans, + int (*irq_setup)(int fd, int input, int output, void *data), + void *data) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(!chan->opened) continue; + + (*irq_setup)(chan->fd, chan->input, chan->output, data); + } +} + +void disable_chan(struct list_head *chans) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(!chan->opened) continue; + + free_irq_by_fd(chan->fd); + } +} + +void close_chan(struct list_head *chans) +{ + struct list_head *ele; + struct chan *chan; + + /* Close in reverse 
order as open in case more than one of them + * refers to the same device and they save and restore that device's + * state. Then, the first one opened will have the original state, + * so it must be the last closed. + */ + for(ele = chans->prev; ele != chans; ele = ele->prev){ + chan = list_entry(ele, struct chan, list); + /* Never opened, or already closed - there is nothing to undo */ + if(!chan->opened) continue; + if(chan->ops->close != NULL) + (*chan->ops->close)(chan->fd, chan->data); + free_irq_by_fd(chan->fd); + /* Mark it closed; open_one_chan does nothing while opened is set */ + chan->opened = 0; + } +} + +int write_chan(struct list_head *chans, const char *buf, int len) +{ + struct list_head *ele; + struct chan *chan; + int n, ret = 0; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(!chan->output) continue; + n = chan->ops->write(chan->fd, buf, len, chan->data); + if(chan->primary) ret = n; + } + return(ret); +} + +int console_write_chan(struct list_head *chans, const char *buf, int len) +{ + struct list_head *ele; + struct chan *chan; + int n, ret = 0; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(!chan->output) continue; + n = chan->ops->console_write(chan->fd, buf, len, chan->data); + if(chan->primary) ret = n; + } + return(ret); +} + +int chan_window_size(struct list_head *chans, unsigned short *rows_out, + unsigned short *cols_out) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(chan->primary) + return(chan->ops->window_size(chan->fd, chan->data, + rows_out, cols_out)); + } + return(0); +} + +void free_one_chan(struct chan *chan) +{ + list_del(&chan->list); + (*chan->ops->free)(chan->data); + free_irq_by_fd(chan->fd); + kfree(chan); +} + +void free_chan(struct list_head *chans) +{ + struct list_head *ele, *next; + struct chan *chan; + + list_for_each_safe(ele, next, chans){ + chan = list_entry(ele, struct chan, list); + free_one_chan(chan); + } +} + +struct chan_type { + char *key; + struct chan_ops *ops; +}; + +struct chan_type chan_table[] = { +#ifdef CONFIG_PTY_CHAN + { "pty", 
&pty_ops }, + { "pts", &pts_ops }, +#endif +#ifdef CONFIG_TTY_CHAN + { "tty", &tty_ops }, +#endif +#ifdef CONFIG_XTERM_CHAN + { "xterm", &xterm_ops }, +#endif +#ifdef CONFIG_FD_CHAN + { "fd", &fd_ops }, +#endif +#ifdef CONFIG_PORT_CHAN + { "port", &port_ops }, +#endif +}; + +static struct chan *parse_chan(char *str, int pri, int device, + struct chan_opts *opts) +{ /* Match the prefix of str against chan_table and build an unopened chan for it; NULL on failure */ + struct chan_type *entry; + struct chan_ops *ops; + struct chan *chan; + void *data; + int i; + + ops = NULL; + for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ + entry = &chan_table[i]; + if(!strncmp(str, entry->key, strlen(entry->key))){ + ops = entry->ops; + str += strlen(entry->key); /* the rest of str is handed to ops->init */ + break; + } + } + if(ops == NULL){ + printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", str); + return(NULL); + } + + data = (*ops->init)(str, device, opts); + if(data == NULL) return(NULL); + chan = kmalloc(sizeof(*chan), GFP_KERNEL); + if(chan == NULL){ (*ops->free)(data); return(NULL); } /* don't leak what ops->init returned */ + *chan = ((struct chan) { list : LIST_HEAD_INIT(chan->list), + primary : 1, + input : 0, + output : 0, + opened : 0, + fd : -1, + pri : pri, + ops : ops, + data : data }); + return(chan); +} + +int parse_chan_pair(char *str, struct list_head *chans, int pri, int device, + struct chan_opts *opts) +{ + struct chan *new, *chan; + char *in, *out; + + if(!list_empty(chans)){ + chan = list_entry(chans->next, struct chan, list); + if(chan->pri >= pri) return(0); + free_chan(chans); + INIT_LIST_HEAD(chans); + } + + if((out = strchr(str, ',')) != NULL){ + in = str; + *out = '\0'; + out++; + new = parse_chan(in, pri, device, opts); + if(new == NULL) return(-1); + new->input = 1; + list_add(&new->list, chans); + + new = parse_chan(out, pri, device, opts); + if(new == NULL) return(-1); + list_add(&new->list, chans); + new->output = 1; + } + else { + new = parse_chan(str, pri, device, opts); + if(new == NULL) return(-1); + list_add(&new->list, chans); + new->input = 1; + new->output = 1; + } + return(0); +} + +int chan_out_fd(struct list_head *chans) 
+{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(chan->primary && chan->output) + return(chan->fd); + } + return(-1); +} + +void chan_interrupt(struct list_head *chans, struct tty_struct *tty) +{ + struct list_head *ele; + struct chan *chan; + char c; + + list_for_each(ele, chans){ + chan = list_entry(ele, struct chan, list); + if(!chan->input) continue; + do { + c = chan->ops->read(chan->fd, chan->data); + if(c > 0) tty_receive_char(tty, c); + } while(c > 0); + if(c == 0) reactivate_fd(chan->fd); + if(c == -EIO){ + chan->ops->close(chan->fd, chan->data); + chan->opened = 0; + if(chan->primary){ + if(tty != NULL) tty_hangup(tty); + free_chan(chans); + return; + } + else free_one_chan(chan); + } + } + if(tty) tty_flip_buffer_push(tty); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/chan_user.c linux_umopenmosix/arch/um/drivers/chan_user.c --- linux-2.4.17/arch/um/drivers/chan_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/chan_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "user.h" + +void generic_close(int fd, void *unused) +{ + close(fd); +} + +int generic_read(int fd, void *unused) +{ + int n; + char c; + + n = read(fd, &c, sizeof(c)); + if(n < 0){ + if(errno == EAGAIN) return(0); + return(-errno); + } + else if(n == 0) return(-EIO); + return(c); +} + +int generic_write(int fd, const char *buf, int n, void *unused) +{ + return(write(fd, buf, n)); +} + +int generic_console_write(int fd, const char *buf, int n, void *state) +{ + struct termios save, *orig = state; + int err; + + if(isatty(fd)){ + tcgetattr(fd, &save); + tcsetattr(fd, TCSADRAIN, orig); + } + err = generic_write(fd, buf, n, NULL); + if(isatty(fd)) tcsetattr(fd, TCSADRAIN, &save); + return(err); +} + +int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out) +{ + struct winsize size; + int ret = 0; + + if(ioctl(fd, TIOCGWINSZ, &size) == 0){ + ret = ((*rows_out != size.ws_row) || + (*cols_out != size.ws_col)); + *rows_out = size.ws_row; + *cols_out = size.ws_col; + } + return(ret); +} + +void generic_free(void *data) +{ + kfree(data); +} + +int getmaster(char *line) +{ + struct stat stb; + char *pty, *bank, *cp; + int master; + + pty = &line[strlen("/dev/ptyp")]; + for (bank = "pqrs"; *bank; bank++) { + line[strlen("/dev/pty")] = *bank; + *pty = '0'; + if (stat(line, &stb) < 0) + break; + for (cp = "0123456789abcdef"; *cp; cp++) { + *pty = 
*cp; + master = open(line, O_RDWR); + if (master >= 0) { + char *tp = &line[strlen("/dev/")]; + int ok; + + /* verify slave side is usable */ + *tp = 't'; + ok = access(line, R_OK|W_OK) == 0; + *tp = 'p'; + if (ok) return(master); + (void) close(master); + } + } + } + return(-1); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/daemon.h linux_umopenmosix/arch/um/drivers/daemon.h --- linux-2.4.17/arch/um/drivers/daemon.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/daemon.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "net_user.h" + +struct daemon_data { + char *sock_type; + char *ctl_sock; + char *data_sock; + void *ctl_addr; + void *data_addr; + void *local_addr; + unsigned char hwaddr[ETH_ADDR_LEN]; + int hw_setup; + int control; + void *dev; +}; + +extern struct net_user_info daemon_user_info; + +extern int daemon_user_set_mac(struct daemon_data *pri, unsigned char *hwaddr, + int len); +extern int daemon_user_write(int fd, void *buf, int len, + struct daemon_data *pri); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/daemon_kern.c linux_umopenmosix/arch/um/drivers/daemon_kern.c --- linux-2.4.17/arch/um/drivers/daemon_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/daemon_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include "linux/init.h" +#include "linux/netdevice.h" +#include "linux/etherdevice.h" +#include "net_kern.h" +#include "net_user.h" +#include "daemon.h" +#include "daemon_kern.h" + +struct daemon_data daemon_priv[MAX_UML_NETDEV] = { + [ 0 ... MAX_UML_NETDEV - 1 ] = + { + sock_type: "unix", + ctl_sock: "/tmp/uml.ctl", + data_sock: "/tmp/uml.data", + hwaddr: { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + hw_setup: 0, + control: -1, + } +}; + +struct net_device *daemon_init(int private_size, int index) +{ + struct net_device *dev; + struct uml_net_private *pri; + struct daemon_data *dpri; + + dev = init_etherdev(NULL, private_size); + if(dev == NULL) return(NULL); + pri = dev->priv; + dpri = (struct daemon_data *) pri->user; + *dpri = daemon_priv[index]; + memcpy(dev->dev_addr, dpri->hwaddr, ETH_ALEN); + printk("daemon backend"); + if(dpri->hw_setup) + printk("- ethernet address = %x:%x:%x:%x:%x:%x\n", + dpri->hwaddr[0], dpri->hwaddr[1], dpri->hwaddr[2], + dpri->hwaddr[3], dpri->hwaddr[4], dpri->hwaddr[5]); + printk("\n"); + return(dev); +} + +static unsigned short daemon_protocol(struct sk_buff *skb) +{ + return(eth_type_trans(skb, skb->dev)); +} + +static int daemon_set_mac(struct sockaddr *addr, void *data) +{ + struct daemon_data *pri = data; + struct net_device *dev = pri->dev; + struct sockaddr *hwaddr = addr; + + memcpy(dev->dev_addr, hwaddr->sa_data, 
ETH_ALEN); + return(daemon_user_set_mac(pri, hwaddr->sa_data, ETH_ALEN)); +} + +static int daemon_read(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); + if(*skb == NULL) return(-ENOMEM); + return(net_recvfrom(fd, (*skb)->mac.raw, + (*skb)->dev->mtu + ETH_HEADER_OTHER)); +} + +static int daemon_write(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + return(daemon_user_write(fd, (*skb)->data, (*skb)->len, + (struct daemon_data *) &lp->user)); +} + +static struct net_kern_info daemon_kern_info = { + init: daemon_init, + protocol: daemon_protocol, + set_mac: daemon_set_mac, + read: daemon_read, + write: daemon_write, +}; + +static int daemon_count = 0; + +void daemon_setup(char *str, struct uml_net *dev) +{ + int err, n = daemon_count; + + dev->user = &daemon_user_info; + dev->kern = &daemon_kern_info; + dev->private_size = sizeof(struct daemon_data); + dev->transport_index = daemon_count++; + if(*str != ',') return; + str++; + if(*str != ','){ + err = setup_etheraddr(str, daemon_priv[n].hwaddr); + if(!err) daemon_priv[n].hw_setup = 1; + } + str = strchr(str, ','); + if(str == NULL) return; + *str++ = '\0'; + if(*str != ',') daemon_priv[n].sock_type = str; + str = strchr(str, ','); + if(str == NULL) return; + *str++ = '\0'; + if(*str != ',') daemon_priv[n].ctl_sock = str; + str = strchr(str, ','); + if(str == NULL) return; + *str++ = '\0'; + if(*str != '\0') daemon_priv[n].data_sock = str; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/daemon_kern.h linux_umopenmosix/arch/um/drivers/daemon_kern.h --- linux-2.4.17/arch/um/drivers/daemon_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/daemon_kern.h Sat Jun 29 17:02:15 2002 @@ -0,0 +1,8 @@ +#ifndef __UM_DAEMON_KERN_H +#define __UM_DAEMON_KERN_H + +#include "net_kern.h" + +extern void daemon_setup(char *arg, struct uml_net *dev); + +#endif diff -urN linux-2.4.17/arch/um/drivers/daemon_user.c linux_umopenmosix/arch/um/drivers/daemon_user.c --- linux-2.4.17/arch/um/drivers/daemon_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/daemon_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include "net_user.h" +#include "daemon.h" +#include "kern_util.h" +#include "user_util.h" +#include "user.h" + +#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) + +enum request_type { REQ_NEW_CONTROL }; + +struct request { + enum request_type type; + union { + struct { + unsigned char addr[ETH_ADDR_LEN]; + struct sockaddr_un name; + } new_control; + struct { + unsigned long cookie; + } new_data; + } u; +}; + +static struct sockaddr_un *new_addr(void *name, int len) +{ + struct sockaddr_un *sun; + + sun = um_kmalloc(sizeof(struct sockaddr_un)); + if(sun == NULL){ + printk("new_addr: allocation of sockaddr_un failed\n"); + return(NULL); + } + sun->sun_family = AF_UNIX; + memcpy(sun->sun_path, name, len); + return(sun); +} + +static void daemon_user_init(void *data, void *dev) +{ + struct daemon_data *pri = data; + struct timeval tv; + struct { + char zero; + int pid; + int usecs; + } name; + + if(!strcmp(pri->sock_type, "unix")){ + pri->ctl_addr = new_addr(pri->ctl_sock, + strlen(pri->ctl_sock) + 1); + pri->data_addr = new_addr(pri->data_sock, + strlen(pri->data_sock) + 1); + } + name.zero = 0; + name.pid = getpid(); + gettimeofday(&tv, NULL); + name.usecs = tv.tv_usec; + pri->local_addr = new_addr(&name, sizeof(name)); + pri->dev = dev; +} + +static int daemon_open(void *data) +{ + struct daemon_data *pri = data; + struct sockaddr_un *ctl_addr = pri->ctl_addr; + struct sockaddr_un *local_addr = pri->local_addr; + struct request req; + char addr[sizeof("255.255.255.255\0")]; + int fd, n, err; + + if(!pri->hw_setup){ + pri->hwaddr[0] = 0xfe; + pri->hwaddr[1] = 0xfd; + pri->hwaddr[2] = 0x0; + pri->hwaddr[3] = 0x0; + pri->hwaddr[4] = 0x0; + pri->hwaddr[5] = 0x0; + dev_ip_addr(pri->dev, addr, &pri->hwaddr[2]); + set_ether_mac(pri->dev, pri->hwaddr); + } + if((ctl_addr == NULL) || (pri->data_addr == NULL) || + (pri->local_addr == NULL)) + return(-EINVAL); + + if((pri->control = socket(AF_UNIX, SOCK_STREAM, 
0)) < 0){ + printk("daemon_open : control socket failed, errno = %d\n", + errno); + return(-ENOMEM); + } + + if(connect(pri->control, (struct sockaddr *) ctl_addr, + sizeof(*ctl_addr)) < 0){ + printk("daemon_open : control connect failed, errno = %d\n", + errno); + err = -ENOTCONN; + goto out; + } + + req.type = REQ_NEW_CONTROL; + memcpy(req.u.new_control.addr, pri->hwaddr, + sizeof(req.u.new_control.addr)); + req.u.new_control.name = *local_addr; + n = write(pri->control, &req, sizeof(req)); + if(n != sizeof(req)){ + printk("daemon_open : control setup request returned %d, " + "errno = %d\n", n, errno); + err = -ENOTCONN; + goto out; + } + + if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ + printk("daemon_open : data socket failed, errno = %d\n", + errno); + err = -ENOMEM; + goto out; + } + if(bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0){ + printk("daemon_open : data bind failed, errno = %d\n", + errno); + close(fd); + err = -EINVAL; + goto out; + } + + return(fd); + out: + close(pri->control); + return(err); +} + +static void daemon_close(int fd, void *data) +{ + struct daemon_data *pri = data; + + close(fd); + close(pri->control); +} + +int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) +{ + struct sockaddr_un *data_addr = pri->data_addr; + + return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); +} + +static int daemon_set_mtu(int mtu, void *data) +{ + return(mtu); +} + +int daemon_user_set_mac(struct daemon_data *pri, unsigned char *hwaddr, + int len) +{ + memcpy(pri->hwaddr, hwaddr, len); + return(0); +} + +struct net_user_info daemon_user_info = { + init: daemon_user_init, + open: daemon_open, + close: daemon_close, + set_mtu: daemon_set_mtu, + add_address: NULL, + delete_address: NULL, + max_packet: MAX_PACKET - ETH_HEADER_OTHER +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/etap.h linux_umopenmosix/arch/um/drivers/etap.h --- linux-2.4.17/arch/um/drivers/etap.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/etap.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "net_user.h" + +struct ethertap_data { + char *dev_name; + char *gate_addr; + int data_fd; + int control_fd; + void *dev; + unsigned char hw_addr[ETH_ADDR_LEN]; + int hw_setup; +}; + +extern struct net_user_info ethertap_user_info; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/etap_kern.h linux_umopenmosix/arch/um/drivers/etap_kern.h --- linux-2.4.17/arch/um/drivers/etap_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/etap_kern.h Sat Jun 29 17:02:11 2002 @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_ETHERTAP_KERN_H +#define __UM_ETHERTAP_KERN_H + +#include "net_kern.h" + +extern void ethertap_setup(char *arg, struct uml_net *dev); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/ethertap_kern.c linux_umopenmosix/arch/um/drivers/ethertap_kern.c --- linux-2.4.17/arch/um/drivers/ethertap_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/ethertap_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include "linux/init.h" +#include "linux/netdevice.h" +#include "linux/etherdevice.h" +#include "net_kern.h" +#include "net_user.h" +#include "etap.h" +#include "etap_kern.h" + +struct ethertap_setup { + char *dev_name; + unsigned char hw_addr[ETH_ALEN]; + int hw_setup; + char *gate_addr; +}; + +struct ethertap_setup ethertap_priv[MAX_UML_NETDEV] = { + [ 0 ... MAX_UML_NETDEV - 1 ] = + { + dev_name: NULL, + hw_addr: { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + hw_setup: 0, + gate_addr: NULL, + } +}; + +struct net_device *etap_init(int private_size, int index) +{ + struct net_device *dev; + struct uml_net_private *pri; + struct ethertap_data *epri; + + dev = init_etherdev(NULL, private_size); + if(dev == NULL) return(NULL); + pri = dev->priv; + epri = (struct ethertap_data *) pri->user; + epri->dev_name = ethertap_priv[index].dev_name; + epri->gate_addr = ethertap_priv[index].gate_addr; + memcpy(dev->dev_addr, ethertap_priv[index].hw_addr, ETH_ALEN); + memcpy(epri->hw_addr, ethertap_priv[index].hw_addr, + sizeof(epri->hw_addr)); + printk("ethertap backend - %s", epri->dev_name); + if(epri->gate_addr != NULL) + printk(", IP = %s", epri->gate_addr); + epri->hw_setup = ethertap_priv[index].hw_setup; + if(epri->hw_setup) + printk(", ether = %x:%x:%x:%x:%x:%x", + epri->hw_addr[0], epri->hw_addr[1], epri->hw_addr[2], + epri->hw_addr[3], epri->hw_addr[4], 
epri->hw_addr[5]); + printk("\n"); + epri->data_fd = -1; + epri->control_fd = -1; + return(dev); +} + +static unsigned short etap_protocol(struct sk_buff *skb) +{ + return(eth_type_trans(skb, skb->dev)); +} + +static int etap_set_mac(struct sockaddr *addr, void *data) +{ + struct ethertap_data *pri = data; + struct sockaddr *hwaddr = addr; + + memcpy(pri->hw_addr, hwaddr->sa_data, ETH_ALEN); + + return 0; +} + +static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) +{ + int len; + + *skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP); + if(*skb == NULL) return(-ENOMEM); + len = net_recvfrom(fd, (*skb)->mac.raw, + (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP); + if(len <= 0) return(len); + skb_pull(*skb, 2); + len -= 2; + return(len); +} + +static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) +{ + if(skb_headroom(*skb) < 2){ + struct sk_buff *skb2; + + skb2 = skb_realloc_headroom(*skb, 2); + dev_kfree_skb(*skb); + if (skb2 == NULL) return(-ENOMEM); + *skb = skb2; + } + skb_push(*skb, 2); + return(net_send(fd, (*skb)->data, (*skb)->len)); +} + +struct net_kern_info ethertap_kern_info = { + init: etap_init, + protocol: etap_protocol, + set_mac: etap_set_mac, + read: etap_read, + write: etap_write, +}; + +static int ethertap_count = 0; + +void ethertap_setup(char *str, struct uml_net *dev) +{ + struct ethertap_setup *pri; + + dev->user = &ethertap_user_info; + dev->kern = &ethertap_kern_info; + dev->private_size = sizeof(struct ethertap_data); + pri = &ethertap_priv[ethertap_count]; + dev->transport_index = ethertap_count++; + tap_setup_common(str, "ethertap", &pri->dev_name, pri->hw_addr, + &pri->hw_setup, &pri->gate_addr); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/ethertap_user.c linux_umopenmosix/arch/um/drivers/ethertap_user.c --- linux-2.4.17/arch/um/drivers/ethertap_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/ethertap_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user.h" +#include "kern_util.h" +#include "net_user.h" +#include "etap.h" + +#define MAX_PACKET ETH_MAX_PACKET + +void etap_user_init(void *data, void *dev) +{ + struct ethertap_data *pri = data; + + pri->dev = dev; +} + +struct etap_open_data { + char *name; + char *gate; + int data_remote; + int data_me; + int control_remote; + int control_me; + int err; + char *output; +}; + +struct addr_change { + enum { ADD_ADDR, DEL_ADDR } what; + unsigned char addr[4]; + unsigned char netmask[4]; +}; + +static void etap_change(int op, unsigned char *addr, unsigned char *netmask, + int fd) +{ + struct addr_change change; + char *output; + + change.what = op; + memcpy(change.addr, addr, sizeof(change.addr)); + memcpy(change.netmask, netmask, sizeof(change.netmask)); + if(write(fd, &change, sizeof(change)) != sizeof(change)) + printk("etap_change - request failed, errno = %d\n", + errno); + if(!read_output(fd, &output)) printk("%s", output); +} + +static void etap_open_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); +} + +static void etap_close_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); +} + 
+static void etap_tramp(void *arg) +{ + struct etap_open_data *data = arg; + int pid, status; + char version_buf[sizeof("nnnnn\0")]; + char data_fd_buf[sizeof("nnnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *setup_args[] = { "uml_net", version_buf, "ethertap", data->name, + data_fd_buf, gate_buf, NULL }; + char *nosetup_args[] = { "uml_net", version_buf, "ethertap", + data->name, data_fd_buf, NULL }; + char **args, c; + + sprintf(data_fd_buf, "%d", data->data_remote); + sprintf(version_buf, "%d", UML_NET_VERSION); + if(data->gate != NULL){ + strcpy(gate_buf, data->gate); + args = setup_args; + } + else args = nosetup_args; + data->err = 0; + if((pid = fork()) == 0){ + dup2(data->control_remote, 1); + close(data->data_me); + close(data->control_me); + execvp(args[0], args); + printk("Exec of '%s' failed - errno = %d\n", args[0], errno); + exit(1); + } + else if(pid < 0) data->err = errno; + close(data->data_remote); + close(data->control_remote); + data->output = NULL; + if(read(data->control_me, &c, sizeof(c)) != sizeof(c)){ + printk("etap_tramp : read of status failed, errno = %d\n", + errno); + data->err = EINVAL; + return; + } + if(c != 1){ + printk("etap_tramp : uml_net failed\n"); + data->err = EINVAL; + if(waitpid(pid, &status, 0) < 0) data->err = errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ + printk("uml_net didn't exit with status 1\n"); + } + return; + } + if(read_output(data->control_me, &data->output)) + data->err = EINVAL; +} + +static int etap_open(void *data) +{ + struct ethertap_data *pri = data; + struct etap_open_data tap_data; + int data_fds[2], control_fds[2], err; + + err = tap_open_common(pri->dev, pri->hw_setup, pri->gate_addr); + if(err) return(err); + + tap_data.name = pri->dev_name; + + if(socketpair(PF_UNIX, SOCK_DGRAM, 0, data_fds) < 0){ + printk("data socketpair failed - errno = %d\n", errno); + return(-errno); + } + tap_data.data_remote = data_fds[1]; + tap_data.data_me = data_fds[0]; + + 
if(socketpair(PF_UNIX, SOCK_STREAM, 0, control_fds) < 0){ + printk("data socketpair failed - errno = %d\n", errno); + return(-errno); + } + tap_data.control_remote = control_fds[1]; + tap_data.control_me = control_fds[0]; + + tap_data.gate = pri->gate_addr; + tracing_cb(etap_tramp, &tap_data); + if(tap_data.output){ + printk("%s", tap_data.output); + kfree(tap_data.output); + } + if(tap_data.err != 0){ + printk("etap_tramp failed - errno = %d\n", tap_data.err); + return(-tap_data.err); + } + pri->data_fd = data_fds[0]; + pri->control_fd = control_fds[0]; + iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); + return(data_fds[0]); +} + +static void etap_close(int fd, void *data) +{ + struct ethertap_data *pri = data; + + iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); + close(fd); + shutdown(pri->data_fd, SHUT_RDWR); + close(pri->data_fd); + pri->data_fd = -1; + close(pri->control_fd); + pri->control_fd = -1; +} + +static int etap_set_mtu(int mtu, void *data) +{ + return(mtu); +} + +static void etap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + if(pri->control_fd == -1) return; + etap_open_addr(addr, netmask, &pri->control_fd); +} + +static void etap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + if(pri->control_fd == -1) return; + etap_close_addr(addr, netmask, &pri->control_fd); +} + +struct net_user_info ethertap_user_info = { + init: etap_user_init, + open: etap_open, + close: etap_close, + set_mtu: etap_set_mtu, + add_address: etap_add_addr, + delete_address: etap_del_addr, + max_packet: MAX_PACKET - ETH_HEADER_ETHERTAP +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/fd.c linux_umopenmosix/arch/um/drivers/fd.c --- linux-2.4.17/arch/um/drivers/fd.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/fd.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include "user.h" +#include "user_util.h" +#include "chan_user.h" + +struct fd_chan { + int fd; + int raw; + struct termios tt; +}; + +void *fd_init(char *str, int device, struct chan_opts *opts) +{ + struct fd_chan *data; + char *end; + int n; + + if(*str != ':'){ + printk("fd_init : channel type 'fd' must specify a file " + "descriptor\n"); + return(NULL); + } + str++; + n = strtoul(str, &end, 0); + if(*end != '\0'){ + printk("fd_init : couldn't parse file descriptor '%s'\n", str); + return(NULL); + } + if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + *data = ((struct fd_chan) { fd : n, + raw : opts->raw }); + return(data); +} + +int fd_open(int input, int output, void *d) +{ + struct fd_chan *data = d; + + if(data->raw && isatty(data->fd)){ + tcgetattr(data->fd, &data->tt); + raw(data->fd, 0); + } + return(data->fd); +} + +void fd_close(int fd, void *d) +{ + struct fd_chan *data = d; + + if(data->raw && isatty(fd)){ + tcsetattr(fd, TCSADRAIN, &data->tt); + data->raw = 0; + } +} + +int fd_console_write(int fd, const char *buf, int n, void *d) +{ + struct fd_chan *data = d; + + return(generic_console_write(fd, buf, n, &data->tt)); +} + +struct chan_ops fd_ops = { + init: fd_init, + open: fd_open, + close: fd_close, + read: generic_read, + write: generic_write, + console_write: fd_console_write, + window_size: generic_window_size, + free: generic_free, +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/hostaudio_kern.c linux_umopenmosix/arch/um/drivers/hostaudio_kern.c --- linux-2.4.17/arch/um/drivers/hostaudio_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/hostaudio_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2002 Steve Schmidtke + * Licensed under the GPL + */ + +#include "linux/config.h" +#include "linux/module.h" +#include "linux/version.h" +#include "linux/init.h" +#include "linux/slab.h" +#include "linux/fs.h" +#include "linux/sound.h" +#include "linux/soundcard.h" +#include "kern_util.h" +#include "init.h" +#include "hostaudio.h" + +char *dsp = HOSTAUDIO_DEV_DSP; +char *mixer = HOSTAUDIO_DEV_MIXER; + +static int set_dsp(char *name, int *add) +{ + dsp = uml_strdup(name); + return(0); +} + +__uml_setup("dsp=", set_dsp, +"dsp=\n" +" This is used to specify the host dsp device to the hostaudio driver.\n" +" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" +); + +static int set_mixer(char *name, int *add) +{ + mixer = uml_strdup(name); + return(0); +} + +__uml_setup("mixer=", set_mixer, +"mixer=\n" +" This is used to specify the host mixer device to the hostaudio driver.\n" +" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" +); + +/* /dev/dsp file operations */ + +static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, + loff_t *ppos) +{ + struct hostaudio_state *state = file->private_data; + +#ifdef DEBUG + printk("hostaudio: read called, count = %d\n", count); +#endif + + return(hostaudio_read_user(state, buffer, count, ppos)); +} + +static ssize_t hostaudio_write(struct file *file, const char *buffer, + size_t count, loff_t *ppos) +{ + struct 
hostaudio_state *state = file->private_data;
+
+#ifdef DEBUG
+	printk("hostaudio: write called, count = %d\n", count);
+#endif
+	return(hostaudio_write_user(state, buffer, count, ppos));
+}
+/* poll is not implemented: the returned event mask is always 0. */
+static unsigned int hostaudio_poll(struct file *file,
+				struct poll_table_struct *wait)
+{
+	unsigned int mask = 0;
+
+#ifdef DEBUG
+	printk("hostaudio: poll called (unimplemented)\n");
+#endif
+
+	return(mask);
+}
+/* ioctl: hands cmd and arg unchanged to hostaudio_ioctl_user() and returns its result. */
+static int hostaudio_ioctl(struct inode *inode, struct file *file,
+			   unsigned int cmd, unsigned long arg)
+{
+	struct hostaudio_state *state = file->private_data;
+
+#ifdef DEBUG
+	printk("hostaudio: ioctl called, cmd = %u\n", cmd);
+#endif
+
+	return(hostaudio_ioctl_user(state, cmd, arg));
+}
+/* open: allocates a hostaudio_state, opens the host device 'dsp' for the modes in f_mode, and keeps the state in file->private_data. */
+static int hostaudio_open(struct inode *inode, struct file *file)
+{
+	struct hostaudio_state *state;
+	int r = 0, w = 0;
+	int ret;
+
+#ifdef DEBUG
+	printk("hostaudio: open called (host: %s)\n", dsp);
+#endif
+
+	state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
+	if(state == NULL) return(-ENOMEM);
+
+	if(file->f_mode & FMODE_READ) r = 1;
+	if(file->f_mode & FMODE_WRITE) w = 1;
+
+	ret = hostaudio_open_user(state, r, w, dsp);
+	if(ret < 0){
+		kfree(state);	/* nothing was published yet, so free the state here */
+		return(ret);
+	}
+
+	file->private_data = state;
+	return(0);
+}
+/* release: calls hostaudio_release_user() on the state, frees it, and returns that call's result. */
+static int hostaudio_release(struct inode *inode, struct file *file)
+{
+	struct hostaudio_state *state = file->private_data;
+	int ret;
+
+#ifdef DEBUG
+	printk("hostaudio: release called\n");
+#endif
+
+	ret = hostaudio_release_user(state);
+	kfree(state);
+
+	return(ret);
+}
+
+/* /dev/mixer file operations */
+/* ioctl: hands cmd and arg unchanged to hostmixer_ioctl_mixdev_user(). */
+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct hostmixer_state *state = file->private_data;
+
+#ifdef DEBUG
+	printk("hostmixer: ioctl called\n");
+#endif
+
+	return(hostmixer_ioctl_mixdev_user(state, cmd, arg));
+}
+/* open: same sequence as hostaudio_open(), but for the host device named by 'mixer'. */
+static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
+{
+	struct hostmixer_state *state;
+	int r = 0, w = 0;
+	int ret;
+
+#ifdef DEBUG
+	printk("hostmixer: open called (host: %s)\n", mixer);
+#endif
+
+	state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL);
+	if(state == NULL) return(-ENOMEM);
+
+	if(file->f_mode & FMODE_READ) r = 1;
+	if(file->f_mode & FMODE_WRITE) w = 1;
+
+	ret = hostmixer_open_mixdev_user(state, r, w, mixer);
+
+	if(ret < 0){
+		kfree(state);
+		return(ret);
+	}
+
+	file->private_data = state;
+	return(0);
+}
+/* release: calls hostmixer_release_mixdev_user() on the state, then frees it. */
+static int hostmixer_release(struct inode *inode, struct file *file)
+{
+	struct hostmixer_state *state = file->private_data;
+	int ret;
+
+#ifdef DEBUG
+	printk("hostmixer: release called\n");
+#endif
+
+	ret = hostmixer_release_mixdev_user(state);
+	kfree(state);
+
+	return(ret);
+}
+
+
+/* kernel module operations */
+/* Operations table passed to register_sound_dsp() below; mmap is explicitly NULL. */
+static struct file_operations hostaudio_fops = {
+	owner:		THIS_MODULE,
+	llseek:		no_llseek,
+	read:		hostaudio_read,
+	write:		hostaudio_write,
+	poll:		hostaudio_poll,
+	ioctl:		hostaudio_ioctl,
+	mmap:		NULL,
+	open:		hostaudio_open,
+	release:	hostaudio_release,
+};
+/* Operations table passed to register_sound_mixer() below; only ioctl, open and release are provided. */
+static struct file_operations hostmixer_fops = {
+	owner:		THIS_MODULE,
+	llseek:		no_llseek,
+	ioctl:		hostmixer_ioctl_mixdev,
+	open:		hostmixer_open_mixdev,
+	release:	hostmixer_release,
+};
+/* Values returned by register_sound_dsp()/register_sound_mixer(), kept so cleanup can unregister them. */
+struct {
+	int dev_audio;
+	int dev_mixer;
+} module_data;
+
+MODULE_AUTHOR("Steve Schmidtke");
+MODULE_DESCRIPTION("UML Audio Relay");
+MODULE_LICENSE("GPL");
+/* Registers the DSP device, then the mixer; the DSP registration is undone if the mixer fails. Returns 0 or -ENODEV. */
+static int __init hostaudio_init_module(void)
+{
+	printk(KERN_INFO "UML Audio Relay: " __DATE__ " " __TIME__ "\n");
+
+	module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
+	if(module_data.dev_audio < 0){
+		printk(KERN_ERR "hostaudio: couldn't register DSP device!\n");
+		return -ENODEV;
+	}
+
+	module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1);
+	if(module_data.dev_mixer < 0){
+		printk(KERN_ERR "hostmixer: couldn't register mixer "
+		       "device!\n");
+		unregister_sound_dsp(module_data.dev_audio);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+/* Unregisters both devices, mixer first (the reverse of the registration order). */
+static void __exit hostaudio_cleanup_module (void)
+{
+	unregister_sound_mixer(module_data.dev_mixer);
+	unregister_sound_dsp(module_data.dev_audio);
+}
+
+module_init(hostaudio_init_module);
+module_exit(hostaudio_cleanup_module);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/hostaudio_user.c linux_umopenmosix/arch/um/drivers/hostaudio_user.c
--- linux-2.4.17/arch/um/drivers/hostaudio_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/hostaudio_user.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2002 Steve Schmidtke
+ * Licensed under the GPL
+ */
+/* NOTE(review): the header names of the six bare #include lines below are missing from this copy of the patch. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include "hostaudio.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+
+/* /dev/dsp file operations */
+/* read(2) on state->fd; returns the byte count, or -errno on failure. ppos is not used. */
+ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer,
+			    size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+
+#ifdef DEBUG
+	printk("hostaudio: read_user called, count = %d\n", count);
+#endif
+
+	ret = read(state->fd, buffer, count);
+
+	if(ret < 0) return(-errno);
+	return(ret);
+}
+/* write(2) on state->fd; returns the byte count, or -errno on failure. ppos is not used. */
+ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer,
+			     size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+
+#ifdef DEBUG
+	printk("hostaudio: write_user called, count = %d\n", count);
+#endif
+
+	ret = write(state->fd, buffer, count);
+
+	if(ret < 0) return(-errno);
+	return(ret);
+}
+/* ioctl(2) on state->fd with cmd and arg unchanged; returns its result, or -errno on failure. */
+int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd,
+			 unsigned long arg)
+{
+	int ret;
+#ifdef DEBUG
+	printk("hostaudio: ioctl_user called, cmd = %u\n", cmd);
+#endif
+
+	ret = ioctl(state->fd, cmd, arg);
+
+	if(ret < 0) return(-errno);
+	return(ret);
+}
+/* Opens host device 'dsp' read-only, write-only or read-write according to r and w; stores the descriptor in state->fd. Returns 0 or -errno. */
+int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp)
+{
+	int flags = 0, err;
+
+#ifdef DEBUG
+	printk("hostaudio: open_user called\n");
+#endif
+
+	if(r && !w) flags = O_RDONLY;
+	else if(!r && w) flags = O_WRONLY;
+	else if(r && w) flags = O_RDWR;
+
+	state->fd = open(dsp, flags);
+	err = errno;	/* capture now: printk() below may itself change errno */
+	if(state->fd >= 0) return(0);
+
+	printk("hostaudio_open_user failed to open '%s', errno = %d\n",
+	       dsp, err);
+
+	return(-err);
+}
+/* Closes state->fd if it is open and marks it closed (-1). Always returns 0. */
+int hostaudio_release_user(struct hostaudio_state *state)
+{
+#ifdef DEBUG
+	printk("hostaudio: release called\n");
+#endif
+	if(state->fd >= 0){
+		close(state->fd);
+		state->fd=-1;
+	}
+
+	return(0);
+}
+
+/* /dev/mixer file operations */
+/* ioctl(2) on state->fd with cmd and arg unchanged; returns its result, or -errno on failure. */
+int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state,
+				unsigned int cmd, unsigned long arg)
+{
+	int ret;
+#ifdef DEBUG
+	printk("hostmixer: ioctl_user called cmd = %u\n",cmd);
+#endif
+
+	ret = ioctl(state->fd, cmd, arg);
+	if(ret < 0)
+		return(-errno);
+	return(ret);
+}
+/* Mixer counterpart of hostaudio_open_user(): opens host device 'mixer' and stores the descriptor in state->fd. Returns 0 or -errno. */
+int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w,
+			       char *mixer)
+{
+	int flags = 0, err;
+
+#ifdef DEBUG
+	printk("hostmixer: open_user called\n");
+#endif
+
+	if(r && !w) flags = O_RDONLY;
+	else if(!r && w) flags = O_WRONLY;
+	else if(r && w) flags = O_RDWR;
+
+	state->fd = open(mixer, flags);
+	err = errno;	/* capture now: printk() below may itself change errno */
+	if(state->fd >= 0) return(0);
+
+	printk("hostmixer_open_mixdev_user failed to open '%s', errno = %d\n",
+	       mixer, err);
+
+	return(-err);
+}
+/* Closes state->fd if it is open and marks it closed (-1). Always returns 0. */
+int hostmixer_release_mixdev_user(struct hostmixer_state *state)
+{
+#ifdef DEBUG
+	printk("hostmixer: release_user called\n");
+#endif
+
+	if(state->fd >= 0){
+		close(state->fd);
+		state->fd = -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/line.c linux_umopenmosix/arch/um/drivers/line.c
--- linux-2.4.17/arch/um/drivers/line.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/line.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "chan_kern.h"
+#include "line.h"
+/* IRQ handler: forwards to chan_interrupt() only while the line has at least one opener. */
+void line_interrupt(int irq, void *data, struct pt_regs *unused)
+{
+	struct line *dev = data;
+
+	if(dev->count > 0) chan_interrupt(&dev->chan_list, dev->tty);
+}
+/* Opens lines[n] under line->sem: the first opener parses and opens the channels (if none exist yet) and enables them. Returns 0 or the parse/open error. */
+int line_open(struct line *lines, int n, struct tty_struct *tty,
+	      int (*setup_irq)(int fd, int input, int output, void *data),
+	      struct chan_opts *opts)
+{
+	struct line *line = &lines[n];
+	int err = 0;
+
+	down(&line->sem);
+	if(line->count == 0){
+		if(list_empty(&line->chan_list)){
+			err = parse_chan_pair(line->init_str, &line->chan_list,
+					      line->init_pri, n, opts);
+			if(err) goto out;
+			err = open_chan(&line->chan_list);
+			if(err) goto out;
+		}
+		enable_chan(&line->chan_list, setup_irq, line);
+	}
+	/* This is outside the if because the initial console is opened
+	 * with tty == NULL
+	 */
+	line->tty = tty;
+	if(tty != NULL) tty->driver_data = line;
+
+	line->count++;
+ out:
+	up(&line->sem);
+	return(err);
+}
+/* Drops one opener of lines[n]; the last one disables the channels. NOTE(review): unlike line_open(), this does not take line->sem. */
+void line_close(struct line *lines, int n)
+{
+	struct line *line = &lines[n];
+
+	line->count--;
+	if(line->count == 0){
+		disable_chan(&line->chan_list);
+		line->tty = NULL;
+	}
+}
+/* Parses "=<str>" (applies to every line) or "<n>=<str>" (line n only) and records <str> as the init string, subject to the existing init_pri. */
+void line_setup(struct line *lines, int num, char *init)
+{
+	int i, n;
+	char *end;
+
+	if(*init == '=') n = -1;
+	else {
+		n = simple_strtoul(init, &end, 0);
+		if(*end != '='){
+			printk(KERN_ERR "line_setup failed to parse \"%s\"\n",
+			       init);
+			return;
+		}
+		init = end;
+	}
+	init++;
+	if(n == -1){
+		for(i = 0; i < num; i++){
+			if(lines[i].init_pri <= INIT_ALL){
+				lines[i].init_str = init;
+				lines[i].init_pri = INIT_ALL;
+			}
+		}
+	}
+	else if((n < 0) || (n >= num))	/* n comes from the command line: never index past lines[] */
+		printk(KERN_ERR "line_setup - device %d out of range\n", n);
+	else if(lines[n].init_pri <= INIT_ONE){
+		lines[n].init_str = init;
+		lines[n].init_pri = INIT_ONE;
+	}
+}
+/* List of winch_lines registrations walked by run_winch_handlers(). */
+struct list_head winch_regs = LIST_HEAD_INIT(winch_regs);
+
+void register_winch(struct winch_lines *lines)
+{
+	list_add(&lines->list, &winch_regs);
+}
+/* For every registered, open line with a tty: refresh the tty's window size and send SIGWINCH to its process group when chan_window_size() returns non-zero. */
+void run_winch_handlers(void)
+{
+	struct list_head *ele;
+	struct winch_lines *lines;
+	struct line *line;
+	int i;
+
+	list_for_each(ele, &winch_regs){
+		lines = list_entry(ele, struct winch_lines, list);
+		for(i = 0; i < lines->nlines; i++){
+			line = &lines->lines[i];
+			if((line->count > 0) && (line->tty != NULL) &&
+			   chan_window_size(&line->chan_list,
+					    &line->tty->winsize.ws_row,
+					    &line->tty->winsize.ws_col))
+				kill_pg(line->tty->pgrp, SIGWINCH, 1);
+		}
+	}
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mcast.h linux_umopenmosix/arch/um/drivers/mcast.h
--- linux-2.4.17/arch/um/drivers/mcast.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mcast.h	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "net_user.h"
+/* Per-device state of the multicast transport, shared by mcast_kern.c and mcast_user.c. */
+struct mcast_data {
+	char *addr;
+	unsigned short port;
+	void *mcast_addr;
+	int ttl;
+	unsigned char hwaddr[ETH_ADDR_LEN];
+	int hw_setup;
+	void *dev;
+};
+
+extern struct net_user_info mcast_user_info;
+
+extern int mcast_user_set_mac(struct mcast_data *pri, unsigned char *hwaddr,
+			      int len);
+extern int mcast_user_write(int fd, void *buf, int len,
+			    struct mcast_data *pri);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mcast_kern.c linux_umopenmosix/arch/um/drivers/mcast_kern.c
--- linux-2.4.17/arch/um/drivers/mcast_kern.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mcast_kern.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,167 @@
+/*
+ * user-mode-linux networking multicast transport
+ * Copyright (C) 2001 by Harald Welte
+ *
+ * based on the existing uml-networking code, which is
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ *
+ * Licensed under the GPL.
+ */
+
+#include "linux/kernel.h"
+#include "linux/init.h"
+#include "linux/netdevice.h"
+#include "linux/etherdevice.h"
+#include "linux/in.h"
+#include "linux/inet.h"
+#include "net_kern.h"
+#include "net_user.h"
+#include "mcast.h"
+#include "mcast_kern.h"
+/* Default settings for every mcast device slot. NOTE(review): the default port is stored as-is while mcast_setup() stores htons() of a user-supplied port - confirm which byte order is intended. */
+struct mcast_data mcast_priv[MAX_UML_NETDEV] = {
+	[ 0 ... MAX_UML_NETDEV - 1 ] =
+	{
+		addr:		"239.192.168.1",
+		port:		1102,
+		ttl:		1,
+		hwaddr:		{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
+		hw_setup:	0,
+	}
+};
+/* Creates the ethernet device, copies slot 'index' of mcast_priv into its private area, and logs the configuration. Returns NULL if init_etherdev() fails. */
+struct net_device *mcast_init(int private_size, int index)
+{
+	struct net_device *dev;
+	struct uml_net_private *pri;
+	struct mcast_data *dpri;
+
+	dev = init_etherdev(NULL, private_size);
+	if (!dev)
+		return NULL;
+
+	pri = dev->priv;
+	dpri = (struct mcast_data *) pri->user;
+	*dpri = mcast_priv[index];
+	memcpy(dev->dev_addr, dpri->hwaddr, ETH_ALEN);
+	printk("mcast backend ");
+	if(dpri->hw_setup)
+		printk("ethernet address=%x:%x:%x:%x:%x:%x ",
+		       dpri->hwaddr[0], dpri->hwaddr[1], dpri->hwaddr[2],
+		       dpri->hwaddr[3], dpri->hwaddr[4], dpri->hwaddr[5]);
+
+	printk("multicast adddress: %s:%u, TTL:%u ",
+	       dpri->addr, dpri->port, dpri->ttl);
+
+	printk("\n");
+	return(dev);
+}
+/* Returns eth_type_trans() of the buffer for its own device. */
+static unsigned short mcast_protocol(struct sk_buff *skb)
+{
+	return eth_type_trans(skb, skb->dev);
+}
+/* Copies the new MAC into the net_device and into the transport's private data. */
+static int mcast_set_mac(struct sockaddr *addr, void *data)
+{
+	struct mcast_data *pri = data;
+	struct net_device *dev = pri->dev;
+	struct sockaddr *hwaddr = addr;
+
+	memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN);
+	return mcast_user_set_mac(pri, hwaddr->sa_data, ETH_ALEN);
+}
+/* Receives one packet into the buffer returned by ether_adjust_skb(); returns -ENOMEM if that returns NULL, otherwise the result of net_recvfrom(). */
+static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
+{
+	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
+	if(*skb == NULL) return(-ENOMEM);
+	return(net_recvfrom(fd, (*skb)->mac.raw,
+			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
+}
+/* Sends the buffer through mcast_user_write(). NOTE(review): uses &lp->user where mcast_init() uses pri->user - equivalent only if 'user' is an array member; confirm. */
+static int mcast_write(int fd, struct sk_buff **skb,
+			struct uml_net_private *lp)
+{
+	return mcast_user_write(fd, (*skb)->data, (*skb)->len,
+				 (struct mcast_data *) &lp->user);
+}
+/* Kernel-side operations of this transport, installed by mcast_setup(). */
+static struct net_kern_info mcast_kern_info = {
+	init:			mcast_init,
+	protocol:		mcast_protocol,
+	set_mac:		mcast_set_mac,
+	read:			mcast_read,
+	write:			mcast_write,
+};
+/* Next free slot in mcast_priv. NOTE(review): never checked against MAX_UML_NETDEV in this file. */
+static int mcast_count = 0;
+/* Parses ",<ethernet address>,<multicast group>,<port>,<ttl>" in place (commas become NULs; empty fields keep their defaults) into the next mcast_priv slot and hooks the transport into 'dev'. */
+void mcast_setup(char *str, struct uml_net *dev)
+{
+	int err, n = mcast_count;
+	int num = 0;
+	char *p1, *p2;
+
+	dev->user = &mcast_user_info;
+	dev->kern = &mcast_kern_info;
+	dev->private_size = sizeof(struct mcast_data);
+	dev->transport_index = mcast_count++;
+
+
+	/* somewhat more sophisticated parser, needed for in_aton */
+
+	p1 = str;
+	if (*str == ',')
+		p1++;
+	while (p1 && *p1) {
+		if ((p2 = strchr(p1, ',')))
+			*p2++ = '\0';
+		if (strlen(p1) > 0) {
+			switch (num) {
+			case 0:
+				/* First argument: Ethernet address */
+				err = setup_etheraddr(p1,
+						      mcast_priv[n].hwaddr);
+				if (!err)
+					mcast_priv[n].hw_setup = 1;
+				break;
+			case 1:
+				/* Second argument: Multicast group */
+				mcast_priv[n].addr = p1;
+				break;
+			case 2:
+				/* Third argument: Port number */
+				mcast_priv[n].port =
+					htons(simple_strtoul(p1, NULL, 10));
+				break;
+			case 3:
+				/* Fourth argument: TTL */
+				mcast_priv[n].ttl =
+					simple_strtoul(p1, NULL, 10);
+				break;
+			}
+		}
+		p1 = p2;	/* NULL after the last field, which ends the loop */
+		num++;
+	}
+
+	printk(KERN_INFO "Configured mcast device: %s:%u-%u\n",
+	       mcast_priv[n].addr, mcast_priv[n].port,
+	       mcast_priv[n].ttl);
+
+	return;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mcast_kern.h linux_umopenmosix/arch/um/drivers/mcast_kern.h
--- linux-2.4.17/arch/um/drivers/mcast_kern.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mcast_kern.h	Sat Jun 29 17:02:18 2002
@@ -0,0 +1,8 @@
+#ifndef __UM_MCAST_KERN_H
+#define __UM_MCAST_KERN_H
+
+#include "net_kern.h"
+
+extern void mcast_setup(char *arg, struct uml_net *dev);
+
+#endif
diff -urN linux-2.4.17/arch/um/drivers/mcast_user.c linux_umopenmosix/arch/um/drivers/mcast_user.c
--- linux-2.4.17/arch/um/drivers/mcast_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mcast_user.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,195 @@
+/*
+ * user-mode-linux networking multicast transport
+ * Copyright (C) 2001 by Harald Welte
+ *
+ * based on the existing uml-networking code, which is
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ *
+ * Licensed under the GPL.
+ *
+ */
+/* NOTE(review): the header names of the seven bare #include lines below are missing from this copy of the patch. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "net_user.h"
+#include "mcast.h"
+#include "kern_util.h"
+#include "user_util.h"
+#include "user.h"
+
+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER)
+/* Allocates a sockaddr_in for addr:port (port is stored unchanged). Returns NULL if the allocation fails. */
+static struct sockaddr_in *new_addr(char *addr, unsigned short port)
+{
+	struct sockaddr_in *sin;
+
+	sin = um_kmalloc(sizeof(struct sockaddr_in));
+	if(sin == NULL){
+		printk("new_addr: allocation of sockaddr_in failed\n");
+		return(NULL);
+	}
+	sin->sin_family = AF_INET;	/* was left unset, but bind() and sendto() read it */
+	sin->sin_addr.s_addr = in_aton(addr);
+	sin->sin_port = port;
+	return(sin);
+}
+/* Builds the multicast socket address from the configured addr/port and remembers the owning device. */
+static void mcast_user_init(void *data, void *dev)
+{
+	struct mcast_data *pri = data;
+
+	pri->mcast_addr = new_addr(pri->addr, pri->port);
+	pri->dev = dev;
+}
+/* Creates the UDP socket, sets REUSEADDR/TTL/LOOP, binds it to the group address and joins the group. Returns the fd or a negative errno value. */
+static int mcast_open(void *data)
+{
+	struct mcast_data *pri = data;
+	struct sockaddr_in *sin = pri->mcast_addr;
+	struct ip_mreq mreq;
+	char addr[sizeof("255.255.255.255\0")];
+	int fd, err, yes = 1;
+
+
+	if(!pri->hw_setup){
+		pri->hwaddr[0] = 0xfe;
+		pri->hwaddr[1] = 0xfd;
+		pri->hwaddr[2] = 0x0;
+		pri->hwaddr[3] = 0x0;
+		pri->hwaddr[4] = 0x0;
+		pri->hwaddr[5] = 0x0;
+		dev_ip_addr(pri->dev, addr, &pri->hwaddr[2]);
+		set_ether_mac(pri->dev, pri->hwaddr);
+	}
+	/* sin is NULL when new_addr() could not allocate it */
+	if ((sin == NULL) || (sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){
+		printk("mcast_open : data socket failed, errno = %d\n",
+		       errno);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
+		printk("mcast_open: SO_REUSEADDR failed, errno = %d\n",
+			errno);
+		close(fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* set ttl according to config */
+	if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
+		       sizeof(pri->ttl)) < 0) {
+		printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n",
+			errno);
+		close(fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* set LOOP, so data does get fed back to local sockets */
+	if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
+		printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n",
+			errno);
+		close(fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* bind socket to mcast address */
+	if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
+		printk("mcast_open : data bind failed, errno = %d\n", errno);
+		close(fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* subscribe to the multicast group */
+	mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr;
+	mreq.imr_interface.s_addr = 0;
+	if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
+		       &mreq, sizeof(mreq)) < 0) {
+		printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n",
+			errno);
+		printk("There appears not to be a multicast-capable network "
+		       "interface on the host.\n");
+		printk("eth0 should be configured in order to use the "
+		       "multicast transport.\n");
+		close(fd);
+		err = -EINVAL;
+		goto out;
+	}
+
+	return(fd);
+ out:
+	return(err);
+}
+/* Leaves the multicast group (a failure is only logged) and closes the socket. */
+static void mcast_close(int fd, void *data)
+{
+	struct ip_mreq mreq;
+	struct mcast_data *pri = data;
+	struct sockaddr_in *sin = pri->mcast_addr;
+
+	mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr;
+	mreq.imr_interface.s_addr = 0;
+	if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
+		       &mreq, sizeof(mreq)) < 0) {
+		printk("mcast_close: IP_DROP_MEMBERSHIP failed, error = %d\n",
+			errno);
+	}
+
+	close(fd);
+}
+/* Sends len bytes of buf to the stored multicast address via net_sendto(). */
+int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri)
+{
+	struct sockaddr_in *data_addr = pri->mcast_addr;
+
+	return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)));
+}
+/* Accepts any MTU: returns the requested value unchanged. */
+static int mcast_set_mtu(int mtu, void *data)
+{
+	return(mtu);
+}
+/* Copies len bytes of hwaddr into pri->hwaddr. Always returns 0. */
+int mcast_user_set_mac(struct mcast_data *pri, unsigned char *hwaddr,
+			 int len)
+{
+	memcpy(pri->hwaddr, hwaddr, len);
+	return 0;
+}
+/* User-side operations of this transport. */
+struct net_user_info mcast_user_info = {
+	init:		mcast_user_init,
+	open:		mcast_open,
+	close:	 	mcast_close,
+	set_mtu:	mcast_set_mtu,
+	add_address:	NULL,
+	delete_address: NULL,
+	max_packet:	MAX_PACKET - ETH_HEADER_OTHER
+};
+
+/*
+ * Overrides for
Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mconsole_kern.c linux_umopenmosix/arch/um/drivers/mconsole_kern.c
--- linux-2.4.17/arch/um/drivers/mconsole_kern.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mconsole_kern.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
+ * Licensed under the GPL
+ */
+
+#include "linux/kernel.h"
+#include "linux/slab.h"
+#include "linux/init.h"
+#include "linux/notifier.h"
+#include "linux/reboot.h"
+#include "linux/utsname.h"
+#include "linux/ctype.h"
+#include "linux/interrupt.h"
+#include "linux/sysrq.h"
+#include "asm/irq.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "kern.h"
+#include "mconsole.h"
+#include "mconsole_kern.h"
+#include "irq_user.h"
+/* Reboot notifier callback: removes the mconsole socket file. */
+static int do_unlink_socket(struct notifier_block *notifier,
+			    unsigned long what, void *data)
+{
+	return(mconsole_unlink_socket());
+}
+
+
+static struct notifier_block reboot_notifier = {
+	notifier_call:		do_unlink_socket,
+	priority:		0,
+};
+/* Requests queued by mconsole_interrupt() for handling in task context. */
+LIST_HEAD(mc_requests);
+/* Task-queue routine: runs and frees queued requests until the list is empty. NOTE(review): save_flags()/restore_flags() with no cli() in between does not mask interrupts - confirm whether that was intended. */
+void mc_task_proc(void *unused)
+{
+	struct mconsole_entry *req;
+	unsigned long flags;
+	int done;
+
+	do {
+		save_flags(flags);
+		req = list_entry(mc_requests.next, struct mconsole_entry,
+				 list);
+		list_del(&req->list);
+		done = list_empty(&mc_requests);
+		restore_flags(flags);
+		req->request.cmd->handler(&req->request);
+		kfree(req);
+	} while(!done);
+}
+
+struct tq_struct mconsole_task = {
+	routine:	mc_task_proc,
+	data:		NULL
+};
+/* IRQ handler: drains pending requests from the socket. Commands flagged as_interrupt run here; the rest are queued for mc_task_proc(). */
+void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	int fd;
+	struct mconsole_entry *new;
+	struct mc_request req;
+
+	fd = (int) dev_id;
+	while (mconsole_get_request(fd, &req)){
+		if(req.cmd->as_interrupt) (*req.cmd->handler)(&req);
+		else {
+			/* size of the list entry, not just the request embedded in it */
+			new = kmalloc(sizeof(*new), GFP_ATOMIC);
+			if(new == NULL)
+				mconsole_reply(&req, "Out of memory", 1, 0);
+			else {
+				new->request = req;
+				list_add(&new->list, &mc_requests);
+			}
+		}
+	}
+	if(!list_empty(&mc_requests)) schedule_task(&mconsole_task);
+	reactivate_fd(fd);
+}
+/* Replies with the five system_utsname fields, separated by spaces. */
+void mconsole_version(struct mc_request *req)
+{
+	char version[256];
+
+	sprintf(version, "%s %s %s %s %s", system_utsname.sysname,
+		system_utsname.nodename, system_utsname.release,
+		system_utsname.version, system_utsname.machine);
+	mconsole_reply(req, version, 0, 0);
+}
+/* Help text: one literal with explicit \n\ continuations; the <dev>/<config> placeholders were restored after being lost from this copy. */
+#define UML_MCONSOLE_HELPTEXT \
+"Commands:\n\
+    version - Get kernel version\n\
+    help - Print this message\n\
+    halt - Halt UML\n\
+    reboot - Reboot UML\n\
+    config <dev>=<config> - Add a new device to UML;\n\
+	same syntax as command line\n\
+    remove <dev> - Remove a device from the client\n\
+"
+
+void mconsole_help(struct mc_request *req)
+{
+	mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0);
+}
+/* halt, reboot and cad all send an empty success reply before acting. */
+void mconsole_halt(struct mc_request *req)
+{
+	mconsole_reply(req, "", 0, 0);
+	machine_halt();
+}
+
+void mconsole_reboot(struct mc_request *req)
+{
+	mconsole_reply(req, "", 0, 0);
+	machine_restart(NULL);
+}
+
+extern void ctrl_alt_del(void);
+
+void mconsole_cad(struct mc_request *req)
+{
+	mconsole_reply(req, "", 0, 0);
+	ctrl_alt_del();
+}
+/* Devices that can be configured or removed through the console. */
+LIST_HEAD(mconsole_devices);
+
+void mconsole_register_dev(struct mc_device *new)
+{
+	list_add(&new->list, &mconsole_devices);
+}
+/* Returns the first registered device whose name is a prefix of 'name', or NULL. */
+static struct mc_device *mconsole_find_dev(char *name)
+{
+	struct list_head *ele;
+	struct mc_device *dev;
+
+	list_for_each(ele, &mconsole_devices){
+		dev = list_entry(ele, struct mc_device, list);
+		if(!strncmp(name, dev->name, strlen(dev->name)))
+			return(dev);
+	}
+	return(NULL);
+}
+/* "config <dev>...": passes the text after the device name to that device's config hook and replies with its result. */
+void mconsole_config(struct mc_request *req)
+{
+	struct mc_device *dev;
+	char *ptr = req->request.data;
+	int err;
+
+	ptr += strlen("config");
+	while(isspace(*ptr)) ptr++;
+	dev = mconsole_find_dev(ptr);
+	if(dev == NULL){
+		mconsole_reply(req, "Bad configuration option", 1, 0);
+		return;
+	}
+	err = (*dev->config)(&ptr[strlen(dev->name)]);
+	mconsole_reply(req, "", err, 0);
+}
+/* "remove <dev>...": same as mconsole_config(), but calls the device's remove hook. */
+void mconsole_remove(struct mc_request *req)
+{
+	struct mc_device *dev;
+	char *ptr = req->request.data;
+	int err;
+
+	ptr += strlen("remove");
+	while(isspace(*ptr)) ptr++;
+	dev = mconsole_find_dev(ptr);
+	if(dev == NULL){
+		mconsole_reply(req, "Bad remove option", 1, 0);
+		return;
+	}
+	err = (*dev->remove)(&ptr[strlen(dev->name)]);
+	mconsole_reply(req, "", err, 0);
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+void mconsole_sysrq(struct mc_request *req)
+{
+	char *ptr = req->request.data;
+
+	ptr += strlen("sysrq");
+	while(isspace(*ptr)) ptr++;
+	/* first non-blank character after "sysrq" is the key */
+	handle_sysrq(*ptr, (struct pt_regs *) &current->thread.process_regs,
+		     NULL, NULL);
+	mconsole_reply(req, "", 0, 0);
+}
+#else
+void mconsole_sysrq(struct mc_request *req)
+{
+	mconsole_reply(req, "Sysrq not compiled in", 1, 0);
+}
+#endif
+/* Creates the listening socket, registers the reboot notifier and requests the IRQ. Returns 0 on success, 1 on failure. NOTE(review): the socket is not released when um_request_irq() fails. */
+int mconsole_init(void)
+{
+	int err;
+	int sock;
+
+	sock = mconsole_create_listening_socket();
+	if (sock < 0) {
+		printk("Failed to initialize management console\n");
+		return 1;
+	}
+
+	register_reboot_notifier(&reboot_notifier);
+
+	err = um_request_irq(MCONSOLE_IRQ, sock, mconsole_interrupt,
+			     SA_INTERRUPT | SA_SHIRQ, "mconsole",
+			     (void *)sock);
+	if (err) {
+		printk("Failed to get IRQ for management console\n");
+		return 1;
+	}
+
+	printk("mconsole initialized on %s\n", mconsole_socket_name);
+	return 0;
+}
+
+__initcall(mconsole_init);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mconsole_user.c linux_umopenmosix/arch/um/drivers/mconsole_user.c
--- linux-2.4.17/arch/um/drivers/mconsole_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mconsole_user.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
+ * Licensed under the GPL
+ */
+/* NOTE(review): the header names of the nine bare #include lines below are missing from this copy of the patch. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user.h"
+#include "mconsole.h"
+#include "umid.h"
+/* Command table: name, handler, and whether the handler may run directly from the interrupt handler. */
+static struct mconsole_command commands[] = {
+	{ "version", mconsole_version, 1 },
+	{ "halt", mconsole_halt, 0 },
+	{ "reboot", mconsole_reboot, 0 },
+	{ "config", mconsole_config, 0 },
+	{ "remove", mconsole_remove, 0 },
+	{ "sysrq", mconsole_sysrq, 1 },
+	{ "help", mconsole_help, 1 },
+	{ "cad", mconsole_cad, 1 },
+};
+
+char mconsole_socket_name[256];
+/* Returns 1 if the message carries SCM_CREDENTIALS whose uid equals this process's uid, else 0. */
+static int has_correct_credentials(struct msghdr *msg)
+{
+	struct cmsghdr *cmsg;
+
+	cmsg = CMSG_FIRSTHDR(msg);
+	while (cmsg != NULL) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_CREDENTIALS) {
+			struct ucred *cred;
+
+			cred = (struct ucred *)CMSG_DATA(cmsg);
+			if (cred->uid == getuid())
+				return 1;
+		}
+		cmsg = CMSG_NXTHDR(msg, cmsg);
+	}
+
+	return 0;
+}
+/* Sends 'reply' as a bare string (no reply header) to the request's origin. */
+int mconsole_reply_v0(struct mc_request *req, char *reply)
+{
+	struct iovec iov;
+	struct msghdr msg;
+
+	iov.iov_base = reply;
+	iov.iov_len = strlen(reply);
+
+	msg.msg_name = &(req->origin);
+	msg.msg_namelen = req->originlen;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	return sendmsg(req->originating_fd, &msg, 0);
+}
+/* Returns the first table entry whose name is a prefix of the request data, or NULL. The loop header was reconstructed after being lost from this copy. */
+static struct mconsole_command *mconsole_parse(struct mc_request *req)
+{
+	struct mconsole_command *cmd;
+	int i;
+
+	for(i=0;i<sizeof(commands)/sizeof(commands[0]);i++){
+		cmd = &commands[i];
+		if(!strncmp(req->request.data, cmd->command,
+			    strlen(cmd->command))){
+			return(cmd);
+		}
+	}
+	return(NULL);
+}
+
+#define MIN(a,b) ((a)<(b) ? (a):(b))
+/* Receives one request from fd into *req. Returns 1 when req->cmd holds a known command; returns 0 on receive failure, bad credentials, a version 0 client or an unknown command. */
+int mconsole_get_request(int fd, struct mc_request *req)
+{
+	char anc[64];
+	struct iovec iov;
+	struct msghdr msg;
+	int len;
+
+	iov.iov_base = &req->request;
+	iov.iov_len = sizeof(req->request);
+
+	msg.msg_name = &(req->origin);
+	msg.msg_namelen = sizeof(req->origin);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = anc;
+	msg.msg_controllen = sizeof(anc);
+	msg.msg_flags = 0;
+
+	req->len = recvmsg(fd, &msg, 0);
+	if (req->len < 0)
+		return 0;
+	/* record the reply address before any mconsole_reply(), which reads both fields */
+	req->originlen = msg.msg_namelen;
+	req->originating_fd = fd;
+
+	if(!has_correct_credentials(&msg)){
+		mconsole_reply(req, "Permission denied", 1, 0);
+		return(0);
+	}
+
+	if(req->request.magic != MCONSOLE_MAGIC){
+		/* Unversioned request */
+		len = MIN(sizeof(req->request.data) - 1,
+			  strlen((char *) &req->request));
+		memmove(req->request.data, &req->request, len);
+		req->request.data[len] = '\0';
+
+		req->request.magic = MCONSOLE_MAGIC;
+		req->request.version = 0;
+		req->request.len = len;
+
+		mconsole_reply_v0(req, "ERR Version 0 mconsole clients are "
+				  "not supported by this driver");
+		return(0);
+	}
+
+	req->cmd = mconsole_parse(req);
+	if(req->cmd == NULL){
+		mconsole_reply(req, "Unknown command", 1, 0);
+		return(0);
+	}
+
+	return(1);
+}
+/* Sends str to the request's origin in packets of at most MCONSOLE_MAX_DATA - 1 bytes; 'err' is set on the first packet only and 'more' is forced to 1 on all but the last. Returns 0 or -errno. */
+int mconsole_reply(struct mc_request *req, char *str, int err, int more)
+{
+	struct iovec iov;
+	struct msghdr msg;
+	struct mconsole_reply reply;
+	int total, len, n;
+
+	msg.msg_name = &(req->origin);
+	msg.msg_namelen = req->originlen;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	total = strlen(str);
+	do {
+		reply.err = err;
+
+		/* err can only be true on the first packet */
+		err = 0;
+
+		len = MIN(total, MCONSOLE_MAX_DATA - 1);
+
+		if(len == total) reply.more = more;
+		else reply.more = 1;
+
+		memcpy(reply.data, str, len);
+		reply.data[len] = '\0';
+		total -= len;
+		str += len;	/* advance, or every packet would repeat the first chunk */
+		reply.len = len + 1;
+
+		iov.iov_base = &reply;
+		iov.iov_len = sizeof(reply) + reply.len - sizeof(reply.data);
+		n = sendmsg(req->originating_fd, &msg, 0);
+		if(n < 0) return(-errno);
+	} while(total > 0);
+	return(0);
+}
+/* Removes the socket file. Always returns 0. */
+int mconsole_unlink_socket(void)
+{
+	unlink(mconsole_socket_name);
+	return 0;
+}
+/* Creates the datagram socket, binds it to the umid-derived "mconsole" path and enables SO_PASSCRED. Returns the fd, or -1 on failure with the socket closed. */
+int mconsole_create_listening_socket(void)
+{
+	struct sockaddr_un addr;
+	char file[256];
+	int sock, err, yes = 1;
+
+	sock = socket(PF_UNIX, SOCK_DGRAM, 0);
+	if (sock < 0) {
+		printk("create_listening_socket - socket failed, errno = %d\n",
+		       errno);
+		return(-1);
+	}
+
+	addr.sun_family = AF_UNIX;
+
+	if(umid_file_name("mconsole", file, sizeof(file))) goto out_close;
+	if(strlen(file) >= sizeof(addr.sun_path)){	/* file[] is larger than sun_path */
+		printk("create_listening_socket - socket name '%s' is too "
+		       "long\n", file);
+		goto out_close;
+	}
+
+	strcpy(mconsole_socket_name, file);
+	strcpy(addr.sun_path, file);
+
+	err = bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+	if (err < 0) {
+		if (errno != EADDRINUSE) {
+			printk("create_listening_socket - bind failed, "
+			       "errno = %d\n", errno);
+			goto out_close;
+		}
+	}
+
+	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &yes, sizeof(yes));
+
+	return sock;
+
+ out_close:
+	close(sock);	/* do not leak the descriptor on failure */
+	return(-1);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/mmapper_kern.c linux_umopenmosix/arch/um/drivers/mmapper_kern.c
--- linux-2.4.17/arch/um/drivers/mmapper_kern.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/mmapper_kern.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,146 @@
+/*
+ * arch/um/drivers/mmapper_kern.c
+ *
+ * BRIEF MODULE DESCRIPTION
+ *
+ * Copyright (C) 2000 RidgeRun, Inc.
+ * Author: RidgeRun, Inc.
+ *         Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mem_user.h"
+#include "user_util.h"
+/* Size and address of the "mmapper" iomem region, filled in by mmapper_init(); v_buf is set to the same address as p_buf. */
+static unsigned long mmapper_size;
+static char *p_buf = NULL;
+static char *v_buf = NULL;
+/* Copies up to count bytes starting at *ppos to user space, clamped to the end of the region. Returns bytes copied, -EINVAL for an offset outside the region, or -EFAULT. NOTE(review): *ppos is not advanced, as before. */
+static ssize_t
+mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos)
+{
+	if(*ppos < 0 || *ppos > mmapper_size)
+		return -EINVAL;
+
+	if(count > mmapper_size - *ppos)
+		count = mmapper_size - *ppos;	/* bytes left, not the overshoot */
+
+	if(copy_to_user(buf,&v_buf[*ppos],count))
+		return -EFAULT;
+
+	/* a zero-length copy at end of region falls through and returns 0 */
+
+	return count;
+}
+/* Copies up to count bytes from user space into the region at *ppos, clamped to the end of the region. Same return values and *ppos caveat as mmapper_read(). */
+static ssize_t
+mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+{
+	if(*ppos < 0 || *ppos > mmapper_size)
+		return -EINVAL;
+
+	if(count > mmapper_size - *ppos)
+		count = mmapper_size - *ppos;	/* bytes left, not the overshoot */
+
+	if(copy_from_user(&v_buf[*ppos],buf,count))
+		return -EFAULT;
+
+	/* a zero-length copy at end of region falls through and returns 0 */
+
+	return count;
+}
+/* No ioctls are supported. */
+static int
+mmapper_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+	 unsigned long arg)
+{
+	return(-ENOIOCTLCMD);
+}
+/* Maps the region into the caller's vma; only offset 0 is accepted. Returns 0 or -EINVAL. NOTE(review): the requested size is not checked against mmapper_size. */
+static int
+mmapper_mmap(struct file *file, struct vm_area_struct * vma)
+{
+	int ret = -EINVAL;
+	int size;
+
+	lock_kernel();
+	if (vma->vm_pgoff != 0)
+		goto out;
+
+	size = vma->vm_end - vma->vm_start;
+
+	/* XXX A comment above remap_page_range says it should only be
+	 * called when the mm semaphore is held
+	 */
+	if (remap_page_range(vma->vm_start, (unsigned long) p_buf,
+			     size, vma->vm_page_prot))
+		goto out;
+	ret = 0;
+out:
+	unlock_kernel();
+	return ret;
+}
+/* open and release keep no per-file state. */
+static int
+mmapper_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static int
+mmapper_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static struct file_operations mmapper_fops = {
+	owner:		THIS_MODULE,
+	read:		mmapper_read,
+	write:		mmapper_write,
+	ioctl:		mmapper_ioctl,
+	mmap:		mmapper_mmap,
+	open:		mmapper_open,
+	release:	mmapper_release,
+};
+/* Looks up the "mmapper" iomem region and registers the devfs node and symlink. Always returns 0. NOTE(review): the find_iomem() and devfs results are not checked. */
+static int __init mmapper_init(void)
+{
+	printk(KERN_INFO "Mapper v0.1\n");
+
+	p_buf = (char *) find_iomem("mmapper", &mmapper_size);
+
+	v_buf = p_buf;
+
+	devfs_register (NULL, "mmapper", DEVFS_FL_DEFAULT,
+			30, 0, S_IFCHR | S_IRUGO | S_IWUGO,
+			&mmapper_fops, NULL);
+	devfs_mk_symlink(NULL, "mmapper", DEVFS_FL_DEFAULT, "mmapper0",
+			 NULL, NULL);
+	return(0);
+}
+
+static void mmapper_exit(void)
+{
+}
+
+module_init(mmapper_init);
+module_exit(mmapper_exit);
+
+MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>");
+MODULE_DESCRIPTION("DSPLinux simulator mmapper driver");
+/*
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/drivers/net_kern.c linux_umopenmosix/arch/um/drivers/net_kern.c
--- linux-2.4.17/arch/um/drivers/net_kern.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/drivers/net_kern.c	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,691 @@
+/*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include "linux/config.h"
+#include "linux/kernel.h"
+#include "linux/netdevice.h"
+#include "linux/skbuff.h"
+#include "linux/socket.h"
+#include "linux/spinlock.h"
+#include "linux/module.h"
+#include "linux/init.h"
+#include "linux/etherdevice.h"
+#include "linux/list.h"
+#include "linux/inetdevice.h"
+#include "linux/ctype.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "net_kern.h"
+#include "net_user.h"
+#include "slip.h"
+#include "slip_kern.h"
+#include "etap.h"
+#include "etap_kern.h"
+#include "tuntap.h"
+#include "tuntap_kern.h"
+#include "daemon.h"
+#include "daemon_kern.h"
+#include "mcast.h"
+#include "mcast_kern.h"
+#include "mconsole_kern.h"
+#include "init.h"
+#include "irq_user.h"
+
+LIST_HEAD(opened);
+/* One slot per possible eth device; all slots start unconfigured. */
+struct uml_net devices[MAX_UML_NETDEV] = {
+	[ 0 ... MAX_UML_NETDEV - 1 ] =
+	{
+		dev:		NULL,
+		user:		NULL,
+		kern:		NULL,
+		private_size:	0,
+	}
+};
+/* Parses "<n>=<transport><args>", checks that slot n is in range and unused, and calls the matching transport setup. Returns 0 on success, 1 on error; *index_out receives n when index_out is non-NULL. */
+static int eth_setup_common(char *str, int *index_out)
+{
+	char *end;
+	int n;
+
+	n = simple_strtoul(str, &end, 0);
+	if(end == str){
+		printk(KERN_ERR "eth_setup: Failed to parse '%s'\n", str);
+		return(1);
+	}
+	/* valid slots are 0 .. count-1, so n == count must be rejected too */
+	if((n < 0) || (n >= sizeof(devices)/sizeof(devices[0]))){
+		printk(KERN_ERR "eth_setup: device %d out of range\n", n);
+		return(1);
+	}
+	str = end;
+	if(*str != '='){
+		printk(KERN_ERR
+		       "eth_setup: expected '=' after device number\n");
+		return(1);
+	}
+	str++;
+	if(devices[n].dev != NULL){
+		printk(KERN_ERR "eth_setup: Device %d already configured\n",
+		       n);
+		return(1);
+	}
+	if(index_out) *index_out = n;
+#ifdef CONFIG_UML_NET_ETHERTAP
+	if(!strncmp(str, "ethertap", strlen("ethertap"))){
+		ethertap_setup(&str[strlen("ethertap")], &devices[n]);
+		return(0);
+	}
+#endif
+#ifdef CONFIG_UML_NET_TUNTAP
+	if(!strncmp(str, "tuntap", strlen("tuntap"))){
+		tuntap_setup(&str[strlen("tuntap")], &devices[n]);
+		return(0);
+	}
+#endif
+#ifdef CONFIG_UML_NET_DAEMON
+	if(!strncmp(str, "daemon", strlen("daemon"))){
+		daemon_setup(&str[strlen("daemon")], &devices[n]);
+		return(0);
+	}
+#endif
+#ifdef CONFIG_UML_NET_SLIP
+	if(!strncmp(str, "slip", strlen("slip"))){
+		slip_setup(&str[strlen("slip")], &devices[n]);
+		return(0);
+	}
+#endif
+#ifdef CONFIG_UML_NET_MCAST
+	if(!strncmp(str, "mcast", strlen("mcast"))){
+		mcast_setup(&str[strlen("mcast")], &devices[n]);
+		return(0);
+	}
+#endif
+	printk(KERN_ERR "Unknown transport in eth_setup : %s\n", str);
+	return(1);
+}
+/* Runs eth_setup_common() and ignores its result; always returns 1. */
+static int eth_setup(char *str)
+{
+	eth_setup_common(str, NULL);
+	return(1);
+}
+
+#ifdef CONFIG_UML_NET_ETHERTAP
+#define UML_NET_ETHERTAP_HELP \
+" eth[0-9]+=ethertap,,,\n" \
+" eth0=ethertap,tap0,,192.168.0.1\n\n"
+#else
+#define UML_NET_ETHERTAP_HELP
+#endif
+#ifdef CONFIG_UML_NET_TUNTAP
+#define UML_NET_TUNTAP_HELP \
+" eth[0-9]+=tuntap,,,\n" \
+" eth0=tuntap,,fe:fd:0:0:0:1,192.168.0.1\n\n"
+#else
+#define UML_NET_TUNTAP_HELP +#endif +#ifdef CONFIG_UML_NET_DAEMON +#define UML_NET_DAEMON_HELP \ +" eth[0-9]+=daemon,,,,\n" \ +" eth0=daemon,unix,/tmp/uml.ctl,/tmp/uml.data\n\n" +#else +#define UML_NET_DAEMON_HELP +#endif +#ifdef CONFIG_UML_NET_SLIP +#define UML_NET_SLIP_HELP \ +" eth[0-9]+=slip,\n" \ +" eth0=slip,192.168.0.1\n\n" +#else +#define UML_NET_SLIP_HELP +#endif +#ifdef CONFIG_UML_NET_MCAST +#define UML_NET_MCAST_HELP \ +" eth[0-9]+=mcast,,
,,\n" \ +" eth0=mcast,,224.2.3.4:5555,3\n\n" +#else +#define UML_NET_MCAST_HELP +#endif + +__setup("eth", eth_setup); +__uml_help(eth_setup, +"eth[0-9]+=,\n" +" Configure a network device. Formats and examples follow (one \n" +" for each configured transport).\n\n" +UML_NET_ETHERTAP_HELP +UML_NET_TUNTAP_HELP +UML_NET_DAEMON_HELP +UML_NET_SLIP_HELP +UML_NET_MCAST_HELP +); +int ndev = 0; + +static int uml_net_rx(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + int pkt_len; + struct sk_buff *skb; + + /* If we can't allocate memory, try again next round. */ + if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { + lp->stats.rx_dropped++; + reactivate_fd(lp->fd); + return 0; + } + + skb->dev = dev; + skb_put(skb, dev->mtu); + skb->mac.raw = skb->data; + pkt_len = (*lp->read)(lp->fd, &skb, lp); + + reactivate_fd(lp->fd); + if (pkt_len > 0) { + skb_trim(skb, pkt_len); + skb->protocol = (*lp->protocol)(skb); + netif_rx(skb); + + lp->stats.rx_bytes += skb->len; + lp->stats.rx_packets++; + return pkt_len; + } + + kfree_skb(skb); + return pkt_len; +} + +void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct uml_net_private *lp = dev->priv; + int err; + + if (netif_running(dev)) { + spin_lock(&lp->lock); + while((err = uml_net_rx(dev)) > 0) ; + if(err < 0) { + printk(KERN_ERR + "Device '%s' read returned %d, shutting it " + "down\n", dev->name, err); + dev->flags &= ~IFF_UP; + dev_close(dev); + } + spin_unlock(&lp->lock); + } +} + +static int uml_net_open(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + int err; + + spin_lock(&lp->lock); + + if(lp->fd >= 0){ + err = -ENXIO; + goto out; + } + + lp->fd = (*lp->open)(&lp->user); + if(lp->fd < 0){ + err = lp->fd; + goto out; + } + + err = um_request_irq(dev->irq, lp->fd, uml_net_interrupt, + SA_INTERRUPT | SA_SHIRQ, dev->name, dev); + if(err != 0){ + printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); + (*lp->close)(lp->fd, 
&lp->user); + lp->fd = -1; + err = -ENETUNREACH; + } + + lp->tl.data = (unsigned long) &lp->user; + netif_start_queue(dev); + + list_add(&lp->list, &opened); + MOD_INC_USE_COUNT; + out: + spin_unlock(&lp->lock); + return(err); +} + +static int uml_net_close(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + + netif_stop_queue(dev); + spin_lock(&lp->lock); + + free_irq(dev->irq, dev); + (*lp->close)(lp->fd, &lp->user); + lp->fd = -1; + list_del(&lp->list); + + MOD_DEC_USE_COUNT; + spin_unlock(&lp->lock); + return 0; +} + +static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + unsigned long flags; + int len; + + netif_stop_queue(dev); + + spin_lock_irqsave(&lp->lock, flags); + + len = (*lp->write)(lp->fd, &skb, lp); + + if(len == skb->len) { + lp->stats.tx_packets++; + lp->stats.tx_bytes += skb->len; + dev->trans_start = jiffies; + netif_start_queue(dev); + + /* this is normally done in the interrupt when tx finishes */ + netif_wake_queue(dev); + } + else if(len == 0){ + netif_start_queue(dev); + lp->stats.tx_dropped++; + } + else { + netif_start_queue(dev); + printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); + } + + spin_unlock_irqrestore(&lp->lock, flags); + + dev_kfree_skb(skb); + + return 0; +} + +static struct net_device_stats *uml_net_get_stats(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + return &lp->stats; +} + +static void uml_net_set_multicast_list(struct net_device *dev) +{ + if (dev->flags & IFF_PROMISC) return; + else if (dev->mc_count) dev->flags |= IFF_ALLMULTI; + else dev->flags &= ~IFF_ALLMULTI; +} + +static void uml_net_tx_timeout(struct net_device *dev) +{ + dev->trans_start = jiffies; + netif_wake_queue(dev); +} + +static int uml_net_set_mac(struct net_device *dev, void *addr) +{ + struct uml_net_private *lp = dev->priv; + struct sockaddr *hwaddr = addr; + int err; + + spin_lock(&lp->lock); + + err = (*lp->set_mac)(hwaddr, 
&lp->user); + + spin_unlock(&lp->lock); + + return err; +} + +static int uml_net_change_mtu(struct net_device *dev, int new_mtu) +{ + struct uml_net_private *lp = dev->priv; + int err = 0; + + spin_lock(&lp->lock); + + new_mtu = (*lp->set_mtu)(new_mtu, &lp->user); + if(new_mtu < 0){ + err = new_mtu; + goto out; + } + + dev->mtu = new_mtu; + + out: + spin_unlock(&lp->lock); + return err; +} + +static int uml_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + return(-EINVAL); +} + +void uml_net_user_timer_expire(unsigned long _conn) +{ +#ifdef undef + struct connection *conn = (struct connection *)_conn; + + dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); + do_connect(conn); +#endif +} + +static int eth_configure(struct uml_net *device, int n) +{ + struct net_device *dev; + struct uml_net_private *lp; + + device->private_size += sizeof(struct uml_net_private) + + sizeof(((struct uml_net_private *) 0)->user); + printk(KERN_INFO "Netdevice %d : ", n); + dev = (*device->kern->init)(device->private_size, + device->transport_index); + device->dev = dev; + + if (dev == NULL){ + printk(KERN_ERR "eth_configure: Out of memory on device %d\n", + n); + return(1); + } + + dev->mtu = device->user->max_packet; + dev->open = uml_net_open; + dev->hard_start_xmit = uml_net_start_xmit; + dev->stop = uml_net_close; + dev->get_stats = uml_net_get_stats; + dev->set_multicast_list = uml_net_set_multicast_list; + dev->tx_timeout = uml_net_tx_timeout; + dev->set_mac_address = uml_net_set_mac; + dev->change_mtu = uml_net_change_mtu; + dev->do_ioctl = uml_net_ioctl; + dev->watchdog_timeo = (HZ >> 1); + dev->irq = UM_ETH_IRQ; + + lp = dev->priv; + spin_lock_init(&lp->lock); + init_timer(&lp->tl); + lp->tl.function = uml_net_user_timer_expire; + lp->list = ((struct list_head) LIST_HEAD_INIT(lp->list)); + memset(&lp->stats, 0, sizeof(lp->stats)); + lp->fd = -1; + lp->protocol = device->kern->protocol; + lp->set_mac = device->kern->set_mac; + lp->open = 
device->user->open; + lp->close = device->user->close; + lp->read = device->kern->read; + lp->write = device->kern->write; + lp->add_address = device->user->add_address; + lp->delete_address = device->user->delete_address; + lp->set_mtu = device->user->set_mtu; + + if(device->user->init) + (*device->user->init)(&lp->user, dev); + return(0); +} + +int __init uml_net_probe(void) +{ + int i; + + for(i = 0; i < sizeof(devices)/sizeof(devices[0]); i++){ + if(devices[i].user == NULL) continue; + eth_configure(&devices[i], i); + } + return(0); +} + +static int net_config(char *str) +{ + int err, n; + + str = uml_strdup(str); + if(str == NULL){ + printk(KERN_ERR "net_config failed to strdup string\n"); + return(1); + } + err = eth_setup_common(str, &n); + if(err){ + kfree(str); + return(err); + } + err = eth_configure(&devices[n], n); + return(err); +} + +static int net_remove(char *str) +{ + struct net_device *dev; + struct uml_net_private *lp; + int n; + + if(!isdigit(*str)) return(-1); + n = *str - '0'; + if(devices[n].dev == NULL) return(0); + dev = devices[n].dev; + lp = dev->priv; + if(lp->fd > 0) return(-1); + unregister_netdev(dev); + devices[n].dev = NULL; + return(0); +} + +static struct mc_device net_mc = { + name: "eth", + config: net_config, + remove: net_remove, +}; + +static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = ptr; + u32 addr = ifa->ifa_address; + u32 netmask = ifa->ifa_mask; + struct net_device *dev = ifa->ifa_dev->dev; + struct uml_net_private *lp; + void (*proc)(unsigned char *, unsigned char *, void *); + unsigned char addr_buf[4], netmask_buf[4]; + + if(dev->open != uml_net_open) return(NOTIFY_DONE); + + lp = dev->priv; + if(lp->fd == -1) return(NOTIFY_DONE); + + proc = NULL; + switch (event){ + case NETDEV_UP: + proc = lp->add_address; + break; + case NETDEV_DOWN: + proc = lp->delete_address; + break; + } + if(proc != NULL){ + addr_buf[0] = addr & 0xff; + addr_buf[1] = (addr 
>> 8) & 0xff; + addr_buf[2] = (addr >> 16) & 0xff; + addr_buf[3] = addr >> 24; + netmask_buf[0] = netmask & 0xff; + netmask_buf[1] = (netmask >> 8) & 0xff; + netmask_buf[2] = (netmask >> 16) & 0xff; + netmask_buf[3] = netmask >> 24; + (*proc)(addr_buf, netmask_buf, &lp->user); + } + return(NOTIFY_DONE); +} + +struct notifier_block uml_inetaddr_notifier = { + notifier_call: uml_inetaddr_event, +}; + +static int uml_net_init(void) +{ + mconsole_register_dev(&net_mc); + register_inetaddr_notifier(¨_inetaddr_notifier); + return(0); +} + +__initcall(uml_net_init); + +static void close_devices(void) +{ + struct list_head *ele; + struct uml_net_private *lp; + + list_for_each(ele, &opened){ + lp = list_entry(ele, struct uml_net_private, list); + (*lp->close)(lp->fd, &lp->user); + } +} + +__uml_exitcall(close_devices); + +int setup_etheraddr(char *str, unsigned char *addr) +{ + char *end; + int i; + + for(i=0;i<6;i++){ + addr[i] = simple_strtoul(str, &end, 16); + if((end == str) || + ((*end != ':') && (*end != ',') && (*end != '\0'))){ + printk(KERN_ERR + "setup_etheraddr: failed to parse '%s' " + "as an ethernet address\n", str); + return(-1); + } + str = end + 1; + } + if(addr[0] & 1){ + printk(KERN_ERR + "Attempt to assign a broadcast ethernet address to a " + "device disallowed\n"); + return(-1); + } + return(0); +} + +void dev_ip_addr(void *d, char *buf, char *bin_buf) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + u32 addr; + + if((ip == NULL) || ((in = ip->ifa_list) == NULL)){ + printk(KERN_WARNING "dev_ip_addr - device not assigned an " + "IP address\n"); + return; + } + addr = in->ifa_address; + sprintf(buf, "%d.%d.%d.%d", addr & 0xff, (addr >> 8) & 0xff, + (addr >> 16) & 0xff, addr >> 24); + if(bin_buf){ + bin_buf[0] = addr & 0xff; + bin_buf[1] = (addr >> 8) & 0xff; + bin_buf[2] = (addr >> 16) & 0xff; + bin_buf[3] = addr >> 24; + } +} + +void set_ether_mac(void *d, unsigned char *addr) +{ + struct net_device *dev = 
d; + + memcpy(dev->dev_addr, addr, ETH_ALEN); +} + +struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra) +{ + if((skb != NULL) && (skb_tailroom(skb) < extra)){ + struct sk_buff *skb2; + + skb2 = skb_copy_expand(skb, 0, extra, GFP_ATOMIC); + dev_kfree_skb(skb); + skb = skb2; + } + if(skb != NULL) skb_put(skb, extra); + return(skb); +} + +void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, + void *), + void *arg) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + unsigned char address[4], netmask[4]; + + if(ip == NULL) return; + in = ip->ifa_list; + while(in != NULL){ + address[0] = in->ifa_address & 0xff; + address[1] = (in->ifa_address >> 8) & 0xff; + address[2] = (in->ifa_address >> 16) & 0xff; + address[3] = in->ifa_address >> 24; + netmask[0] = in->ifa_mask & 0xff; + netmask[1] = (in->ifa_mask >> 8) & 0xff; + netmask[2] = (in->ifa_mask >> 16) & 0xff; + netmask[3] = in->ifa_mask >> 24; + (*cb)(address, netmask, arg); + in = in->ifa_next; + } +} + +void *get_output_buffer(int *len_out) +{ + void *ret; + + ret = (void *) __get_free_pages(GFP_KERNEL, 0); + if(ret) *len_out = PAGE_SIZE; + else *len_out = 0; + return(ret); +} + +void free_output_buffer(void *buffer) +{ + free_pages((unsigned long) buffer, 0); +} + +void tap_setup_common(char *str, char *type, char **dev_name, char *hw_addr, + int *hw_setup, char **gate_addr) +{ + int err; + + if(*str != ','){ + printk(KERN_ERR + "ethertap_setup: expected ',' after '%s'\n", type); + return; + } + str++; + if(*str != ',') *dev_name = str; + str = strchr(str, ','); + if(str == NULL) return; + *str++ = '\0'; + if(*str != ','){ + err = setup_etheraddr(str, hw_addr); + if(!err) *hw_setup = 1; + } + str = strchr(str, ','); + if(str == NULL) return; + *str++ = '\0'; + if(*str != '\0') *gate_addr = str; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/net_kern.h linux_umopenmosix/arch/um/drivers/net_kern.h --- linux-2.4.17/arch/um/drivers/net_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/net_kern.h Sat Jun 29 17:02:09 2002 @@ -0,0 +1,66 @@ +#ifndef __UM_NET_KERN_H +#define __UM_NET_KERN_H + +#include "linux/netdevice.h" +#include "linux/skbuff.h" +#include "linux/socket.h" +#include "linux/list.h" + +#define MAX_UML_NETDEV (16) + +struct uml_net { + struct net_device *dev; + struct net_user_info *user; + struct net_kern_info *kern; + int private_size; + int transport_index; +}; + +struct uml_net_private { + spinlock_t lock; + + struct timer_list tl; + struct list_head list; + struct net_device_stats stats; + int fd; + unsigned short (*protocol)(struct sk_buff *); + int (*set_mac)(struct sockaddr *hwaddr, void *); + int (*open)(void *); + void (*close)(int, void *); + int (*read)(int, struct sk_buff **skb, struct uml_net_private *); + int (*write)(int, struct sk_buff **skb, struct uml_net_private *); + + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + int (*set_mtu)(int mtu, void *); + int user[1]; +}; + +struct net_kern_info { + struct net_device *(*init)(int, int); + unsigned short (*protocol)(struct sk_buff *); + int (*set_mac)(struct sockaddr *hwaddr, void *); + int (*read)(int, struct sk_buff **skb, struct uml_net_private *); + int (*write)(int, struct sk_buff **skb, struct uml_net_private *); +}; + +extern struct net_device *ether_init(int); +extern unsigned short ether_protocol(struct sk_buff *); +extern int ether_set_mac(struct sockaddr *hwaddr, void *); 
+extern int setup_etheraddr(char *str, unsigned char *addr); +extern struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra); +extern void tap_setup_common(char *str, char *type, char **dev_name, + char *hw_addr, int *hw_setup, char **gate_addr); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/net_user.c linux_umopenmosix/arch/um/drivers/net_user.c --- linux-2.4.17/arch/um/drivers/net_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/net_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include "user.h" +#include "user_util.h" +#include "kern_util.h" +#include "net_user.h" + +int tap_open_common(void *dev, int hw_setup, char *gate_addr) +{ + char addr[sizeof("255.255.255.255\0")]; + char ether[ETH_ADDR_LEN]; + + if((gate_addr != NULL) || !hw_setup){ + ether[0] = 0xfe; + ether[1] = 0xfd; + ether[2] = 0x0; + ether[3] = 0x0; + ether[4] = 0x0; + ether[5] = 0x0; + dev_ip_addr(dev, addr, ðer[2]); + } + if(gate_addr != NULL){ + int uml_addr[4], tap_addr[4]; + if(sscanf(addr, "%d.%d.%d.%d", ¨_addr[0], ¨_addr[1], + ¨_addr[2], ¨_addr[3]) != 4){ + printk("Invalid UML IP address - '%s'\n", addr); + return(-EINVAL); + } + if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ + printk("Invalid tap IP address - '%s'\n", + gate_addr); + return(-EINVAL); + } + if((uml_addr[0] == tap_addr[0]) && + (uml_addr[1] == tap_addr[1]) && + (uml_addr[2] == tap_addr[2]) && + (uml_addr[3] == 
/*
 * Read one length-prefixed message from 'fd'.
 *
 * The stream carries an int byte count followed by that many bytes.  The
 * bytes are read into a buffer from um_kmalloc() which is handed to the
 * caller through *output_out; the caller releases it with kfree().
 *
 * Returns 0 on success and -1 on failure.  On failure *output_out is
 * NULL, never a pointer to freed memory.
 */
int read_output(int fd, char **output_out)
{
	char *buf;
	int n, got, len;

	*output_out = NULL;

	if(read(fd, &n, sizeof(n)) != sizeof(n)){
		printk("read_output - read of length failed, errno = %d\n",
		       errno);
		return(-1);
	}
	/* The length comes from the other end of the descriptor; never
	 * pass a negative value on as an allocation size.
	 */
	if(n < 0){
		printk("read_output - bad output length %d\n", n);
		return(-1);
	}
	if((buf = um_kmalloc(n)) == NULL){
		printk("read_output - kmalloc failed\n");
		return(-1);
	}

	/* The data may arrive in several pieces, so keep reading until
	 * all n bytes are in, retrying reads interrupted by a signal.
	 */
	for(got = 0; got < n; got += len){
		len = read(fd, &buf[got], n - got);
		if((len < 0) && (errno == EINTR)){
			len = 0;
			continue;
		}
		if(len <= 0){
			printk("read_output - read of data failed, "
			       "errno = %d\n", errno);
			kfree(buf);
			return(-1);
		}
	}

	*output_out = buf;
	return(0);
}
+ +struct change_data { + char *dev; + char *what; + char *address; + char *netmask; + char *output; +}; + +static void change_tramp(void *arg) +{ + int pid, fds[2]; + struct change_data *data = arg; + char version[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version, data->what, data->dev, + data->address, data->netmask, NULL }; + + sprintf(version, "%d", UML_NET_VERSION); + if(pipe(fds) < 0){ + printk("change_tramp - pipe failed, errno = %d\n", + errno); + return; + } + if((pid = fork()) == 0){ + dup2(fds[1], 1); + close(fds[0]); + execvp(argv[0], argv); + printk("Exec of '%s' failed - errno = %d\n", argv[0], errno); + exit(1); + } + close(fds[1]); + if(read_output(fds[0], &data->output)) data->output = NULL; + waitpid(pid, NULL, 0); +} + +static void change(char *dev, char *what, unsigned char *addr, + unsigned char *netmask) +{ + char addr_buf[sizeof("255.255.255.255\0")]; + char netmask_buf[sizeof("255.255.255.255\0")]; + struct change_data data; + + data.dev = dev; + data.what = what; + sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); + sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], + netmask[2], netmask[3]); + data.address = addr_buf; + data.netmask = netmask_buf; + tracing_cb(change_tramp, &data); + if(data.output != NULL){ + printk("%s", data.output); + kfree(data.output); + } +} + +void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "add", addr, netmask); +} + +void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "del", addr, netmask); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/net_user.h linux_umopenmosix/arch/um/drivers/net_user.h --- linux-2.4.17/arch/um/drivers/net_user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/net_user.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,55 @@ +#ifndef __UM_NET_USER_H__ +#define __UM_NET_USER_H__ + +#define ETH_ADDR_LEN (6) +#define ETH_HEADER_ETHERTAP (16) +#define ETH_HEADER_OTHER (14) +#define ETH_MAX_PACKET (1500) + +#define UML_NET_VERSION (4) + +struct net_user_info { + void (*init)(void *, void *); + int (*open)(void *); + void (*close)(int, void *); + int (*set_mtu)(int mtu, void *); + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + int max_packet; +}; + +extern void ether_user_init(void *data, void *dev); +extern void dev_ip_addr(void *d, char *buf, char *bin_buf); +extern void set_ether_mac(void *d, unsigned char *addr); +extern void iter_addresses(void *d, void (*cb)(unsigned char *, + unsigned char *, void *), + void *arg); + +extern void *get_output_buffer(int *len_out); +extern void free_output_buffer(void *buffer); + +extern int tap_open_common(void *dev, int hw_setup, char *gate_addr); + +extern int read_output(int fd, char **output_out); + +extern int net_read(int fd, void *buf, int len); +extern int net_recvfrom(int fd, void *buf, int len); +extern int net_write(int fd, void *buf, int len); +extern int net_send(int fd, void *buf, int len); +extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); + +extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); +extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/port.c linux_umopenmosix/arch/um/drivers/port.c --- linux-2.4.17/arch/um/drivers/port.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/port.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "chan_user.h" +#include "port.h" + +struct port_chan { + int fd; + int raw; + struct termios tt; + void *kernel_data; +}; + +void *port_init(char *str, int device, struct chan_opts *opts) +{ + struct port_chan *data; + void *kern_data; + char *end; + int port; + + if(*str != ':'){ + printk("port_init : channel type 'port' must specify a " + "port number\n"); + return(NULL); + } + str++; + port = strtoul(str, &end, 0); + if(*end != '\0'){ + printk("port_init : couldn't parse port '%s'\n", str); + return(NULL); + } + + if((kern_data = port_data(port)) == NULL) return(NULL); + + if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + *data = ((struct port_chan) { fd : -1, + raw : opts->raw, + kernel_data : kern_data }); + + return(data); +} + +int port_open(int input, int output, void *d) +{ + struct port_chan *data = d; + int fd; + + fd = port_wait(data->kernel_data); + if((fd >= 0) && data->raw){ + tcgetattr(fd, &data->tt); + raw(fd, 0); + } + data->fd = fd; + return(fd); +} + +void port_close(int fd, void *d) +{ + struct port_chan *data = d; + + close(data->fd); +} + +int port_console_write(int fd, const char *buf, int n, void *d) +{ + struct port_chan *data = d; + + 
/*
 * Create a TCP socket listening on 'port' on all local addresses
 * (INADDR_ANY) with a backlog of 1.
 *
 * Returns the listening descriptor, or -errno if socket(), bind() or
 * listen() fails.  The socket is closed again on failure.
 */
int port_listen_fd(int port)
{
	struct sockaddr_in addr;
	int fd, err;

	fd = socket(PF_INET, SOCK_STREAM, 0);
	if(fd == -1) return(-errno);

	addr.sin_family = AF_INET;
	addr.sin_port = htons(port);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if(bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		goto out_close;

	if(listen(fd, 1) < 0)
		goto out_close;

	return(fd);

 out_close:
	/* save errno first - close() may overwrite it */
	err = -errno;
	close(fd);
	return(err);
}
(cmsg->cmsg_type != SCM_RIGHTS)){ + printk("rcv_fd didn't receive a descriptor\n"); + return(-1); + } + + new = ((int *) CMSG_DATA(cmsg))[0]; + return(new); +} + +int port_connection(int fd) +{ + int new, fds[2]; + struct port_connect_data data; + + if((new = accept(fd, NULL, 0)) < 0) return(-errno); + + if(socketpair(PF_UNIX, SOCK_DGRAM, 0, fds) < 0) return(-errno); + + data = ((struct port_connect_data) + { sock_fd : new, + pipe_fds : { fds[0], fds[1] } }); + + tracing_cb(port_connect_tramp, &data); + return(rcv_fd(fds[0], &data)); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/port.h linux_umopenmosix/arch/um/drivers/port.h --- linux-2.4.17/arch/um/drivers/port.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/port.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __PORT_H__ +#define __PORT_H__ + +extern void *port_data(int port); +extern int port_wait(void *data); +extern void port_kern_close(void *d); +extern int port_connection(int fd); +extern int port_listen_fd(int port); +extern void port_read(int fd, void *data); +extern void port_kern_free(void *d); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/port_kern.c linux_umopenmosix/arch/um/drivers/port_kern.c --- linux-2.4.17/arch/um/drivers/port_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/port_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/list.h" +#include "linux/slab.h" +#include "linux/irq.h" +#include "linux/spinlock.h" +#include "linux/errno.h" +#include "asm/semaphore.h" +#include "asm/errno.h" +#include "kern_util.h" +#include "kern.h" +#include "irq_user.h" +#include "port.h" + +struct port_list { + struct list_head list; + struct semaphore sem; + int port; + int fd; + spinlock_t lock; + struct list_head connections; +}; + +struct port_dev { + struct port_list *port; + int fd; +}; + +struct connection { + struct list_head list; + int fd; +}; + +struct list_head ports = LIST_HEAD_INIT(ports); + +static void port_interrupt(int irq, void *data, struct pt_regs *regs) +{ + struct port_list *port = data; + struct connection *conn; + int fd; + + reactivate_fd(port->fd); + fd = port_connection(port->fd); + if(fd < 0){ + printk("port_connection returned %d\n", -fd); + return; + } + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if(conn == NULL){ + printk("port_interrupt : failed to allocate connection\n"); + close(fd); + return; + } + *conn = ((struct connection) + { list : LIST_HEAD_INIT(conn->list), + fd : fd }); + list_add(&conn->list, &port->connections); + up(&port->sem); +} + +void *port_data(int port_num) +{ + struct list_head *ele; + struct port_list *port; + struct port_dev *dev; + int fd; + + list_for_each(ele, &ports){ + port = list_entry(ele, struct port_list, list); + if(port->port == port_num) goto found; + } + port = kmalloc(sizeof(struct port_list), GFP_KERNEL); + if(port == NULL){ + 
printk(KERN_ERR "Allocation of port list failed\n"); + return(NULL); + } + + fd = port_listen_fd(port_num); + if(fd < 0){ + printk(KERN_ERR "binding to port %d failed, errno = %d\n", + port_num, -fd); + return(NULL); + } + if(um_request_irq(ACCEPT_IRQ, fd, port_interrupt, + SA_INTERRUPT | SA_SHIRQ, "port", port)){ + printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); + return(NULL); + } + + *port = ((struct port_list) + { list : LIST_HEAD_INIT(port->list), + sem : __SEMAPHORE_INITIALIZER(port->sem, 0), + lock : SPIN_LOCK_UNLOCKED, + port : port_num, + fd : fd, + connections : LIST_HEAD_INIT(port->connections) }); + list_add(&port->list, &ports); + + found: + dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); + if(dev == NULL){ + printk(KERN_ERR "Allocation of port device entry failed\n"); + return(NULL); + } + + *dev = ((struct port_dev) + { port : port, + fd : -1 }); + return(dev); +} + +int port_wait(void *data) +{ + struct port_dev *dev = data; + struct connection *conn; + + if(down_interruptible(&dev->port->sem)) return(-ERESTARTSYS); + spin_lock(&dev->port->lock); + conn = list_entry(dev->port->connections.next, struct connection, + list); + list_del(&conn->list); + spin_unlock(&dev->port->lock); + + dev->fd = conn->fd; + kfree(conn); + + return(dev->fd); +} + +void port_kern_free(void *d) +{ + struct port_dev *dev = d; + + kfree(dev); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/pty.c linux_umopenmosix/arch/um/drivers/pty.c --- linux-2.4.17/arch/um/drivers/pty.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/pty.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include "chan_user.h" +#include "user.h" +#include "user_util.h" + +struct pty_chan { + void (*announce)(char *dev_name, int dev); + int dev; + int raw; + struct termios tt; +}; + +void *pty_chan_init(char *str, int device, struct chan_opts *opts) +{ + struct pty_chan *data; + + if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + *data = ((struct pty_chan) { announce : opts->announce, + dev : device, + raw : opts->raw }); + return(data); +} + +int pts_open(int input, int output, void *d) +{ + struct pty_chan *data = d; + int fd; + + if((fd = get_pty()) < 0){ + printk("open_pts : Failed to open pts\n"); + return(-errno); + } + if(data->raw){ + tcgetattr(fd, &data->tt); + raw(fd, 0); + } + if(data->announce) (*data->announce)(ptsname(fd), data->dev); + return(fd); +} + +int pty_open(int input, int output, void *d) +{ + struct pty_chan *data = d; + int fd; + char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; + + fd = getmaster(dev); + if(fd < 0) return(-errno); + if(data->raw) raw(fd, 0); + if(data->announce) (*data->announce)(dev, data->dev); + return(fd); +} + +int pty_console_write(int fd, const char *buf, int n, void *d) +{ + struct pty_chan *data = d; + + return(generic_console_write(fd, buf, n, &data->tt)); +} + +struct chan_ops pty_ops = { + init: pty_chan_init, + open: pty_open, + close: generic_close, + read: generic_read, + write: generic_write, + console_write: pty_console_write, + window_size: generic_window_size, + free: generic_free, +}; + +struct chan_ops 
pts_ops = { + init: pty_chan_init, + open: pts_open, + close: generic_close, + read: generic_read, + write: generic_write, + console_write: pty_console_write, + window_size: generic_window_size, + free: generic_free, +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/slip.h linux_umopenmosix/arch/um/drivers/slip.h --- linux-2.4.17/arch/um/drivers/slip.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/slip.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,34 @@ +#ifndef __UM_SLIP_H +#define __UM_SLIP_H + +#define BUF_SIZE 1500 + +struct slip_data { + void *dev; + char name[sizeof("slnnnnn\0")]; + char *addr; + char *gate_addr; + int slave; + char buf[2 * BUF_SIZE]; + int pos; + int esc; +}; + +extern struct net_user_info slip_user_info; + +extern int set_umn_addr(int fd, char *addr, char *ptp_addr); +extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); +extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/slip_kern.c linux_umopenmosix/arch/um/drivers/slip_kern.c --- linux-2.4.17/arch/um/drivers/slip_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/slip_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,106 @@ +#include "linux/kernel.h" +#include "linux/stddef.h" +#include "linux/init.h" +#include "linux/netdevice.h" +#include "linux/if_arp.h" +#include "net_kern.h" +#include "net_user.h" +#include "kern.h" +#include "slip.h" +#include "slip_kern.h" + +struct slip_data slip_priv[MAX_UML_NETDEV] = { + [ 0 ... MAX_UML_NETDEV - 1 ] = + { + addr: NULL, + gate_addr: NULL, + slave: -1, + buf: { 0 }, + pos: 0, + esc: 0, + } +}; + +struct net_device umn_dev; + +struct net_device *slip_init(int private_size, int index) +{ + struct uml_net_private *private; + struct slip_data *spri; + + private = kmalloc(private_size, GFP_KERNEL); + if(private == NULL) return(NULL); + umn_dev.priv = private; + spri = (struct slip_data *) private->user; + *spri = slip_priv[index]; + strncpy(umn_dev.name, "umn", IFNAMSIZ); + umn_dev.init = NULL; + umn_dev.hard_header_len = 0; + umn_dev.addr_len = 4; + umn_dev.type = ARPHRD_ETHER; + umn_dev.tx_queue_len = 256; + umn_dev.flags = IFF_NOARP; + if(register_netdev(&umn_dev)) + printk(KERN_ERR "Couldn't initialize umn\n"); + printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); + + return(&umn_dev); +} + +static int slip_set_mac(struct sockaddr *hwaddr, void *data) +{ + return(0); +} + +static unsigned short slip_protocol(struct sk_buff *skbuff) +{ + return(htons(ETH_P_IP)); +} + +static int slip_read(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, + (struct slip_data *) &lp->user)); +} + +static int slip_write(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + 
return(slip_user_write(fd, (*skb)->data, (*skb)->len, + (struct slip_data *) &lp->user)); +} + +struct net_kern_info slip_kern_info = { + init: slip_init, + protocol: slip_protocol, + set_mac: slip_set_mac, + read: slip_read, + write: slip_write, +}; + +static int slip_count = 0; + +void slip_setup(char *str, struct uml_net *dev) +{ + int n = slip_count; + + dev->user = &slip_user_info; + dev->kern = &slip_kern_info; + dev->private_size = sizeof(struct slip_data); + dev->transport_index = slip_count++; + if(*str != ',') return; + str++; + if(str[0] != '\0') slip_priv[n].gate_addr = str; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/slip_kern.h linux_umopenmosix/arch/um/drivers/slip_kern.h --- linux-2.4.17/arch/um/drivers/slip_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/slip_kern.h Sat Jun 29 17:02:09 2002 @@ -0,0 +1,8 @@ +#ifndef __UM_SLIP_KERN_H +#define __UM_SLIP_KERN_H + +#include "net_kern.h" + +extern void slip_setup(char *arg, struct uml_net *dev); + +#endif diff -urN linux-2.4.17/arch/um/drivers/slip_user.c linux_umopenmosix/arch/um/drivers/slip_user.c --- linux-2.4.17/arch/um/drivers/slip_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/slip_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,323 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "net_user.h" +#include "slip.h" + +void slip_user_init(void *data, void *dev) +{ + struct slip_data *pri = data; + + pri->dev = dev; +} + +static int set_up_tty(int 
fd) +{ + int i; + struct termios tios; + + if (tcgetattr(fd, &tios) < 0) { + printk("could not get initial terminal attributes\n"); + return(-1); + } + + tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; + tios.c_iflag = IGNBRK | IGNPAR; + tios.c_oflag = 0; + tios.c_lflag = 0; + for (i = 0; i < NCCS; i++) + tios.c_cc[i] = 0; + tios.c_cc[VMIN] = 1; + tios.c_cc[VTIME] = 0; + + cfsetospeed(&tios, B38400); + cfsetispeed(&tios, B38400); + + if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { + printk("failed to set terminal attributes\n"); + return(-1); + } + return(0); +} + +struct slip_tramp_data { + int fd; + char **args; + int err; + char *output; +}; + +void slip_tramp(void *arg) +{ + struct slip_tramp_data *data = arg; + char **argv = data->args; + int status, pid, fds[2]; + + data->err = 0; + data->output = NULL; + if(pipe(fds) != 0){ + perror("slip_tramp : pipe failed"); + data->err = EINVAL; + return; + } + if((pid = fork()) == 0){ + if(data->fd != -1) dup2(data->fd, 0); + dup2(fds[1], 1); + close(fds[0]); + execvp(argv[0], argv); + exit(errno); + } + else if(pid < 0) data->err = errno; + else { + close(fds[1]); + read_output(fds[0], &data->output); + if(waitpid(pid, &status, 0) < 0) data->err = errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ + printk("'%s' didn't exit with status 0\n", argv[0]); + data->err = EINVAL; + } + } +} + +static int slip_open(void *data) +{ + struct slip_data *pri = data; + struct slip_tramp_data slip_data; + char version_buf[sizeof("nnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, + NULL }; + int sfd, mfd, disc, sencap; + + if((mfd = get_pty()) < 0){ + printk("umn : Failed to open pty\n"); + return(-1); + } + if((sfd = open(ptsname(mfd), O_RDWR)) < 0){ + printk("Couldn't open tty for slip line\n"); + return(-1); + } + if(set_up_tty(sfd)) return(-1); + pri->slave = sfd; + pri->pos = 0; + pri->esc = 0; + if(pri->gate_addr != NULL){ + sprintf(version_buf, 
"%d", UML_NET_VERSION); + slip_data.fd = sfd; + strcpy(gate_buf, pri->gate_addr); + slip_data.args = argv; + tracing_cb(slip_tramp, &slip_data); + if(slip_data.output != NULL){ + printk("%s", slip_data.output); + kfree(slip_data.output); + } + if(slip_data.err != 0){ + printk("slip_tramp failed - errno = %d\n", + slip_data.err); + return(-slip_data.err); + } + if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ + printk("SIOCGIFNAME failed, errno = %d\n", errno); + return(-errno); + } + iter_addresses(pri->dev, open_addr, pri->name); + } + else { + disc = N_SLIP; + if(ioctl(sfd, TIOCSETD, &disc) < 0){ + printk("Failed to set slip line discipline - " + "errno = %d\n", errno); + return(-errno); + } + sencap = 0; + if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ + printk("Failed to sett slip encapsulation - " + "errno = %d\n", errno); + return(-errno); + } + } + return(mfd); +} + +static void slip_close(int fd, void *data) +{ + struct slip_data *pri = data; + struct slip_tramp_data slip_data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, + NULL }; + + if(pri->gate_addr != NULL) + iter_addresses(pri->dev, close_addr, pri->name); + + sprintf(version_buf, "%d", UML_NET_VERSION); + slip_data.fd = -1; + slip_data.args = argv; + tracing_cb(slip_tramp, &slip_data); + if(slip_data.output != NULL){ + printk("%s", slip_data.output); + kfree(slip_data.output); + } + if(slip_data.err != 0) + printk("slip_tramp failed - errno = %d\n", slip_data.err); + close(fd); + close(pri->slave); + pri->slave = -1; +} + +/* SLIP protocol characters. 
*/ +#define END 0300 /* indicates end of frame */ +#define ESC 0333 /* indicates byte stuffing */ +#define ESC_END 0334 /* ESC ESC_END means END 'data' */ +#define ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ + +static int slip_unesc(struct slip_data *sl, unsigned char c) +{ + int ret; + + switch(c){ + case END: + sl->esc = 0; + ret = sl->pos; + sl->pos = 0; + return(ret); + case ESC: + sl->esc = 1; + return(0); + case ESC_ESC: + if(sl->esc){ + sl->esc = 0; + c = ESC; + } + break; + case ESC_END: + if(sl->esc){ + sl->esc = 0; + c = END; + } + break; + } + sl->buf[sl->pos++] = c; + return(0); +} + +int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) +{ + int i, n, size, start; + + n = net_read(fd, &pri->buf[pri->pos], sizeof(pri->buf) - pri->pos); + if(n <= 0) return(n); + + start = pri->pos; + for(i = 0; i < n; i++){ + size = slip_unesc(pri, pri->buf[start + i]); + if(size){ + memcpy(buf, pri->buf, size); + return(size); + } + } + return(0); +} + +static int slip_esc(unsigned char *s, unsigned char *d, int len) +{ + unsigned char *ptr = d; + unsigned char c; + + /* + * Send an initial END character to flush out any + * data that may have accumulated in the receiver + * due to line noise. + */ + + *ptr++ = END; + + /* + * For each byte in the packet, send the appropriate + * character sequence, according to the SLIP protocol. 
+ */ + + while (len-- > 0) { + switch(c = *s++) { + case END: + *ptr++ = ESC; + *ptr++ = ESC_END; + break; + case ESC: + *ptr++ = ESC; + *ptr++ = ESC_ESC; + break; + default: + *ptr++ = c; + break; + } + } + *ptr++ = END; + return (ptr - d); +} + +int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) +{ + int actual, n; + + actual = slip_esc(buf, pri->buf, len); + n = net_write(fd, pri->buf, actual); + if(n < 0) return(n); + else return(len); +} + +static int slip_set_mtu(int mtu, void *data) +{ + return(mtu); +} + +static void slip_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if(pri->slave == -1) return; + open_addr(addr, netmask, pri->name); +} + +static void slip_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if(pri->slave == -1) return; + close_addr(addr, netmask, pri->name); +} + +struct net_user_info slip_user_info = { + init: slip_user_init, + open: slip_open, + close: slip_close, + set_mtu: slip_set_mtu, + add_address: slip_add_addr, + delete_address: slip_del_addr, + max_packet: BUF_SIZE +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/ssl.c linux_umopenmosix/arch/um/drivers/ssl.c --- linux-2.4.17/arch/um/drivers/ssl.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/ssl.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/config.h" +#include "linux/fs.h" +#include "linux/tty.h" +#include "linux/tty_driver.h" +#include "linux/major.h" +#include "linux/mm.h" +#include "linux/init.h" +#include "linux/devfs_fs_kernel.h" +#include "asm/termbits.h" +#include "asm/irq.h" +#include "line.h" +#include "ssl.h" +#include "chan_kern.h" +#include "user_util.h" +#include "kern_util.h" +#include "kern.h" +#include "init.h" +#include "2_5compat.h" + +static int ssl_version = 1; + +static struct tty_driver ssl_driver; + +static int ssl_refcount = 0; + +#define NR_PORTS 64 + +static struct tty_struct *ssl_table[NR_PORTS]; +static struct termios *ssl_termios[NR_PORTS]; +static struct termios *ssl_termios_locked[NR_PORTS]; + +void ssl_announce(char *dev_name, int dev) +{ + printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, + dev_name); +} + +static struct chan_opts opts = { + announce: ssl_announce, + xterm_title: "Serial Line #%d", + raw: 1, +}; + +static struct line serial_lines[NR_PORTS] = + { [0 ... 
NR_PORTS - 1] = LINE_INIT(CONFIG_SSL_CHAN) }; + +static int setup_ssl_irq(int fd, int input, int output, void *data) +{ + return(um_request_irq(SSL_IRQ, fd, line_interrupt, + SA_INTERRUPT | SA_SHIRQ, "ssl", data)); +} + +int ssl_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + line = minor(tty->device) - tty->driver.minor_start; + if ((line < 0) || (line >= NR_PORTS)) + return -ENODEV; + return(line_open(serial_lines, line, tty, setup_ssl_irq, &opts)); +} + +static void ssl_close(struct tty_struct *tty, struct file * filp) +{ + line_close(serial_lines, minor(tty->device) - tty->driver.minor_start); +} + +static int ssl_write(struct tty_struct * tty, int from_user, + const unsigned char *buf, int count) +{ + int line; + + line = minor(tty->device) - tty->driver.minor_start; + if ((line < 0) || (line >= NR_PORTS)) + panic("Bad tty in ssl_put_char"); + return(write_chan(&serial_lines[line].chan_list, buf, count)); +} + +static void ssl_put_char(struct tty_struct *tty, unsigned char ch) +{ + int line; + + line = minor(tty->device) - tty->driver.minor_start; + if ((line < 0) || (line >= NR_PORTS)) + panic("Bad tty in ssl_put_char"); + write_chan(&serial_lines[line].chan_list, &ch, sizeof(ch)); +} + +static void ssl_flush_chars(struct tty_struct *tty) +{ + return; +} + +static int ssl_write_room(struct tty_struct *tty) +{ + return(16384); +} + +static int ssl_chars_in_buffer(struct tty_struct *tty) +{ + return(0); +} + +static void ssl_flush_buffer(struct tty_struct *tty) +{ + return; +} + +static int ssl_ioctl(struct tty_struct *tty, struct file * file, + unsigned int cmd, unsigned long arg) +{ + int ret; + + ret = 0; + switch(cmd){ + case TCGETS: + case TCSETS: + case TCFLSH: + case TCSETSF: + case TCSETSW: + case TCGETA: + ret = -ENOIOCTLCMD; + break; + default: + printk(KERN_ERR + "Unimplemented ioctl in ssl_ioctl : 0x%x\n", cmd); + ret = -ENOIOCTLCMD; + break; + } + return(ret); +} + +static void ssl_throttle(struct tty_struct * tty) +{ + 
printk(KERN_ERR "Someone should implement ssl_throttle\n"); +} + +static void ssl_unthrottle(struct tty_struct * tty) +{ + printk(KERN_ERR "Someone should implement ssl_unthrottle\n"); +} + +static void ssl_set_termios(struct tty_struct *tty, + struct termios *old_termios) +{ +} + +static void ssl_stop(struct tty_struct *tty) +{ + printk(KERN_ERR "Someone should implement ssl_stop\n"); +} + +static void ssl_start(struct tty_struct *tty) +{ + printk(KERN_ERR "Someone should implement ssl_start\n"); +} + +void ssl_hangup(struct tty_struct *tty) +{ +} + +static struct winch_lines winch = { + list : LIST_HEAD_INIT(winch.list), + lines : serial_lines, + nlines : sizeof(serial_lines)/sizeof(serial_lines[0]) +}; + +int ssl_init(void) +{ + int i, err; + + printk(KERN_INFO "Initializing software serial port version %d\n", + ssl_version); + + /* Initialize the tty_driver structure */ + + memset(&ssl_driver, 0, sizeof(struct tty_driver)); + ssl_driver.magic = TTY_DRIVER_MAGIC; + ssl_driver.name = "tts/%d"; + ssl_driver.major = TTYAUX_MAJOR; + ssl_driver.minor_start = 64; + ssl_driver.num = NR_PORTS; + ssl_driver.type = TTY_DRIVER_TYPE_SERIAL; + ssl_driver.subtype = 0; + ssl_driver.init_termios = tty_std_termios; + ssl_driver.init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL; + ssl_driver.flags = TTY_DRIVER_REAL_RAW; + ssl_driver.refcount = &ssl_refcount; + ssl_driver.table = ssl_table; + ssl_driver.termios = ssl_termios; + ssl_driver.termios_locked = ssl_termios_locked; + + ssl_driver.open = ssl_open; + ssl_driver.close = ssl_close; + ssl_driver.write = ssl_write; + ssl_driver.put_char = ssl_put_char; + ssl_driver.flush_chars = ssl_flush_chars; + ssl_driver.write_room = ssl_write_room; + ssl_driver.chars_in_buffer = ssl_chars_in_buffer; + ssl_driver.flush_buffer = ssl_flush_buffer; + ssl_driver.ioctl = ssl_ioctl; + ssl_driver.throttle = ssl_throttle; + ssl_driver.unthrottle = ssl_unthrottle; + ssl_driver.set_termios = ssl_set_termios; + ssl_driver.stop = ssl_stop; 
+ ssl_driver.start = ssl_start; + ssl_driver.hangup = ssl_hangup; + if (tty_register_driver(&ssl_driver)) + panic("Couldn't register ssl driver\n"); + + err = devfs_mk_symlink(NULL, "serial", 0, "tts", NULL, NULL); + if(err) printk("Symlink creation from /dev/serial to /dev/tts " + "returned %d\n", err); + for(i = 0; i < sizeof(serial_lines)/sizeof(serial_lines[0]); i++){ + INIT_LIST_HEAD(&serial_lines[i].chan_list); + sema_init(&serial_lines[i].sem, 1); + } + + register_winch(&winch); + return(0); +} + +__initcall(ssl_init); + +static int ssl_chan_setup(char *str) +{ + line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), + str); + return(1); +} + +__setup("ssl", ssl_chan_setup); +__channel_help(ssl_chan_setup, "ssl"); + +static void ssl_exit(void) +{ + int i; + + for(i=0;idevice) - tty->driver.minor_start; + ret = open_console(line, tty); + chan_window_size(&vts[line].chan_list, &tty->winsize.ws_row, + &tty->winsize.ws_col); + return(ret); +} + +static void con_close(struct tty_struct * tty, struct file * filp) +{ + line_close(vts, minor(tty->device) - tty->driver.minor_start); +} + +static int con_write(struct tty_struct * tty, int from_user, + const unsigned char *buf, int count) +{ + int line; + + if(in_interrupt() && tty->stopped) return 0; + while(tty->stopped) schedule(); + + line = minor(tty->device) - tty->driver.minor_start; + return(write_chan(&vts[line].chan_list, buf, count)); +} + +static int write_room(struct tty_struct *tty) +{ + return(1024); +} + +static void set_termios(struct tty_struct *tty, struct termios * old) +{ +} + +static int chars_in_buffer(struct tty_struct *tty) +{ + return(0); +} + +static struct winch_lines winch = { + list : LIST_HEAD_INIT(winch.list), + lines : vts, + nlines : sizeof(vts)/sizeof(vts[0]) +}; + +int stdio_init(void) +{ + int i, err; + + printk(KERN_INFO "Initializing stdio console driver\n"); + memset(&console_driver, 0, sizeof(struct tty_driver)); + console_driver.magic = TTY_DRIVER_MAGIC; + 
console_driver.driver_name = "stdio console"; + console_driver.name = "vc/%d"; + console_driver.major = TTY_MAJOR; + console_driver.minor_start = 0; + console_driver.num = 8; + console_driver.type = TTY_DRIVER_TYPE_CONSOLE; + console_driver.subtype = SYSTEM_TYPE_CONSOLE; + console_driver.init_termios = tty_std_termios; + console_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + console_driver.refcount = &console_refcount; + console_driver.table = console_table; + console_driver.termios = console_termios; + console_driver.termios_locked = console_termios_locked; + + console_driver.open = con_open; + console_driver.close = con_close; + console_driver.write = con_write; + console_driver.put_char = NULL; + console_driver.flush_chars = NULL; + console_driver.write_room = write_room; + console_driver.chars_in_buffer = chars_in_buffer; + console_driver.flush_buffer = NULL; + console_driver.ioctl = NULL; + console_driver.throttle = NULL; + console_driver.unthrottle = NULL; + console_driver.send_xchar = NULL; + console_driver.set_termios = set_termios; + console_driver.stop = NULL; + console_driver.start = NULL; + console_driver.hangup = NULL; + console_driver.break_ctl = NULL; + console_driver.wait_until_sent = NULL; + console_driver.read_proc = NULL; + if (tty_register_driver(&console_driver)) + panic("Couldn't register console driver\n"); + + err = devfs_mk_symlink(NULL, "ttys", 0, "vc", NULL, NULL); + if(err) printk("Symlink creation from /dev/ttys to /dev/vc " + "returned %d\n", err); + for(i=0;iindex].chan_list, string, len); +} + +static kdev_t console_device(struct console *c) +{ + return mk_kdev(TTY_MAJOR, c->index); +} + +static int console_setup(struct console *co, char *options) +{ + return(0); +} + +static struct console stdiocons = INIT_CONSOLE("tty", console_write, + console_device, console_setup, + CON_PRINTBUFFER); + +void stdio_console_init(void) +{ + INIT_LIST_HEAD(&vts[0].chan_list); + list_add(&init_console_chan.list, &vts[0].chan_list); + 
register_console(&stdiocons); +} + +static int console_chan_setup(char *str) +{ + line_setup(vts, sizeof(vts)/sizeof(vts[0]), str); + return(1); +} + +__setup("con", console_chan_setup); +__channel_help(console_chan_setup, "con"); + +static void console_exit(void) +{ + int i; + + line_close(vts, 0); + for(i=0;i +#include +#include +#include +#include +#include "chan_user.h" +#include "user_util.h" +#include "user.h" + +struct tty_chan { + char *dev; + int raw; + struct termios tt; +}; + +void *tty_chan_init(char *str, int device, struct chan_opts *opts) +{ + struct tty_chan *data; + + if(*str != ':'){ + printk("tty_init : channel type 'tty' must specify " + "a device\n"); + return(NULL); + } + str++; + + if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); + *data = ((struct tty_chan) { dev : str, + raw : opts->raw }); + + return(data); +} + +int tty_open(int input, int output, void *d) +{ + struct tty_chan *data = d; + int fd, mode; + + if(input && output) mode = O_RDWR; + else if(input) mode = O_RDONLY; + else mode = O_WRONLY; + + fd = open(data->dev, mode); + if(fd < 0) return(-errno); + if(data->raw){ + tcgetattr(fd, &data->tt); + raw(fd, 0); + } + return(fd); +} + +int tty_console_write(int fd, const char *buf, int n, void *d) +{ + struct tty_chan *data = d; + + return(generic_console_write(fd, buf, n, &data->tt)); +} + +struct chan_ops tty_ops = { + init: tty_chan_init, + open: tty_open, + close: generic_close, + read: generic_read, + write: generic_write, + console_write: tty_console_write, + window_size: generic_window_size, + free: generic_free, +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/tuntap.h linux_umopenmosix/arch/um/drivers/tuntap.h --- linux-2.4.17/arch/um/drivers/tuntap.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/tuntap.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_TUNTAP_H +#define __UM_TUNTAP_H + +#include "net_user.h" + +struct tuntap_data { + char *dev_name; + int fixed_config; + char *gate_addr; + int fd; + void *dev; + unsigned char hw_addr[ETH_ADDR_LEN]; + int hw_setup; +}; + +extern struct net_user_info tuntap_user_info; + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/tuntap_kern.c linux_umopenmosix/arch/um/drivers/tuntap_kern.c --- linux-2.4.17/arch/um/drivers/tuntap_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/tuntap_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/netdevice.h" +#include "linux/etherdevice.h" +#include "linux/skbuff.h" +#include "asm/errno.h" +#include "net_kern.h" +#include "net_user.h" +#include "tuntap.h" + +struct tuntap_setup { + char *dev_name; + unsigned char hw_addr[ETH_ALEN]; + int hw_setup; + char *gate_addr; +}; + +struct tuntap_setup tuntap_priv[MAX_UML_NETDEV] = { + [ 0 ... 
MAX_UML_NETDEV - 1 ] = + { + dev_name: NULL, + hw_addr: { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, + hw_setup: 0, + gate_addr: NULL, + } +}; + +struct net_device *tuntap_init(int private_size, int index) +{ + struct net_device *dev; + struct uml_net_private *pri; + struct tuntap_data *tpri; + + dev = init_etherdev(NULL, private_size); + if(dev == NULL) return(NULL); + pri = dev->priv; + tpri = (struct tuntap_data *) pri->user; + tpri->dev_name = tuntap_priv[index].dev_name; + tpri->fixed_config = (tpri->dev_name != NULL); + tpri->gate_addr = tuntap_priv[index].gate_addr; + memcpy(dev->dev_addr, tuntap_priv[index].hw_addr, ETH_ALEN); + memcpy(tpri->hw_addr, tuntap_priv[index].hw_addr, + sizeof(tpri->hw_addr)); + printk("TUN/TAP backend - "); + if(tpri->gate_addr != NULL) + printk("IP = %s", tpri->gate_addr); + tpri->hw_setup = tuntap_priv[index].hw_setup; + if(tpri->hw_setup) + printk(" ether = %x:%x:%x:%x:%x:%x", + tpri->hw_addr[0], tpri->hw_addr[1], tpri->hw_addr[2], + tpri->hw_addr[3], tpri->hw_addr[4], tpri->hw_addr[5]); + printk("\n"); + tpri->fd = -1; + return(dev); +} + +static unsigned short tuntap_protocol(struct sk_buff *skb) +{ + return(eth_type_trans(skb, skb->dev)); +} + +static int tuntap_set_mac(struct sockaddr *addr, void *data) +{ + struct tuntap_data *pri = data; + struct sockaddr *hwaddr = addr; + + memcpy(pri->hw_addr, hwaddr->sa_data, ETH_ALEN); + + return 0; +} + +static int tuntap_read(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); + if(*skb == NULL) return(-ENOMEM); + return(net_read(fd, (*skb)->mac.raw, + (*skb)->dev->mtu + ETH_HEADER_OTHER)); +} + +static int tuntap_write(int fd, struct sk_buff **skb, + struct uml_net_private *lp) +{ + return(net_write(fd, (*skb)->data, (*skb)->len)); +} + +struct net_kern_info tuntap_kern_info = { + init: tuntap_init, + protocol: tuntap_protocol, + set_mac: tuntap_set_mac, + read: tuntap_read, + write: tuntap_write, +}; + +static int tuntap_count = 
0; + +void tuntap_setup(char *str, struct uml_net *dev) +{ + struct tuntap_setup *pri; + + dev->user = &tuntap_user_info; + dev->kern = &tuntap_kern_info; + dev->private_size = sizeof(struct tuntap_data); + pri = &tuntap_priv[tuntap_count]; + dev->transport_index = tuntap_count++; + tap_setup_common(str, "tuntap", &pri->dev_name, pri->hw_addr, + &pri->hw_setup, &pri->gate_addr); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/tuntap_kern.h linux_umopenmosix/arch/um/drivers/tuntap_kern.h --- linux-2.4.17/arch/um/drivers/tuntap_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/tuntap_kern.h Sat Jun 29 17:02:19 2002 @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_TUNTAP_KERN_H +#define __UM_TUNTAP_KERN_H + +#include "net_kern.h" + +extern void tuntap_setup(char *arg, struct uml_net *dev); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/tuntap_user.c linux_umopenmosix/arch/um/drivers/tuntap_user.c --- linux-2.4.17/arch/um/drivers/tuntap_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/tuntap_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "net_user.h" +#include "tuntap.h" +#include "kern_util.h" +#include "user.h" + +#define MAX_PACKET ETH_MAX_PACKET + +void tuntap_user_init(void *data, void *dev) +{ + struct tuntap_data *pri = data; + + pri->dev = dev; +} + +struct tuntap_open_data { + char *name; + char *gate; + int data_fd; + int remote; + int me; + int err; + char *buffer; + int len; + int used; +}; + +static void tuntap_open_tramp(void *arg) +{ + struct tuntap_open_data *data = arg; + char version_buf[sizeof("nnnnn\0")]; + char *args[] = { "uml_net", version_buf, "tuntap", "up", data->gate, + NULL }; + char buf[CMSG_SPACE(sizeof(data->data_fd))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int pid, n; + + sprintf(version_buf, "%d", UML_NET_VERSION); + data->err = 0; + if((pid = fork()) == 0){ + dup2(data->remote, 1); + close(data->me); + execvp(args[0], args); + printk("Exec of '%s' failed - errno = %d\n", args[0], errno); + exit(1); + } + else if(pid < 0) data->err = errno; + close(data->remote); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + if(data->buffer != NULL){ + iov = ((struct iovec) { data->buffer, data->len }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + } + else { + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + } + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + n = recvmsg(data->me, &msg, 
0); + data->used = n; + if(n < 0){ + printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", + errno); + data->err = errno; + return; + } + waitpid(pid, NULL, 0); + + cmsg = CMSG_FIRSTHDR(&msg); + if(cmsg == NULL){ + printk("tuntap_open_tramp : didn't receive a message\n"); + data->err = EINVAL; + return; + } + if((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)){ + printk("tuntap_open_tramp : didn't receive a descriptor\n"); + data->err = EINVAL; + return; + } + data->data_fd = ((int *) CMSG_DATA(cmsg))[0]; +} + +static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + if((pri->fd == -1) || pri->fixed_config) return; + open_addr(addr, netmask, pri->dev_name); +} + +static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + if((pri->fd == -1) || pri->fixed_config) return; + close_addr(addr, netmask, pri->dev_name); +} + +static int tuntap_open(void *data) +{ + struct ifreq ifr; + struct tuntap_data *pri = data; + struct tuntap_open_data tap_data; + char *output; + int err, fds[2]; + + err = tap_open_common(pri->dev, pri->hw_setup, pri->gate_addr); + if(err) return(err); + + if(pri->fixed_config){ + if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ + printk("Failed to open /dev/net/tun, errno = %d\n", + errno); + return(-errno); + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP; + strncpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name) - 1); + if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ + printk("TUNSETIFF failed, errno = %d", errno); + close(pri->fd); + return(-errno); + } + } + else { + if(socketpair(PF_UNIX, SOCK_DGRAM, 0, fds) < 0){ + printk("data socketpair failed - errno = %d\n", errno); + return(-errno); + } + + tap_data.me = fds[0]; + tap_data.remote = fds[1]; + tap_data.data_fd = -1; + tap_data.gate = pri->gate_addr; + tap_data.buffer = get_output_buffer(&tap_data.len); + 
if(tap_data.buffer != NULL) tap_data.len--; + tap_data.used = 0; + + tracing_cb(tuntap_open_tramp, &tap_data); + output = tap_data.buffer; + if(tap_data.err == 0){ + pri->dev_name = uml_strdup(tap_data.buffer); + output += IFNAMSIZ; + printk("%s", output); /* helper output is data, never a format string */ + free_output_buffer(tap_data.buffer); + } + else { + printk("%s", output); /* helper output is data, never a format string */ + free_output_buffer(tap_data.buffer); + printk("tuntap_open_tramp failed - errno = %d\n", + tap_data.err); + return(-tap_data.err); + } + close(fds[0]); + pri->fd = tap_data.data_fd; + iter_addresses(pri->dev, open_addr, pri->dev_name); + } + + return(pri->fd); +} + +static void tuntap_close(int fd, void *data) +{ + struct tuntap_data *pri = data; + + if(!pri->fixed_config) + iter_addresses(pri->dev, close_addr, pri->dev_name); + close(fd); +} + +static int tuntap_set_mtu(int mtu, void *data) +{ + return(mtu); +} + +struct net_user_info tuntap_user_info = { + init: tuntap_user_init, + open: tuntap_open, + close: tuntap_close, + set_mtu: tuntap_set_mtu, + add_address: tuntap_add_addr, + delete_address: tuntap_del_addr, + max_packet: MAX_PACKET +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/ubd.c linux_umopenmosix/arch/um/drivers/ubd.c --- linux-2.4.17/arch/um/drivers/ubd.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/ubd.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,824 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "ubd_user.h" +#define MAJOR_NR UBD_MAJOR +#include "linux/config.h" +#include "linux/blk.h" +#include "linux/blkdev.h" +#include "linux/hdreg.h" +#include "linux/init.h" +#include "linux/devfs_fs_kernel.h" +#include "linux/cdrom.h" +#include "linux/proc_fs.h" +#include "linux/ctype.h" +#include "linux/capability.h" +#include "linux/mm.h" +#include "linux/vmalloc.h" +#include "linux/blkpg.h" +#include "asm/segment.h" +#include "asm/uaccess.h" +#include "asm/irq.h" +#include "asm/types.h" +#include "user_util.h" +#include "mem_user.h" +#include "kern_util.h" +#include "kern.h" +#include "mconsole_kern.h" +#include "init.h" +#include "irq_user.h" +#include "2_5compat.h" + +extern __u64 file_size(char *file); + +static int ubd_open(struct inode * inode, struct file * filp); +static int ubd_release(struct inode * inode, struct file * file); +static int ubd_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg); + +#define MAX_DEV (8) + +static int blk_sizes[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = BLOCK_SIZE }; + +static int hardsect_sizes[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = BLOCK_SIZE }; + +static int sizes[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = 0 }; + +static struct block_device_operations ubd_blops = { + open: ubd_open, + release: ubd_release, + ioctl: ubd_ioctl, +}; + +static struct hd_struct ubd_part[MAX_DEV] = +{ [ 0 ... 
MAX_DEV - 1 ] = { 0, 0, 0 } }; + +static request_queue_t *ubd_queue; + +static int fake_major = 0; + +static struct gendisk ubd_gendisk = INIT_GENDISK(MAJOR_NR, "ubd", ubd_part, + sizes, MAX_DEV, &ubd_blops); + +static struct gendisk fake_gendisk = INIT_GENDISK(0, "ubd", ubd_part, + sizes, MAX_DEV, &ubd_blops); + +#ifdef CONFIG_BLK_DEV_UBD_SYNC +#define OPEN_FLAGS O_RDWR | O_SYNC +#else +#define OPEN_FLAGS O_RDWR +#endif + +struct cow { + char *file; + int fd; + unsigned long *bitmap; + unsigned long bitmap_len; + int bitmap_offset; + int data_offset; +}; + +struct ubd { + char *file; + int is_dir; + int count; + int fd; + __u64 size; + int boot_openflags; + int openflags; + devfs_handle_t real; + devfs_handle_t fake; + struct cow cow; +}; + +#define DEFAULT_COW { \ + file: NULL, \ + fd: -1, \ + bitmap: NULL, \ + bitmap_offset: 0, \ + data_offset: 0, \ +} + +#define DEFAULT_UBD { \ + file: NULL, \ + is_dir: 0, \ + count: 0, \ + fd: -1, \ + size: -1, \ + boot_openflags: OPEN_FLAGS, \ + openflags: OPEN_FLAGS, \ + real: NULL, \ + fake: NULL, \ + cow: DEFAULT_COW, \ +} + +struct ubd ubd_dev[MAX_DEV] = { +{ + file: "root_fs", + is_dir: 0, + count: 0, + fd: -1, + size: 0, + boot_openflags: OPEN_FLAGS, + openflags: OPEN_FLAGS, + real: NULL, + fake: NULL, + cow: DEFAULT_COW, +}, +[ 1 ... 
MAX_DEV - 1 ] = DEFAULT_UBD +}; + +static struct hd_driveid ubd_id = { + cyls: 0, + heads: 128, + sectors: 32, +}; + +static int fake_ide = 0; +static struct proc_dir_entry *proc_ide_root = NULL; +static struct proc_dir_entry *proc_ide = NULL; + +static void make_proc_ide(void) +{ + proc_ide_root = proc_mkdir("ide", 0); + proc_ide = proc_mkdir("ide0", proc_ide_root); +} + +static int proc_ide_read_media(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len; + + strcpy(page, "disk\n"); + len = strlen("disk\n"); + len -= off; + if (len < count){ + *eof = 1; + if (len <= 0) return 0; + } + else len = count; + *start = page + off; + return len; + +} + +static void make_ide_entries(char *dev_name) +{ + struct proc_dir_entry *dir, *ent; + char name[64]; + + if(!fake_ide) return; + if(proc_ide_root == NULL) make_proc_ide(); + dir = proc_mkdir(dev_name, proc_ide); + ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); + if(!ent) return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_ide_read_media; + ent->write_proc = NULL; + sprintf(name,"ide0/%s", dev_name); + proc_symlink(dev_name, proc_ide_root, name); +} + +static int fake_ide_setup(char *str) +{ + fake_ide = 1; + return(1); +} + +__setup("fake_ide", fake_ide_setup); +__uml_help(fake_ide_setup, +"fake_ide\n" +" Create ide0 entries that map onto ubd devices.\n\n" +); + +static int ubd_setup_common(char *str, int *index_out) +{ + char *backing_file; + int n, sync, perm = O_RDWR; + + if(index_out) *index_out = -1; + n = *str++; + if(n == '='){ + char *end; + int major; + + if(!strcmp(str, "sync")){ + sync = 1; + return(0); + } + major = simple_strtoul(str, &end, 0); + if(*end != '\0'){ + printk(KERN_ERR + "ubd_setup : didn't parse major number\n"); + return(1); + } + fake_gendisk.major = major; + fake_major = major; + printk(KERN_INFO "Setting extra ubd major number to %d\n", + major); + return(0); + } + if(n < '0'){ + printk(KERN_ERR "ubd_setup : index out of range\n"); + 
return(1); + } + n -= '0'; + if(n >= MAX_DEV){ + printk(KERN_ERR "ubd_setup : index out of range\n"); + return(1); + } + if(index_out) *index_out = n; + sync = ubd_dev[n].boot_openflags & O_SYNC; + if (*str == 'r') { + perm = O_RDONLY; + str++; + } + if (*str == 's') { + sync = O_SYNC; + str++; + } + if(*str++ != '='){ + printk(KERN_ERR "ubd_setup : Expected '='\n"); + return(1); + } + backing_file = strchr(str, ','); + if(backing_file){ + *backing_file = '\0'; + backing_file++; + } + ubd_dev[n].file = str; + ubd_dev[n].cow.file = backing_file; + ubd_dev[n].boot_openflags = perm | sync; + return(0); +} + +static int ubd_setup(char *str) +{ + ubd_setup_common(str, NULL); + return(1); +} + +__setup("ubd", ubd_setup); +__uml_help(ubd_setup, +"ubd=\n" +" This is used to associate a device with a file in the underlying\n" +" filesystem. Usually, there is a filesystem in the file, but \n" +" that's not required. Swap devices containing swap files can be\n" +" specified like this. Also, a file which doesn't contain a\n" +" filesystem can have its contents read in the virtual \n" +" machine by running dd on the device. n must be in the range\n" +" 0 to 7. Appending an 'r' to the number will cause that device\n" +" to be mounted read-only. For example ubd1r=./ext_fs. 
Appending\n" +" an 's' (has to be _after_ 'r', if there is one) will cause data\n" +" to be written to disk on the host immediately.\n\n" +); + +static int fakehd(char *str) +{ + printk(KERN_INFO + "fakehd : Changing ubd_gendisk.major_name to \"hd\".\n"); + ubd_gendisk.major_name = "hd"; + return(1); +} + +__setup("fakehd", fakehd); +__uml_help(fakehd, +"fakehd\n" +" Change the ubd device name to \"hd\".\n\n" +); + +static void do_ubd_request(request_queue_t * q); + +int thread_fd = -1; + +int intr_count = 0; + +extern int errno; + +static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; + +static void ubd_finish(int error) +{ + int nsect; + + if(error){ + end_request(0); + return; + } + nsect = CURRENT->current_nr_sectors; + CURRENT->sector += nsect; + CURRENT->buffer += nsect << 9; + CURRENT->errors = 0; + CURRENT->nr_sectors -= nsect; + CURRENT->current_nr_sectors = 0; + end_request(1); +} + +static void ubd_handler(void) +{ + struct io_thread_req req; + + DEVICE_INTR = NULL; + intr_count++; + if(read_ubd_fs(thread_fd, &req, sizeof(req)) != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " + "errno = %d\n", getpid(), errno); + spin_lock(&REQUEST_LOCK); + end_request(0); + spin_unlock(&REQUEST_LOCK); + return; + } + + if((req.offset != ((__u64) (CURRENT->sector)) << 9) || + (req.length != (CURRENT->current_nr_sectors) << 9)) + panic("I/O op mismatch"); + + spin_lock(&REQUEST_LOCK); + ubd_finish(req.error); + reactivate_fd(thread_fd); + do_ubd_request(ubd_queue); + spin_unlock(&REQUEST_LOCK); +} + +static void ubd_intr(int irq, void *dev, struct pt_regs *unused) +{ + ubd_handler(); +} + +static int io_pid = -1; + +void kill_io_thread(void) +{ + if(io_pid != -1) kill(io_pid, SIGKILL); +} + +__uml_exitcall(kill_io_thread); + +int sync = 0; + +devfs_handle_t ubd_dir_handle; +devfs_handle_t ubd_fake_dir_handle; + +static int ubd_add(int n) +{ + char name[sizeof("nnnnnn\0")], dev_name[sizeof("ubd0x")]; + + if(ubd_dev[n].file == NULL) return(-1); + 
sprintf(name, "%d", n); + ubd_dev[n].real = devfs_register(ubd_dir_handle, name, + DEVFS_FL_DEFAULT, MAJOR_NR, n, + S_IFBLK | S_IRUSR | S_IWUSR | + S_IRGRP |S_IWGRP, + &ubd_blops, NULL); + if(fake_major != 0){ + ubd_dev[n].fake = devfs_register(ubd_fake_dir_handle, name, + DEVFS_FL_DEFAULT, fake_major, + n, S_IFBLK | S_IRUSR | + S_IWUSR | S_IRGRP | S_IWGRP, + &ubd_blops, NULL); + } + if(!strcmp(ubd_gendisk.major_name, "ubd")){ + sprintf(dev_name, "%s%d", ubd_gendisk.major_name, n); + } + else sprintf(dev_name, "%s%c", ubd_gendisk.major_name, + n + 'a'); + make_ide_entries(dev_name); + return(0); +} + +static int ubd_config(char *str) +{ + int n, err; + + str = uml_strdup(str); + if(str == NULL){ + printk(KERN_ERR "ubd_config failed to strdup string\n"); + return(1); + } + err = ubd_setup_common(str, &n); + if(err){ + kfree(str); + return(-1); + } + if(n != -1) ubd_add(n); + return(0); +} + +static int ubd_remove(char *str) +{ + int n; + + if(!isdigit(*str) || (*str - '0' >= MAX_DEV)) return(-1); /* '8' and '9' would index past ubd_dev[] */ + n = *str - '0'; + if(ubd_dev[n].file == NULL) return(0); + if(ubd_dev[n].count > 0) return(-1); + if(ubd_dev[n].real != NULL) devfs_unregister(ubd_dev[n].real); + if(ubd_dev[n].fake != NULL) devfs_unregister(ubd_dev[n].fake); + ubd_dev[n] = ((struct ubd) DEFAULT_UBD); + return(0); +} + +static struct mc_device ubd_mc = { + name: "ubd", + config: ubd_config, + remove: ubd_remove, +}; + +int ubd_mc_init(void) +{ + mconsole_register_dev(&ubd_mc); + return(0); +} + +__initcall(ubd_mc_init); + +static request_queue_t *ubd_get_queue(kdev_t device) +{ + return(ubd_queue); +} + +int ubd_init(void) +{ + unsigned long stack; + int i, err; + + ubd_dir_handle = devfs_mk_dir (NULL, "ubd", NULL); + if (devfs_register_blkdev(MAJOR_NR, "ubd", &ubd_blops)) { + printk(KERN_ERR "ubd: unable to get major %d\n", MAJOR_NR); + return -1; + } + ubd_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); + INIT_QUEUE(ubd_queue, DEVICE_REQUEST, &ubd_lock); + INIT_ELV(ubd_queue, &ubd_queue->elevator); + read_ahead[MAJOR_NR] = 8; /* 8 sector 
(4kB) read-ahead */ + blksize_size[MAJOR_NR] = blk_sizes; + blk_size[MAJOR_NR] = sizes; + INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); + add_gendisk(&ubd_gendisk); + if (fake_major != 0){ + char name[sizeof("ubd_nnn\0")]; + + snprintf(name, sizeof(name), "ubd_%d", fake_major); + ubd_fake_dir_handle = devfs_mk_dir(NULL, name, NULL); + if(devfs_register_blkdev(fake_major, "ubd", &ubd_blops)) { + printk(KERN_ERR "ubd: unable to get major %d\n", + fake_major); + return -1; + } + blk_dev[fake_major].queue = ubd_get_queue; + read_ahead[fake_major] = 8; /* 8 sector (4kB) read-ahead */ + blksize_size[fake_major] = blk_sizes; + INIT_HARDSECT(hardsect_size, fake_major, hardsect_sizes); + add_gendisk(&fake_gendisk); + } + for(i=0;ifd); + if(dev->cow.file != NULL) { + close_fd(dev->cow.fd); + vfree(dev->cow.bitmap); + dev->cow.bitmap = NULL; + } +} + +static int ubd_open_dev(struct ubd *dev) +{ + int err, flags, n, create_cow, *create_ptr; + + create_cow = 0; + create_ptr = (dev->cow.file != NULL) ? 
&create_cow : NULL; + dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, + &dev->cow.bitmap_offset, &dev->cow.bitmap_len, + &dev->cow.data_offset, create_ptr); + + if((dev->fd == -ENOENT) && create_cow){ + printk(KERN_INFO "Creating \"%s\" as COW file for \"%s\"\n", + dev->file, dev->cow.file); + n = dev - ubd_dev; + dev->fd = create_cow_file(dev->file, dev->cow.file, 1 << 9, + &dev->cow.bitmap_offset, + &dev->cow.bitmap_len, + &dev->cow.data_offset); + if(dev->fd < 0){ + printk(KERN_ERR "Creation of COW file \"%s\" failed, " + "errno = %d\n", dev->file, -dev->fd); + } + } + + if(dev->fd < 0) return(dev->fd); + + if(dev->cow.file != NULL){ + err = -ENOMEM; + dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); + if(dev->cow.bitmap == NULL) goto error; + flush_tlb_kernel_vm(); + + err = read_cow_bitmap(dev->fd, dev->cow.bitmap, + dev->cow.bitmap_offset, + dev->cow.bitmap_len); + if(err) goto error; + + flags = O_RDONLY; + err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, + NULL, NULL); + if(err < 0) goto error; + dev->cow.fd = err; + } + return(0); + error: + close_fd(dev->fd); + return(err); +} + +static int ubd_open(struct inode * inode, struct file * filp) +{ + char *file; + int n; + + n = minor(inode->i_rdev); + if(n >= MAX_DEV) /* valid minors are 0..MAX_DEV-1 */ + return -ENODEV; + if(ubd_is_dir(ubd_dev[n].file)){ + ubd_dev[n].is_dir = 1; + return(0); + } + if(ubd_dev[n].count == 0){ + ubd_dev[n].openflags = ubd_dev[n].boot_openflags; + /* XXX This error is wrong when errno isn't stored in + * ubd_dev[n].fd + */ + if(ubd_open_dev(&ubd_dev[n]) < 0){ + printk(KERN_ERR "ubd%d: Can't open \"%s\": " + "errno = %d\n", n, ubd_dev[n].file, + -ubd_dev[n].fd); + } + if(ubd_dev[n].fd < 0) + return -ENODEV; + file = ubd_dev[n].cow.file ? 
ubd_dev[n].cow.file : + ubd_dev[n].file; + ubd_dev[n].size = file_size(file); + if(ubd_dev[n].size < 0) return(ubd_dev[n].size); + ubd_part[n].start_sect = 0; + ubd_part[n].nr_sects = ubd_dev[n].size / blk_sizes[n]; + sizes[n] = ubd_dev[n].size / BLOCK_SIZE; + } + ubd_dev[n].count++; + if ((filp->f_mode & FMODE_WRITE) && + ((ubd_dev[n].openflags & ~O_SYNC) == O_RDONLY)){ + if(--ubd_dev[n].count == 0) ubd_close(&ubd_dev[n]); + return -EROFS; + } + return(0); +} + +static int ubd_release(struct inode * inode, struct file * file) +{ + int n; + + n = minor(inode->i_rdev); + if(n >= MAX_DEV) /* valid minors are 0..MAX_DEV-1 */ + return -ENODEV; + if(--ubd_dev[n].count == 0) ubd_close(&ubd_dev[n]); + return(0); +} + +int cow_read = 0; +int cow_write = 0; + +void cowify_req(struct io_thread_req *req, struct ubd *dev) +{ + int i, update_bitmap, sector = req->offset >> 9; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, dev->cow.bitmap)){ + ubd_set_bit(i, &req->sector_mask); + cow_read++; + } + } + } + else { + update_bitmap = 0; + for(i = 0; i < req->length >> 9; i++){ + cow_write++; + ubd_set_bit(i, &req->sector_mask); + if(!ubd_test_bit(sector + i, dev->cow.bitmap)) + update_bitmap = 1; + ubd_set_bit(sector + i, dev->cow.bitmap); + } + if(update_bitmap){ + req->cow_offset = sector / (sizeof(unsigned long) * 8); + req->bitmap_words[0] = + dev->cow.bitmap[req->cow_offset]; + req->bitmap_words[1] = + dev->cow.bitmap[req->cow_offset + 1]; + req->cow_offset *= sizeof(unsigned long); + req->cow_offset += dev->cow.bitmap_offset; + } + } +} + +static int prepare_request(struct request *req, struct io_thread_req *io_req) +{ + struct ubd *dev; + __u64 block; + int nsect; + + if(req->rq_status == RQ_INACTIVE) return(1); + + dev = &ubd_dev[minor(req->rq_dev)]; + if(dev->is_dir){ + strcpy(req->buffer, "HOSTFS:"); + strcat(req->buffer, dev->file); + end_request(1); + return(1); + } + 
if(IS_WRITE(req) && ((dev->openflags & O_ACCMODE) == O_RDONLY)){ + printk("Write attempted on readonly ubd device %d\n", + minor(req->rq_dev)); + end_request(0); + return(1); + } + + block = req->sector; + nsect = req->current_nr_sectors; + + io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; + io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; + io_req->fds[1] = dev->fd; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; + io_req->offset = ((__u64) block) << 9; + io_req->length = nsect << 9; + io_req->buffer = req->buffer; + io_req->sectorsize = 1 << 9; + io_req->sector_mask = 0; + io_req->cow_offset = -1; + io_req->error = 0; + + if(dev->cow.file != NULL) cowify_req(io_req, dev); + return(0); +} + +static void do_ubd_request(request_queue_t *q) +{ + struct io_thread_req io_req; + struct request *req; + int err, n; + + if(thread_fd == -1){ + while(!list_empty(&q->queue_head)){ + req = blkdev_entry_next_request(&q->queue_head); + err = prepare_request(req, &io_req); + if(!err){ + do_io(&io_req); + ubd_finish(io_req.error); + } + } + } + else { + if(DEVICE_INTR || list_empty(&q->queue_head)) return; + req = blkdev_entry_next_request(&q->queue_head); + err = prepare_request(req, &io_req); + if(!err){ + SET_INTR(ubd_handler); + n = write_ubd_fs(thread_fd, (char *) &io_req, + sizeof(io_req)); + if(n != sizeof(io_req)) + printk("write to io thread failed - returned " + "%d, errno %d\n", n, errno); + } + } +} + +static int ubd_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct hd_geometry *loc = (struct hd_geometry *) arg; + int dev, err; + + if(!inode) return -EINVAL; + dev = minor(inode->i_rdev); + if (dev >= MAX_DEV) /* valid minors are 0..MAX_DEV-1 */ + return -EINVAL; + switch (cmd) { + struct hd_geometry g; + struct cdrom_volctrl volume; + case HDIO_GETGEO: + if (!loc) return -EINVAL; + g.heads = 128; + g.sectors = 32; + g.cylinders = ubd_dev[dev].size / (128 * 32); + g.start = 2; + return copy_to_user(loc, &g, 
sizeof g) ? -EFAULT : 0; + case BLKRASET: + if(!capable(CAP_SYS_ADMIN)) return -EACCES; + if(arg > 0xff) return -EINVAL; + read_ahead[major(inode->i_rdev)] = arg; + return 0; + case BLKRAGET: + if (!arg) return -EINVAL; + err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + return 0; + case BLKGETSIZE: /* Return device size */ + if (!arg) return -EINVAL; + err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + put_user(ubd_dev[dev].size >> 9, (long *) arg); + return 0; + case BLKFLSBUF: + if(!capable(CAP_SYS_ADMIN)) return -EACCES; + return 0; + + case BLKRRPART: /* Re-read partition tables */ + return 0; /* revalidate_hddisk(inode->i_rdev, 1); */ + + case HDIO_SET_UNMASKINTR: + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if ((arg > 1) || (minor(inode->i_rdev) & 0x3F)) + return -EINVAL; + return 0; + + case HDIO_GET_UNMASKINTR: + if (!arg) return -EINVAL; + err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + return 0; + + case HDIO_GET_MULTCOUNT: + if (!arg) return -EINVAL; + err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + return 0; + + case HDIO_SET_MULTCOUNT: + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (minor(inode->i_rdev) & 0x3F) return -EINVAL; + return 0; + + case HDIO_GET_IDENTITY: + ubd_id.cyls = ubd_dev[dev].size / (128 * 32); + if (copy_to_user((char *) arg, (char *) &ubd_id, + sizeof(ubd_id))) + return -EFAULT; + return 0; + + case CDROMVOLREAD: + if(copy_from_user(&volume, (char *) arg, sizeof(volume))) + return -EFAULT; + volume.channel0 = 255; + volume.channel1 = 255; + volume.channel2 = 255; + volume.channel3 = 255; + if(copy_to_user((char *) arg, &volume, sizeof(volume))) + return -EFAULT; + return 0; + + default: + return blk_ioctl(inode->i_rdev, cmd, arg); + } +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/ubd_user.c linux_umopenmosix/arch/um/drivers/ubd_user.c --- linux-2.4.17/arch/um/drivers/ubd_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/ubd_user.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,526 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/types.h" +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "ubd_user.h" + +#include +#include +#if __BYTE_ORDER == __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +#else +#error "__BYTE_ORDER not defined" +#endif + +extern void panic(char *fmt, ...); + +#define PATH_LEN_V1 256 + +struct cow_header_v1 { + int magic; + int version; + char backing_file[PATH_LEN_V1]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +#define PATH_LEN_V2 MAXPATHLEN + +struct cow_header_v2 { + unsigned long magic; + unsigned long version; + char backing_file[PATH_LEN_V2]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 2 + +static void sizes(__u64 size, int sectorsize, int bitmap_offset, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + 
*bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; + *data_offset_out *= sectorsize; +} + +static int read_cow_header(int fd, int *magic_out, char **backing_file_out, + time_t *mtime_out, __u64 *size_out, + int *sectorsize_out, int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = um_kmalloc(sizeof(*header)); + if(header == NULL){ + printk("read_cow_header - Failed to allocate header\n"); + return(-ENOMEM); + } + err = -EINVAL; + n = read(fd, header, sizeof(*header)); + if(n < offsetof(typeof(header->v1), backing_file)){ + printk("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if(magic == COW_MAGIC) { + version = header->v1.version; + } + else if(magic == ntohl(COW_MAGIC)){ + version = ntohl(header->v1.version); + } + else goto out; + + *magic_out = COW_MAGIC; + + if(version == 1){ + if(n < sizeof(header->v1)){ + printk("read_cow_header - failed to read V1 header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + file = header->v1.backing_file; + } + else if(version == 2){ + if(n < sizeof(header->v2)){ + printk("read_cow_header - failed to read V2 header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + file = header->v2.backing_file; + } + else { + printk("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = uml_strdup(file); + if(*backing_file_out == NULL){ + printk("read_cow_header - failed to allocate backing file\n"); + goto out; + } + err = 0; + out: + kfree(header); + return(err); +} + +int open_ubd_file(char *file, int *openflags, char **backing_file_out, + int *bitmap_offset_out, unsigned long 
*bitmap_len_out, + int *data_offset_out, int *create_cow_out) +{ + struct stat64 buf; + time_t mtime; + __u64 size; + char *backing_file; + int fd, err, sectorsize, magic, mode = 0644; + + if((fd = open64(file, *openflags, mode)) < 0){ + if((errno == ENOENT) && (create_cow_out != NULL)) + *create_cow_out = 1; + if(((*openflags & O_ACCMODE) != O_RDWR) || + ((errno != EROFS) && (errno != EACCES))) return(-errno); + *openflags &= ~O_ACCMODE; + *openflags |= O_RDONLY; + if((fd = open64(file, *openflags, mode)) < 0) return(-errno); + } + if(backing_file_out == NULL) return(fd); + + err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, + &sectorsize, bitmap_offset_out); + if(err && (*backing_file_out != NULL)){ + printk("Failed to read COW header from COW file \"%s\", " + "errno = %d\n", file, err); + goto error; + } + if(err) return(fd); + + if((*backing_file_out != NULL) && + strcmp(*backing_file_out, backing_file)){ + printk("Backing file mismatch - \"%s\" requested,\n" + "\"%s\" specified in COW header of \"%s\"\n", + *backing_file_out, backing_file, file); + printk("Using \"%s\"\n", backing_file); + } + + if(backing_file_out == NULL) return(fd); + + *backing_file_out = backing_file; + + err = stat64(*backing_file_out, &buf); + if(err){ + printk("Failed to stat backing file \"%s\", errno = %d\n", + *backing_file_out, errno); + err = -errno; + goto error; + } + + err = -EINVAL; + if(buf.st_size != size){ + printk("Size mismatch (%ld vs %ld) of COW header vs backing " + "file\n", buf.st_size, size); + goto error; + } + if(buf.st_mtime != mtime){ + printk("mtime mismatch (%ld vs %ld) of COW header vs backing " + "file\n", buf.st_mtime, mtime); + goto error; + } + + sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, + data_offset_out); + + return(fd); + error: + close(fd); + return(err); +} + +int read_cow_bitmap(int fd, void *buf, int offset, int len) +{ + int err; + + err = lseek64(fd, offset, SEEK_SET); + if(err != offset) return(-errno); + err = 
read(fd, buf, len); + if(err < 0) return(-errno); + return(0); +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + printk("absolutize : unable to get cwd - errno = %d\n", errno); + return(-1); + } + slash = strrchr(from, '/'); + if(slash != NULL){ + *slash = '\0'; + if(chdir(from)){ + *slash = '/'; + printk("absolutize : Can't cd to '%s' - errno = %d\n", + from, errno); + return(-1); + } + *slash = '/'; + if(getcwd(to, size) == NULL){ + printk("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + remaining = size - strlen(to); + if(strlen(slash) + 1 > remaining){ + printk("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcat(to, slash); + } + else { + if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ + printk("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return(0); +} + +int create_cow_file(char *cow_file, char *backing_file, int sectorsize, + int *bitmap_offset_out, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + struct cow_header_v2 *header; + struct stat64 buf; + __u64 blocks; + long zero; + int err, fd, i, flags; + __u64 size; + + flags = O_RDWR | O_CREAT; + fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); + if(fd < 0) return(fd); + + err = -ENOMEM; + header = um_kmalloc(sizeof(*header)); + if(header == NULL){ + printk("Failed to allocate COW V2 header\n"); + goto out_close; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if(strlen(backing_file) > sizeof(header->backing_file) - 1){ + printk("Backing file name \"%s\" is too long - names are " + "limited to %d characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + 
if(absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = stat64(header->backing_file, &buf); + if(err < 0){ + err = -errno; + goto out_free; + } + + header->mtime = htonl(buf.st_mtime); + header->size = htonll(buf.st_size); + header->sectorsize = htonl(sectorsize); + size = buf.st_size; + + err = write(fd, header, sizeof(*header)); + if(err != sizeof(*header)) goto out_free; + + blocks = (size + sectorsize - 1) / sectorsize; + blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8); + zero = 0; + for(i = 0; i < blocks; i++){ + err = write(fd, &zero, sizeof(zero)); + if(err != sizeof(zero)) goto out_free; + } + + sizes(size, sectorsize, sizeof(struct cow_header_v2), + bitmap_len_out, data_offset_out); + *bitmap_offset_out = sizeof(struct cow_header_v2); + + kfree(header); + return(fd); + + out_free: + kfree(header); + out_close: + close(fd); + return(err); +} + +int read_ubd_fs(int fd, void *buffer, int len) +{ + return(read(fd, buffer, len)); +} + +int write_ubd_fs(int fd, char *buffer, int len) +{ + return(write(fd, buffer, len)); +} + +int ubd_is_dir(char *file) +{ + struct stat64 buf; + + if(stat64(file, &buf) < 0) return(0); + return(S_ISDIR(buf.st_mode)); +} + +void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, &req->sector_mask) == bit)) + end++; + + if(end != nsectors) + printk("end != nsectors\n"); + off = req->offset + req->offsets[bit]; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + if(lseek64(req->fds[bit], off, SEEK_SET) != off){ + printk("do_io - lseek failed : errno = %d\n", errno); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = 
read(req->fds[bit], buf, len); + if (n < 0) { + printk("do_io - read returned %d : " + "errno = %d fd = %d\n", n, + errno, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } + else { + n = write(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write returned %d : " + "errno = %d fd = %d\n", n, + errno, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); + + if(req->cow_offset != -1){ + if(lseek64(req->fds[1], req->cow_offset, SEEK_SET) != + req->cow_offset){ + printk("do_io - bitmap lseek failed : errno = %d\n", + errno); + req->error = 1; + return; + } + n = write(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update returned %d : " + "errno = %d fd = %d\n", n, errno, req->fds[1]); + req->error = 1; + return; + } + } + req->error = 0; + return; +} + +int kernel_fd = -1; + +int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req req; + int n; + + signal(SIGWINCH, SIG_IGN); + while(1){ + n = read(kernel_fd, &req, sizeof(req)); + if(n < 0) printk("io_thread - read returned %d, errno = %d\n", + n, errno); + else if(n < sizeof(req)){ + printk("io_thread - short read : length = %d\n", n); + continue; + } + io_count++; + do_io(&req); + n = write(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) + printk("io_thread - write failed, errno = %d\n", + errno); + } +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2]; + + if(socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0){ + printk("start_io_thread - socketpair failed, errno = %d\n", + errno); + return(-1); + } + kernel_fd = fds[0]; + *fd_out = fds[1]; + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + return(-errno); + } + return(pid); +} + +#ifdef notdef +int 
start_io_thread(unsigned long sp, int *fd_out) +{ + int pid; + + if((kernel_fd = get_pty()) < 0) return(-1); + raw(kernel_fd, 0); + if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ + printk("Couldn't open tty for IO\n"); + return(-1); + } + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + return(-errno); + } + return(pid); +} +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/drivers/xterm.c linux_umopenmosix/arch/um/drivers/xterm.c --- linux-2.4.17/arch/um/drivers/xterm.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/drivers/xterm.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "kern_util.h" +#include "user_util.h" +#include "chan_user.h" +#include "user.h" + +struct xterm_chan { + int pid; + int fd; + char *title; + int device; + int raw; + struct termios tt; +}; + +void *xterm_init(char *str, int device, struct chan_opts *opts) +{ + struct xterm_chan *data; + + if((data = malloc(sizeof(*data))) == NULL) return(NULL); + *data = ((struct xterm_chan) { pid : -1, + device : device, + title : opts->xterm_title, + raw : opts->raw }); + return(data); +} + +struct xterm_info { + char tty[2]; + int fd; + int slave; + int console_num; + int *pid_out; + char *title; +}; + +static void xterm_tramp(void *arg) +{ + struct xterm_info *info; + int pid; + char title[256], flag[sizeof("Sxxnn\0")], c; + + info = arg; + 
sprintf(flag, "-S%c%c%d", info->tty[0], info->tty[1], info->fd); + sprintf(title, info->title, info->console_num); + if((pid = fork()) != 0) *info->pid_out = pid; + else { + execlp("xterm", "xterm", flag, "-T", title, NULL); + printk("execlp of xterm failed - errno = %d\n", errno); + close(info->fd); + exit(1); + } + close(info->fd); + while((read(info->slave, &c, sizeof(c)) == sizeof(c)) && (c != '\n')) ; +} + +int xterm_open(int input, int output, void *d) +{ + struct xterm_chan *data = d; + struct xterm_info info; + int master, slave; + char dev[] = "/dev/ptyXX"; + + master = getmaster(dev); + if(master == -1){ + printk("No unused host ptys found\n"); + return(-ENODEV); + } + dev[strlen("/dev/")] = 't'; + slave = open(dev, O_RDWR); + if(slave == -1) return(-errno); + tcgetattr(slave, &data->tt); + raw(slave, 0); + info.tty[0] = dev[strlen("/dev/pty")]; + info.tty[1] = dev[strlen("/dev/ptyX")]; + info.fd = master; + info.slave = slave; + info.console_num = data->device; + info.pid_out = &data->pid; + info.title = data->title; + tracing_cb(xterm_tramp, &info); + tcsetattr(slave, TCSADRAIN, &data->tt); + if(data->raw) raw(slave, 0); + data->fd = slave; + return(slave); +} + +void xterm_close(int fd, void *d) +{ + struct xterm_chan *data = d; + + if(data->pid != -1) kill(data->pid, SIGKILL); + close(fd); +} + +void xterm_free(void *d) +{ + free(d); +} + +int xterm_console_write(int fd, const char *buf, int n, void *d) +{ + struct xterm_chan *data = d; + + return(generic_console_write(fd, buf, n, &data->tt)); +} + +struct chan_ops xterm_ops = { + init: xterm_init, + open: xterm_open, + close: xterm_close, + read: generic_read, + write: generic_write, + console_write: xterm_console_write, + window_size: generic_window_size, + free: xterm_free, +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. 
This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/fs/Makefile linux_umopenmosix/arch/um/fs/Makefile --- linux-2.4.17/arch/um/fs/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/fs/Makefile Wed Jun 26 23:45:14 2002 @@ -0,0 +1,16 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +O_TARGET := fs.o + +subdir-$(CONFIG_HOSTFS) = hostfs + +MOD_SUB_DIRS := $(subdir-m) +SUB_DIRS := $(subdir-y) + +obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) +obj-m += $(join $(subdir-m),$(subdir-m:%=/%.o)) + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/arch/um/fs/hostfs/Makefile linux_umopenmosix/arch/um/fs/hostfs/Makefile --- linux-2.4.17/arch/um/fs/hostfs/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/fs/hostfs/Makefile Wed Jun 26 23:45:14 2002 @@ -0,0 +1,31 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino +# to __st_ino. It stayed in the same place, so as long as the correct name +# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. 
+ +STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ + echo __)st_ino + +USER_CFLAGS := $(USER_CFLAGS) -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) + +O_TARGET := +obj-y = +obj-m = + +CFLAGS_hostfs_kern.o := $(CFLAGS) +CFLAGS_hostfs_user.o := $(USER_CFLAGS) + +ifneq ($(CONFIG_HOSTFS), n) + O_TARGET := hostfs.o +endif + +obj-y += hostfs_kern.o hostfs_user.o +obj-m += $(O_TARGET) + +override CFLAGS = + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/arch/um/fs/hostfs/hostfs.h linux_umopenmosix/arch/um/fs/hostfs/hostfs.h --- linux-2.4.17/arch/um/fs/hostfs/hostfs.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/fs/hostfs/hostfs.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,74 @@ +#ifndef __UM_FS_HOSTFS +#define __UM_FS_HOSTFS + +#define HOSTFS_FILE 1 +#define HOSTFS_DIR 2 +#define HOSTFS_SYMLINK 3 +#define HOSTFS_CHARDEV 4 +#define HOSTFS_BLOCDEV 5 +#define HOSTFS_FIFO 6 +#define HOSTFS_SOCK 7 + +/* These are exactly the same definitions as in fs.h, but the names are + * changed so that this file can be included in both kernel and user files. 
+ */ + +#define HOSTFS_ATTR_MODE 1 +#define HOSTFS_ATTR_UID 2 +#define HOSTFS_ATTR_GID 4 +#define HOSTFS_ATTR_SIZE 8 +#define HOSTFS_ATTR_ATIME 16 +#define HOSTFS_ATTR_MTIME 32 +#define HOSTFS_ATTR_CTIME 64 +#define HOSTFS_ATTR_ATIME_SET 128 +#define HOSTFS_ATTR_MTIME_SET 256 +#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ +#define HOSTFS_ATTR_ATTR_FLAG 1024 + +struct hostfs_iattr { + unsigned int ia_valid; + mode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + time_t ia_atime; + time_t ia_mtime; + time_t ia_ctime; + unsigned int ia_attr_flags; +}; + +extern int stat_file(const char *path, int *dev_out, unsigned long long *inode_out, + int *mode_out, int *nlink_out, int *uid_out, + int *gid_out, unsigned long long *size_out, + unsigned long *atime_out, unsigned long *mtime_out, + unsigned long *ctime_out, int *blksize_out, + unsigned long long *blocks_out); +extern int access_file(char *path, int r, int w, int x); +extern int open_file(char *path, int r, int w); +extern int file_type(const char *path, int *rdev); +extern void *open_dir(char *path, int *err_out); +extern char *read_dir(void *stream, unsigned long long *pos, + unsigned long long *ino_out, int *len_out); +extern void close_file(void *stream); +extern void close_dir(void *stream); +extern int read_file(int fd, unsigned long long *offset, char *buf, int len); +extern int write_file(int fd, unsigned long long *offset, const char *buf, + int len); +extern int lseek_file(int fd, long long offset, int whence); +extern int file_create(char *name, int ur, int uw, int ux, int gr, + int gw, int gx, int or, int ow, int ox); +extern int set_attr(const char *file, struct hostfs_iattr *attrs); +extern int make_symlink(const char *from, const char *to); +extern int unlink_file(const char *file); +extern int do_mkdir(const char *file, int mode); +extern int do_rmdir(const char *file); +extern int do_mknod(const char *file, int mode, int dev); +extern int link_file(const char *from, 
const char *to); +extern int do_readlink(char *file, char *buf, int size); +extern int rename_file(char *from, char *to); +extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, + long long *bfree_out, long long *bavail_out, long long *files_out, + long long *ffree_out, void *fsid_out, int fsid_size, + long *namelen_out, long *spare_out); + +#endif diff -urN linux-2.4.17/arch/um/fs/hostfs/hostfs_kern.c linux_umopenmosix/arch/um/fs/hostfs/hostfs_kern.c --- linux-2.4.17/arch/um/fs/hostfs/hostfs_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/fs/hostfs/hostfs_kern.c Wed Jun 26 23:45:14 2002 @@ -0,0 +1,784 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hostfs.h" +#include "kern_util.h" +#include "kern.h" +#include "user_util.h" +#include "2_5compat.h" + +#define file_hostfs_i(file) (&(file)->f_dentry->d_inode->u.hostfs_i) + +int hostfs_d_delete(struct dentry *dentry) +{ + return(1); +} + +struct dentry_operations hostfs_dentry_ops = { + d_delete: hostfs_d_delete, +}; + +static char *root_ino = "/"; + +#define HOSTFS_SUPER_MAGIC 0x00c0ffee + +static struct inode_operations hostfs_iops; +static struct address_space_operations hostfs_link_aops; + +static char *dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + int len; + + len = 0; + parent = dentry; + while(parent->d_parent != parent){ + len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = parent->d_inode->u.hostfs_i.host_filename; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if(name == NULL) return(NULL); + + name[len] = '\0'; + parent = dentry; + while(parent->d_parent != parent){ + len -= parent->d_name.len + 1; + name[len] = '/'; + strncpy(&name[len + 1], parent->d_name.name, + parent->d_name.len); + parent = parent->d_parent; + } + 
strncpy(name, root, strlen(root)); + return(name); +} + +static char *inode_name(struct inode *ino, int extra) +{ + struct dentry *dentry; + + dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); + return(dentry_name(dentry, extra)); +} + +static int read_name(struct inode *ino, char *name) +{ + /* The non-int inode fields are copied into ints by stat_file and + * then copied into the inode because passing the actual pointers + * in and having them treated as int * breaks on big-endian machines + */ + int err; + int i_dev, i_mode, i_nlink, i_blksize; + unsigned long long i_size; + unsigned long long i_ino; + unsigned long long i_blocks; + err = stat_file(name, &i_dev, &i_ino, &i_mode, &i_nlink, + &ino->i_uid, &ino->i_gid, &i_size, &ino->i_atime, + &ino->i_mtime, &ino->i_ctime, &i_blksize, &i_blocks); + if(err) return(err); + ino->i_ino = i_ino; + ino->i_dev = i_dev; + ino->i_mode = i_mode; + ino->i_nlink = i_nlink; + ino->i_size = i_size; + ino->i_blksize = i_blksize; + ino->i_blocks = i_blocks; + if(kdev_same(ino->i_sb->s_dev, ROOT_DEV) && (ino->i_uid == getuid())) + ino->i_uid = 0; + return(0); +} + +static int read_inode(struct inode *ino) +{ + char *name; + int err; + + name = inode_name(ino, 0); + if(name == NULL) return(-ENOMEM); + err = read_name(ino, name); + kfree(name); + return(err); +} + +void hostfs_delete_inode(struct inode *ino) +{ + if(ino->u.hostfs_i.host_filename) kfree(ino->u.hostfs_i.host_filename); + ino->u.hostfs_i.host_filename = NULL; + if(ino->u.hostfs_i.fd != -1) close_file(&ino->u.hostfs_i.fd); + ino->u.hostfs_i.mode = 0; + clear_inode(ino); +} + +int hostfs_statfs(struct super_block *sb, struct statfs *sf) +{ + /* do_statfs uses struct statfs64 internally, but the linux kernel + * struct statfs still has 32-bit versions for most of these fields, + * so we convert them here + */ + int err; + long long f_blocks; + long long f_bfree; + long long f_bavail; + long long f_files; + long long f_ffree; + + err = 
do_statfs(sb->s_root->d_inode->u.hostfs_i.host_filename, + &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, + &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), + &sf->f_namelen, sf->f_spare); + if(err) return(err); + sf->f_blocks = f_blocks; + sf->f_bfree = f_bfree; + sf->f_bavail = f_bavail; + sf->f_files = f_files; + sf->f_ffree = f_ffree; + sf->f_type = HOSTFS_SUPER_MAGIC; + return(0); +} + +static struct super_operations hostfs_sbops = { + put_inode: force_delete, + delete_inode: hostfs_delete_inode, + statfs: hostfs_statfs, +}; + +int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + void *dir; + char *name; + unsigned long long next, ino; + int error, len; + + name = dentry_name(file->f_dentry, 0); + if(name == NULL) return(-ENOMEM); + dir = open_dir(name, &error); + kfree(name); + if(dir == NULL) return(-error); + next = file->f_pos; + while((name = read_dir(dir, &next, &ino, &len)) != NULL){ + error = (*filldir)(ent, name, len, file->f_pos, + ino, DT_UNKNOWN); + if(error) break; + file->f_pos = next; + } + close_dir(dir); + return(0); +} + +unsigned int hostfs_poll(struct file *file, struct poll_table_struct *table) +{ + not_implemented(); + return(-EINVAL); +} + +int hostfs_ioctl(struct inode *ino, struct file *file, unsigned int code, + unsigned long data) +{ + not_implemented(); + return(-EINVAL); +} + +int hostfs_file_open(struct inode *ino, struct file *file) +{ + char *name; + int mode = 0, r = 0, w = 0, fd; + + mode = file->f_mode & (FMODE_READ | FMODE_WRITE); + if((mode & ino->u.hostfs_i.mode) == mode) return(0); + + if(ino->u.hostfs_i.fd != -1){ + close_file(&ino->u.hostfs_i.fd); + ino->u.hostfs_i.fd = -1; + } + ino->u.hostfs_i.mode |= mode; + if(ino->u.hostfs_i.mode & FMODE_READ) r = 1; + if(ino->u.hostfs_i.mode & FMODE_WRITE) w = 1; + if(w) r = 1; + name = dentry_name(file->f_dentry, 0); + if(name == NULL) return(-ENOMEM); + fd = open_file(name, r, w); + kfree(name); + if(fd < 0) return(fd); + file_hostfs_i(file)->fd = fd; + 
return(0); +} + +int hostfs_dir_open(struct inode *ino, struct file *file) +{ + return(0); +} + +int hostfs_dir_release(struct inode *ino, struct file *file) +{ + return(0); +} + +int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return(0); +} + +int hostfs_fasync(int fd, struct file *file, int on) +{ + not_implemented(); + return(-EINVAL); +} + +static struct file_operations hostfs_file_fops = { + owner: NULL, + read: generic_file_read, + write: generic_file_write, + poll: hostfs_poll, + mmap: generic_file_mmap, + open: hostfs_file_open, + release: NULL, + fsync: hostfs_fsync, + fasync: hostfs_fasync +}; + +static struct file_operations hostfs_dir_fops = { + owner: NULL, + readdir: hostfs_readdir, + poll: hostfs_poll, + ioctl: hostfs_ioctl, + open: hostfs_dir_open, + release: hostfs_dir_release, + fsync: hostfs_fsync, + fasync: hostfs_fasync +}; + +int hostfs_writepage(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + unsigned long long base; + int count = PAGE_CACHE_SIZE; + int end_index = inode->i_size >> PAGE_CACHE_SHIFT; + int err; + + if (page->index >= end_index) + count = inode->i_size & (PAGE_CACHE_SIZE-1); + + buffer = kmap(page); + base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; + + err = write_file(inode->u.hostfs_i.fd, &base, buffer, count); + if(err != count){ + ClearPageUptodate(page); + goto out; + } + + if (base > inode->i_size) + inode->i_size = base; + + if (PageError(page)) + ClearPageError(page); + + out: + kunmap(page); + + UnlockPage(page); + return err; +} + +int hostfs_readpage(struct file *file, struct page *page) +{ + char *buffer; + long long start; + int err = 0; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + err = read_file(file_hostfs_i(file)->fd, &start, buffer, + PAGE_CACHE_SIZE); + if(err < 0) goto out; + + flush_dcache_page(page); + SetPageUptodate(page); + if (PageError(page)) 
ClearPageError(page); + err = 0; + out: + kunmap(page); + UnlockPage(page); + return(err); +} + +int hostfs_prepare_write(struct file *file, struct page *page, + unsigned int from, unsigned int to) +{ + char *buffer; + long long start, tmp; + int err; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + if(from != 0){ + tmp = start; + err = read_file(file_hostfs_i(file)->fd, &tmp, buffer, + from); + if(err < 0) goto out; + } + if(to != PAGE_CACHE_SIZE){ + start += to; + err = read_file(file_hostfs_i(file)->fd, &start, buffer + to, + PAGE_CACHE_SIZE - to); + if(err < 0) goto out; + } + err = 0; + out: + kunmap(page); + return(err); +} + +int hostfs_commit_write(struct file *file, struct page *page, unsigned from, + unsigned to) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + long long start; + int err = 0; + + start = (long long) (page->index << PAGE_CACHE_SHIFT) + from; + buffer = kmap(page); + err = write_file(file_hostfs_i(file)->fd, &start, buffer + from, + to - from); + if(err > 0) err = 0; + if(!err && (start > inode->i_size)) + inode->i_size = start; + + kunmap(page); + return(err); +} + +static struct address_space_operations hostfs_aops = { + writepage: hostfs_writepage, + readpage: hostfs_readpage, + prepare_write: hostfs_prepare_write, + commit_write: hostfs_commit_write +}; + +static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, + int *error) +{ + struct inode *inode; + char *name; + int type, err = 0, rdev; + + inode = get_empty_inode(); + if(inode == NULL) return(NULL); + inode->u.hostfs_i.host_filename = NULL; + inode->u.hostfs_i.fd = -1; + inode->u.hostfs_i.mode = 0; + if(error) *error = 0; + insert_inode_hash(inode); + if(dentry){ + name = dentry_name(dentry, 0); + if(name == NULL){ + err = -ENOMEM; + goto out; + } + type = file_type(name, &rdev); + kfree(name); + } + else type = HOSTFS_DIR; + inode->i_sb = sb; + + if(type == 
HOSTFS_SYMLINK) + inode->i_op = &page_symlink_inode_operations; + else inode->i_op = &hostfs_iops; + + if(type == HOSTFS_DIR) inode->i_fop = &hostfs_dir_fops; + else inode->i_fop = &hostfs_file_fops; + + if(type == HOSTFS_SYMLINK) inode->i_mapping->a_ops = &hostfs_link_aops; + else inode->i_mapping->a_ops = &hostfs_aops; + + switch (type) { + case HOSTFS_CHARDEV: + init_special_inode(inode, S_IFCHR, rdev); + break; + case HOSTFS_BLOCDEV: + init_special_inode(inode, S_IFBLK, rdev); + break; + case HOSTFS_FIFO: + init_special_inode(inode, S_IFIFO, 0); + break; + case HOSTFS_SOCK: + init_special_inode(inode, S_IFSOCK, 0); + break; + } + + return(inode); + out: + iput(inode); + if(error) *error = err; + return(NULL); +} + +int hostfs_create(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + char *name; + int error; + + inode = get_inode(dir->i_sb, dentry, &error); + if(error) return(error); + name = dentry_name(dentry, 0); + if(name == NULL){ + iput(inode); + return(-ENOMEM); + } + error = file_create(name, + mode | S_IRUSR, mode | S_IWUSR, mode | S_IXUSR, + mode | S_IRGRP, mode | S_IWGRP, mode | S_IXGRP, + mode | S_IROTH, mode | S_IWOTH, mode | S_IXOTH); + if(!error) error = read_name(inode, name); + kfree(name); + if(error){ + iput(inode); + return(error); + } + d_instantiate(dentry, inode); + return(0); +} + +struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry) +{ + struct inode *inode; + char *name; + int error; + + inode = get_inode(ino->i_sb, dentry, &error); + if(error != 0) return(ERR_PTR(error)); + name = dentry_name(dentry, 0); + if(name == NULL) return(ERR_PTR(-ENOMEM)); + error = read_name(inode, name); + kfree(name); + if(error){ + iput(inode); + if(error == -ENOENT) inode = NULL; + else return(ERR_PTR(error)); + } + d_add(dentry, inode); + dentry->d_op = &hostfs_dentry_ops; + return(NULL); +} + +static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) +{ + char *file; + int len; + + file = 
inode_name(ino, dentry->d_name.len + 1); + if(file == NULL) return(NULL); + strcat(file, "/"); + len = strlen(file); + strncat(file, dentry->d_name.name, dentry->d_name.len); + file[len + dentry->d_name.len] = '\0'; + return(file); +} + +int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) +{ + char *from_name, *to_name; + int err; + + if((from_name = inode_dentry_name(ino, from)) == NULL) + return(-ENOMEM); + to_name = dentry_name(to, 0); + if(to_name == NULL){ + kfree(from_name); + return(-ENOMEM); + } + err = link_file(to_name, from_name); + kfree(from_name); + kfree(to_name); + return(err); +} + +int hostfs_unlink(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = unlink_file(file); + kfree(file); + return(err); +} + +int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = make_symlink(file, to); + kfree(file); + return(err); +} + +int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_mkdir(file, mode); + kfree(file); + return(err); +} + +int hostfs_rmdir(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_rmdir(file); + kfree(file); + return(err); +} + +int hostfs_mknod(struct inode *ino, struct dentry *dentry, int mode, int dev) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_mknod(file, mode, dev); + kfree(file); + return(err); +} + +int hostfs_rename(struct inode *from_ino, struct dentry *from, + struct inode *to_ino, struct dentry *to) +{ + char *from_name, *to_name; + int err; + + if((from_name = 
inode_dentry_name(from_ino, from)) == NULL)
+		return(-ENOMEM);
+	if((to_name = inode_dentry_name(to_ino, to)) == NULL){
+		kfree(from_name);
+		return(-ENOMEM);
+	}
+	err = rename_file(from_name, to_name);
+	kfree(from_name);
+	kfree(to_name);
+	return(err);
+}
+
+/* Truncation has no implementation of its own here; the call is only
+ * reported through not_implemented().
+ */
+void hostfs_truncate(struct inode *ino)
+{
+	not_implemented();
+}
+
+/* Check whether the access described by 'desired' (a mask of MAY_READ,
+ * MAY_WRITE and MAY_EXEC) is allowed on 'ino'.
+ *
+ * The host is asked first through access_file(); only when it agrees is
+ * vfs_permission() consulted as well.  Returns 0 if both checks pass,
+ * -ENOMEM if the host path cannot be built, otherwise the error of the
+ * check that refused.
+ */
+int hostfs_permission(struct inode *ino, int desired)
+{
+	char *name;
+	int r = 0, w = 0, x = 0, err;
+
+	if(desired & MAY_READ) r = 1;
+	if(desired & MAY_WRITE) w = 1;
+	if(desired & MAY_EXEC) x = 1;
+	name = inode_name(ino, 0);
+	if(name == NULL) return(-ENOMEM);
+	err = access_file(name, r, w, x);
+	kfree(name);
+	if(!err) err = vfs_permission(ino, desired);
+	return(err);
+}
+
+/* Translate the kernel's 'struct iattr' into a 'struct hostfs_iattr' and
+ * hand it to set_attr() for the file named by 'dentry'.
+ *
+ * Each ATTR_* bit set in attr->ia_valid is mapped to the matching
+ * HOSTFS_ATTR_* bit; the value fields are copied only for the bits that
+ * carry one.  Returns -ENOMEM if the host path cannot be built, otherwise
+ * whatever set_attr() returns.
+ */
+int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct hostfs_iattr attrs;
+	char *name;
+	int err;
+
+	attrs.ia_valid = 0;
+	if(attr->ia_valid & ATTR_MODE){
+		attrs.ia_valid |= HOSTFS_ATTR_MODE;
+		attrs.ia_mode = attr->ia_mode;
+	}
+	if(attr->ia_valid & ATTR_UID){
+		attrs.ia_valid |= HOSTFS_ATTR_UID;
+		attrs.ia_uid = attr->ia_uid;
+	}
+	if(attr->ia_valid & ATTR_GID){
+		attrs.ia_valid |= HOSTFS_ATTR_GID;
+		attrs.ia_gid = attr->ia_gid;
+	}
+	if(attr->ia_valid & ATTR_SIZE){
+		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
+		attrs.ia_size = attr->ia_size;
+	}
+	if(attr->ia_valid & ATTR_ATIME){
+		attrs.ia_valid |= HOSTFS_ATTR_ATIME;
+		attrs.ia_atime = attr->ia_atime;
+	}
+	if(attr->ia_valid & ATTR_MTIME){
+		attrs.ia_valid |= HOSTFS_ATTR_MTIME;
+		attrs.ia_mtime = attr->ia_mtime;
+	}
+	if(attr->ia_valid & ATTR_CTIME){
+		attrs.ia_valid |= HOSTFS_ATTR_CTIME;
+		attrs.ia_ctime = attr->ia_ctime;
+	}
+	/* The *_SET flags carry no value of their own. */
+	if(attr->ia_valid & ATTR_ATIME_SET){
+		attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
+	}
+	if(attr->ia_valid & ATTR_MTIME_SET){
+		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
+	}
+	name = dentry_name(dentry, 0);
+	if(name == NULL) return(-ENOMEM);
+	err = set_attr(name, &attrs);
+	kfree(name);
+	return(err);
+}
+
+/* Not implemented: reports the call and always fails with -EINVAL. */
+int hostfs_getattr(struct dentry *dentry, struct iattr *attr)
+{
+	not_implemented();
+	return(-EINVAL);
+}
+
+/* Inode operations table for hostfs. */
+static struct inode_operations hostfs_iops = {
+	create:		hostfs_create,
+	lookup:		hostfs_lookup,
+	link:		hostfs_link,
+	unlink:		hostfs_unlink,
+	symlink:	hostfs_symlink,
+	mkdir:		hostfs_mkdir,
+	rmdir:		hostfs_rmdir,
+	mknod:		hostfs_mknod,
+	rename:		hostfs_rename,
+	truncate:	hostfs_truncate,
+	permission:	hostfs_permission,
+	setattr:	hostfs_setattr,
+	getattr:	hostfs_getattr,
+};
+
+/* readpage for symlinks: fill 'page' with the target of the link that
+ * page->mapping->host names on the host.
+ *
+ * Returns 0 on success, -ENOMEM if the host path cannot be built, or the
+ * error from do_readlink().  The page is unmapped and unlocked on every
+ * path, including the failures; the earlier code returned early on a NULL
+ * name and left the page both kmap()ed and locked.
+ */
+int hostfs_link_readpage(struct file *file, struct page *page)
+{
+	char *buffer, *name;
+	int err;
+
+	buffer = kmap(page);
+	name = inode_name(page->mapping->host, 0);
+	if(name == NULL){
+		err = -ENOMEM;
+		goto out;
+	}
+	err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
+	kfree(name);
+	if(err == 0){
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+		if (PageError(page)) ClearPageError(page);
+	}
+ out:
+	kunmap(page);
+	UnlockPage(page);
+	return(err);
+}
+
+static struct address_space_operations hostfs_link_aops = {
+	readpage:	hostfs_link_readpage,
+};
+
+/* Common part of mounting: initialise 'sb' and create its root inode and
+ * root dentry.
+ *
+ * 'data' is the host path to use as the root of the mount; when it is NULL
+ * or empty, root_ino is used instead.  A private copy of the path is
+ * stored in the root inode's host_filename.
+ *
+ * Returns 'sb' on success and NULL on any failure.
+ */
+static struct super_block *hostfs_read_super_common(struct super_block *sb, 
+						    char *data)
+{
+	struct inode * root_inode;
+	char *name;
+
+	sb->s_blocksize = 1024;
+	sb->s_blocksize_bits = 10;
+	sb->s_magic = HOSTFS_SUPER_MAGIC;
+	sb->s_op = &hostfs_sbops;
+	if((data == NULL) || (*data == '\0')) data = root_ino;
+	name = kmalloc(strlen(data) + 1, GFP_KERNEL);
+	if(name == NULL) return(NULL);
+	strcpy(name, data);
+	root_inode = get_inode(sb, NULL, NULL);
+	if(root_inode == NULL){
+		kfree(name);
+		return(NULL);
+	}
+	/* From here on the name belongs to the inode.
+	 * NOTE(review): presumably released by the inode teardown code,
+	 * which is not visible in this part of the file -- confirm.
+	 */
+	root_inode->u.hostfs_i.host_filename = name;
+	sb->s_root = d_alloc_root(root_inode);
+	if(sb->s_root == NULL){
+		/* No dentry took over the inode reference; drop it here
+		 * instead of returning a super block without a root.
+		 */
+		iput(root_inode);
+		return(NULL);
+	}
+	if(read_inode(root_inode)){
+		/* d_alloc_root() handed our inode reference to the root
+		 * dentry, so release the dentry (which puts the inode)
+		 * rather than calling iput() behind its back.
+		 */
+		dput(sb->s_root);
+		sb->s_root = NULL;
+		return(NULL);
+	}
+	return(sb);
+}
+
+/* read_super for "hostfs": 'data' is the mount option string naming the
+ * host directory.  'silent' is unused.
+ */
+struct super_block *hostfs_read_super(struct super_block *sb, void *data, 
+				      int silent)
+{
+	return(hostfs_read_super_common(sb, data));
+}
+
+/* read_super for "root-hostfs": block 0 of the device must start with the
+ * string "HOSTFS:"; whatever follows it is used as the host directory.
+ * Returns NULL if the block cannot be read or the marker is missing.
+ * 'data' and 'silent' are unused.
+ */
+struct super_block *hostfs_root_read_super(struct super_block *sb, void *data, 
+					   int silent)
+{
+	struct buffer_head * bh;
+	struct super_block *ret = NULL;
+	kdev_t dev = sb->s_dev;
+	int blocksize = get_hardsect_size(dev);
+
+	if(blocksize == 0) blocksize = BLOCK_SIZE;
+	set_blocksize (dev, blocksize);
+	if(!(bh = bread (dev, 0, blocksize))) return NULL;
+	if(strncmp(bh->b_data, "HOSTFS:", strlen("HOSTFS:"))) goto out;
+	ret = hostfs_read_super_common(sb, bh->b_data + strlen("HOSTFS:"));
+ out:
+	brelse (bh);
+	return(ret);
+}
+
+DECLARE_FSTYPE(hostfs_type, "hostfs", hostfs_read_super, 0);
+DECLARE_FSTYPE_DEV(hostfs_root_type, "root-hostfs", hostfs_root_read_super);
+
+/* Register both filesystem types.  Returns 0 on success or the error
+ * code of the registration that failed; if the second one fails the
+ * first is unregistered again so nothing is left half set up.
+ */
+static int __init init_hostfs(void)
+{
+	int err;
+
+	err = register_filesystem(&hostfs_type);
+	if(err) return(err);
+	err = register_filesystem(&hostfs_root_type);
+	if(err) unregister_filesystem(&hostfs_type);
+	return(err);
+}
+
+/* Unregister both filesystem types. */
+static void __exit exit_hostfs(void)
+{
+	unregister_filesystem(&hostfs_type);
+	unregister_filesystem(&hostfs_root_type);
+}
+
+module_init(init_hostfs)
+module_exit(exit_hostfs)
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/fs/hostfs/hostfs_user.c linux_umopenmosix/arch/um/fs/hostfs/hostfs_user.c
--- linux-2.4.17/arch/um/fs/hostfs/hostfs_user.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/fs/hostfs/hostfs_user.c Wed Jun 26 23:45:14 2002
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+/* NOTE(review): the header names of the ten system includes below are
+ * missing from this copy of the patch -- restore them from the upstream
+ * file before building.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "hostfs.h"
+#include "kern_util.h"
+#include "user.h"
+
+/* lstat64() 'path' on the host (a symlink itself is examined, not its
+ * target) and copy each requested field to its out parameter.  Any out
+ * pointer may be NULL, in which case that field is skipped.
+ * Returns 0 on success or -errno.
+ */
+int stat_file(const char *path, int *dev_out, unsigned long long *inode_out,
+	      int *mode_out, int *nlink_out, int *uid_out, int *gid_out, 
+	      unsigned long long *size_out, unsigned long *atime_out,
+	      unsigned long *mtime_out, unsigned long *ctime_out,
+	      int *blksize_out, unsigned long long *blocks_out)
+{
+	struct stat64 buf;
+
+	if(lstat64(path, &buf) < 0) 
+		return(-errno);
+	if(dev_out != NULL) *dev_out = buf.st_dev;
+
+	/* See the Makefile for why STAT64_INO_FIELD is passed in
+	 * by the build
+	 */
+	if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD;
+	if(mode_out != NULL) *mode_out = buf.st_mode;
+	if(nlink_out != NULL) *nlink_out = buf.st_nlink;
+	if(uid_out != NULL) *uid_out = buf.st_uid;
+	if(gid_out != NULL) *gid_out = buf.st_gid;
+	if(size_out != NULL) *size_out = buf.st_size;
+	if(atime_out != NULL) *atime_out = buf.st_atime;
+	if(mtime_out != NULL) *mtime_out = buf.st_mtime;
+	if(ctime_out != NULL) *ctime_out = buf.st_ctime;
+	if(blksize_out != NULL) *blksize_out = buf.st_blksize;
+	if(blocks_out != NULL) *blocks_out = buf.st_blocks;
+	return(0);
+}
+
+/* Classify 'path' (without following a symlink) as one of the HOSTFS_*
+ * file types and store its st_rdev in *rdev.  Anything that is not a
+ * directory, symlink, device, FIFO or socket is reported as HOSTFS_FILE.
+ * Returns -errno if the file cannot be stat'ed; *rdev is then untouched.
+ */
+int file_type(const char *path, int *rdev)
+{
+ 	struct stat64 buf;
+
+	if(lstat64(path, &buf) < 0) return(-errno);
+	*rdev = buf.st_rdev;
+	if(S_ISDIR(buf.st_mode)) return(HOSTFS_DIR);
+	else if(S_ISLNK(buf.st_mode)) return(HOSTFS_SYMLINK);
+	else if(S_ISCHR(buf.st_mode)) return(HOSTFS_CHARDEV);
+	else if(S_ISBLK(buf.st_mode)) return(HOSTFS_BLOCDEV);
+	else if(S_ISFIFO(buf.st_mode))return(HOSTFS_FIFO);
+	else if(S_ISSOCK(buf.st_mode))return(HOSTFS_SOCK);
+
+	else return(HOSTFS_FILE);
+}
+
+/* Ask the host, via access(), whether 'path' may be read, written and/or
+ * executed as selected by the non-zero flags.  With all flags zero the
+ * mode passed to access() is 0.  Returns 0 or -errno.
+ */
+int access_file(char *path, int r, int w, int x)
+{
+	int mode = 0;
+
+	if(r) mode = R_OK;
+	if(w) mode |= W_OK;
+	if(x) mode |= X_OK;
+	if(access(path, mode) != 0) return(-errno);
+	else return(0);
+}
+
+/* Open an existing host file for reading and/or writing.  At least one of
+ * 'r' and 'w' must be set; anything else panics.  Returns the host file
+ * descriptor or -errno.
+ */
+int open_file(char *path, int r, int w)
+{
+	int mode = 0, fd;
+
+	if(r && !w) mode = O_RDONLY;
+	else if(!r && w) mode = O_WRONLY;
+	else if(r && w) mode = O_RDWR;
+	else panic("Impossible mode in open_file");
+	fd = open64(path, mode);
+	if(fd < 0) return(-errno);
+	else return(fd);
+}
+
+/* Open the host directory 'path'.  Returns the directory stream as an
+ * opaque pointer, or NULL on failure.  *err_out receives the (positive)
+ * errno of the failure and is set to 0 on success, so it never carries a
+ * stale errno left over from an earlier call.
+ */
+void *open_dir(char *path, int *err_out)
+{
+	DIR *dir;
+
+	dir = opendir(path);
+	if(dir == NULL){
+		*err_out = errno;
+		return(NULL);
+	}
+	*err_out = 0;
+	return(dir);
+}
+
+/* Read the directory entry at position *pos of 'stream' (a value obtained
+ * from open_dir()).  On success the entry's name is returned, its length
+ * and inode number are stored in *len_out and *ino_out, and *pos is
+ * advanced to the position after the entry.  Returns NULL when readdir()
+ * yields no entry.  The returned name is storage handed out by readdir()
+ * and must not be freed by the caller.
+ */
+char *read_dir(void *stream, unsigned long long *pos, 
+	       unsigned long long *ino_out, int *len_out)
+{
+	DIR *dir = stream;
+	struct dirent *ent;
+
+	seekdir(dir, *pos);
+	ent = readdir(dir);
+	if(ent == NULL) return(NULL);
+	*len_out = strlen(ent->d_name);
+	*ino_out = ent->d_ino;
+	*pos = telldir(dir);
+	return(ent->d_name);
+}
+
+/* Read up to 'len' bytes at *offset from 'fd' into 'buf' and advance
+ * *offset by the number of bytes read.  Returns that number or -errno.
+ */
+int read_file(int fd, unsigned long long *offset, char *buf, int len)
+{
+	int n;
+
+	n = pread64(fd, buf, len, *offset);
+	if(n < 0) return(-errno);
+	*offset += n;
+	return(n);
+}
+
+/* Write up to 'len' bytes from 'buf' at *offset of 'fd' and advance
+ * *offset by the number of bytes written.  Returns that number or -errno.
+ */
+int write_file(int fd, unsigned long long *offset, const char *buf, int len)
+{
+	int n;
+
+	n = pwrite64(fd, buf, len, *offset);
+	if(n < 0) return(-errno);
+	*offset += n;
+	return(n);
+}
+
+/* Reposition 'fd' with lseek64().  Returns 0 on success or -errno.
+ *
+ * The result is kept in a 64-bit variable: squeezing it into an int made
+ * valid offsets of 2 GiB and beyond look negative and hence like errors.
+ */
+int lseek_file(int fd, long long offset, int whence)
+{
+	long long ret;
+
+	ret = lseek64(fd, offset, whence);
+	if(ret < 0) return(-errno);
+	return(0);
+}
+
+/* Close a host file; 'stream' points to the int file descriptor. */
+void close_file(void *stream)
+{
+	close(*((int *) stream));
+}
+
+/* Close a directory stream obtained from open_dir(). */
+void close_dir(void *stream)
+{
+	closedir(stream);
+}
+
+/* Create 'name' on the host with the permission bits selected by the nine
+ * user/group/other read/write/execute flags.  The descriptor used for
+ * creating the file is closed again immediately.  Returns 0 or -errno.
+ */
+int file_create(char *name, int ur, int uw, int ux, int gr, 
+		int gw, int gx, int or, int ow, int ox)
+{
+	int mode, fd;
+
+	mode = 0;
+	mode |= ur ? S_IRUSR : 0;
+	mode |= uw ? S_IWUSR : 0;
+	mode |= ux ? S_IXUSR : 0;
+	mode |= gr ? S_IRGRP : 0;
+	mode |= gw ? S_IWGRP : 0;
+	mode |= gx ? S_IXGRP : 0;
+	mode |= or ? S_IROTH : 0;
+	mode |= ow ? S_IWOTH : 0;
+	mode |= ox ? S_IXOTH : 0;
+	fd = open64(name, O_CREAT, mode);
+	if(fd < 0) return(-errno);
+	close(fd);
+	return(0);
+}
+
+/* Apply the attributes flagged in attrs->ia_valid to the host file
+ * 'file': mode, owner, group and size are changed with chmod(), chown()
+ * and truncate(); explicitly set access/modification times are applied
+ * with utime().  When only one of the two times is being set, the other
+ * one is read back from the host first so that utime() leaves it as it
+ * was.  Finally, if HOSTFS_ATTR_ATIME or HOSTFS_ATTR_MTIME is flagged,
+ * attrs->ia_atime and attrs->ia_mtime are refreshed from the host.
+ *
+ * Returns 0 on success or the -errno of the first step that failed;
+ * steps already carried out are not undone.
+ */
+int set_attr(const char *file, struct hostfs_iattr *attrs)
+{
+	struct utimbuf buf;
+	/* stat_file() reports times as unsigned long, which need not be
+	 * the type of the utimbuf members, so go through temporaries
+	 * instead of passing pointers to the members themselves.
+	 */
+	unsigned long atime, mtime;
+	int err, ma;
+
+	if(attrs->ia_valid & HOSTFS_ATTR_MODE){
+		if(chmod(file, attrs->ia_mode) != 0) return(-errno);
+	}
+	if(attrs->ia_valid & HOSTFS_ATTR_UID){
+		if(chown(file, attrs->ia_uid, -1)) return(-errno);
+	}
+	if(attrs->ia_valid & HOSTFS_ATTR_GID){
+		if(chown(file, -1, attrs->ia_gid)) return(-errno);
+	}
+	if(attrs->ia_valid & HOSTFS_ATTR_SIZE){
+		if(truncate(file, attrs->ia_size)) return(-errno);
+	}
+	ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET;
+	if((attrs->ia_valid & ma) == ma){
+		buf.actime = attrs->ia_atime;
+		buf.modtime = attrs->ia_mtime;
+		if(utime(file, &buf) != 0) return(-errno);
+	}
+	else {
+		if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){
+			/* Only atime changes: keep the current mtime. */
+			err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+					NULL, NULL, NULL, &mtime, NULL, 
+					NULL, NULL);
+			if(err != 0) return(err);
+			buf.actime = attrs->ia_atime;
+			buf.modtime = mtime;
+			if(utime(file, &buf) != 0) return(-errno);
+		}
+		if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){
+			/* Only mtime changes: keep the current atime. */
+			err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+					NULL, NULL, &atime, NULL, NULL, 
+					NULL, NULL);
+			if(err != 0) return(err);
+			buf.actime = atime;
+			buf.modtime = attrs->ia_mtime;
+			if(utime(file, &buf) != 0) return(-errno);
+		}
+	}
+	/* HOSTFS_ATTR_CTIME is accepted but nothing is done for it. */
+	if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
+		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+				NULL, NULL, &attrs->ia_atime, &attrs->ia_mtime,
+				NULL, NULL, NULL);
+		if(err != 0) return(err);
+	}
+	return(0);
+}
+
+/* Create the symlink 'from' pointing at 'to'.  Returns 0 or -errno. */
+int make_symlink(const char *from, const char *to)
+{
+	int err;
+
+	err = symlink(to, from);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Remove the host file 'file'.  Returns 0 or -errno. */
+int unlink_file(const char *file)
+{
+	int err;
+
+	err = unlink(file);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Create the host directory 'file' with 'mode'.  Returns 0 or -errno. */
+int do_mkdir(const char *file, int mode)
+{
+	int err;
+
+	err = mkdir(file, mode);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Remove the host directory 'file'.  Returns 0 or -errno. */
+int do_rmdir(const char *file)
+{
+	int err;
+
+	err = rmdir(file);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Create a special file on the host with mknod().  Returns 0 or -errno. */
+int do_mknod(const char *file, int mode, int dev)
+{
+	int err;
+
+	err = mknod(file, mode, dev);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Create the hard link 'from' for the existing file 'to'.
+ * Returns 0 or -errno.
+ */
+int link_file(const char *to, const char *from)
+{
+	int err;
+
+	err = link(to, from);
+	if(err) return(-errno);
+	return(0);
+}
+
+/* Read the target of the symlink 'file' into 'buf', which holds 'size'
+ * bytes.  Returns 0 on success, -EINVAL if 'size' leaves no room for a
+ * terminator, or -errno.
+ *
+ * readlink() does not terminate its result, so one byte is held back for
+ * the '\0'.  A target that does not fit is cut short, but the buffer is
+ * always a valid string; previously a target of 'size' bytes or more left
+ * it unterminated.
+ */
+int do_readlink(char *file, char *buf, int size)
+{
+	int n;
+
+	if(size <= 0) return(-EINVAL);
+	n = readlink(file, buf, size - 1);
+	if(n < 0) return(-errno);
+	buf[n] = '\0';
+	return(0);
+}
+
+/* Rename 'from' to 'to' on the host.  Returns 0 or -errno. */
+int rename_file(char *from, char *to)
+{
+	int err;
+
+	err = rename(from, to);
+	if(err < 0) return(-errno);
+	return(0);
+}
+
+/* statfs64() the host filesystem containing 'root' and copy the result
+ * to the out parameters.  At most 'fsid_size' bytes of the filesystem id
+ * are copied to 'fsid_out'; 'spare_out' must have room for six longs.
+ * Returns 0 or -errno.
+ */
+int do_statfs(char *root, long *bsize_out, long long *blocks_out, 
+	      long long *bfree_out, long long *bavail_out, 
+	      long long *files_out, long long *ffree_out,
+	      void *fsid_out, int fsid_size, long *namelen_out, 
+	      long *spare_out)
+{
+	struct statfs64 buf;
+	int err;
+
+	err = statfs64(root, &buf);
+	if(err < 0) return(-errno);
+	*bsize_out = buf.f_bsize;
+	*blocks_out = buf.f_blocks;
+	*bfree_out = buf.f_bfree;
+	*bavail_out = buf.f_bavail;
+	*files_out = buf.f_files;
+	*ffree_out = buf.f_ffree;
+	memcpy(fsid_out, &buf.f_fsid, 
+	       sizeof(buf.f_fsid) > fsid_size ? fsid_size : 
+	       sizeof(buf.f_fsid));
+	*namelen_out = buf.f_namelen;
+	spare_out[0] = buf.f_spare[0];
+	spare_out[1] = buf.f_spare[1];
+	spare_out[2] = buf.f_spare[2];
+	spare_out[3] = buf.f_spare[3];
+	spare_out[4] = buf.f_spare[4];
+	spare_out[5] = buf.f_spare[5];
+	return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/2_5compat.h linux_umopenmosix/arch/um/include/2_5compat.h
--- linux-2.4.17/arch/um/include/2_5compat.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/2_5compat.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __2_5_COMPAT_H__
+#define __2_5_COMPAT_H__
+
+#include "linux/version.h"
+
+/* Each macro below is defined twice, once for kernels older than 2.5.0
+ * and once for 2.5.0 and later, so that code using them builds against
+ * either.  Every statement-like macro expands WITHOUT a trailing
+ * semicolon in both variants; callers supply it.
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0)
+
+#define major(dev) MAJOR(dev)
+#define minor(dev) MINOR(dev)
+#define kdev_same(dev1, dev2) ((dev1) == (dev2))
+#define mk_kdev(maj, min) MKDEV(maj, min)
+
+/* Initializer for a struct console. */
+#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \
+	name :		dev_name, \
+	write :		write_proc, \
+	read :		NULL, \
+	device :	device_proc, \
+	wait_key :	NULL, \
+	unblank :	NULL, \
+	setup :		setup_proc, \
+	flags :		f, \
+	index :		-1, \
+	cflag :		0, \
+	next :		NULL \
+}
+
+/* Initializer for a struct gendisk. */
+#define INIT_GENDISK(maj, name, parts, bsizes, max, blops) { \
+	major :		maj, \
+	major_name :	name, \
+	minor_shift :	0, \
+	max_p :		1, \
+	part :		parts, \
+	sizes :		bsizes, \
+	nr_real :	max, \
+	real_devices :	NULL, \
+	next :		NULL, \
+	fops :		blops, \
+	de_arr :	NULL, \
+	flags :		0 \
+}
+
+/* 'lock' is ignored by this variant. */
+#define INIT_QUEUE(queue, request, lock) blk_init_queue(queue, request)
+
+#define ELV_NOOP ELEVATOR_NOOP
+/* 'queue' is ignored by this variant. */
+#define INIT_ELV(queue, elv) elevator_init(elv, ELV_NOOP)
+
+#define REQUEST_LOCK io_request_lock
+
+/* Arguments are parenthesised so that expressions can be passed safely. */
+#define INIT_HARDSECT(arr, maj, sizes) (arr)[maj] = (sizes)
+
+#define IS_WRITE(req) ((req)->cmd == WRITE)
+#define IS_READ(req) ((req)->cmd == READ)
+
+#define CPU(task) ((task)->processor)
+
+#define yield() do { current->policy |= SCHED_YIELD; schedule(); } while(0)
+
+/* No semicolon after while(0): with one, "if(x) SET_PRI(t); else ..."
+ * would not compile, and the 2.5 variant below has none either.
+ */
+#define SET_PRI(task) \
+	do { (task)->nice = 20; (task)->counter = -100; } while(0)
+
+#else
+
+/* Initializer for a struct console. */
+#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \
+	name :		dev_name, \
+	write :		write_proc, \
+	read :		NULL, \
+	device :	device_proc, \
+	setup :		setup_proc, \
+	flags :		f, \
+	index :		-1, \
+	cflag :		0, \
+	next :		NULL \
+}
+
+/* Initializer for a struct gendisk. */
+#define INIT_GENDISK(maj, name, parts, bsizes, max, blops) { \
+	major :		maj, \
+	major_name :	name, \
+	minor_shift :	0, \
+	part :		parts, \
+	sizes :		bsizes, \
+	nr_real :	max, \
+	next :		NULL, \
+	fops :		blops, \
+	de_arr :	NULL, \
+	flags :		0 \
+}
+
+#define INIT_QUEUE(queue, request, lock) blk_init_queue(queue, request, lock)
+
+#define ELV_NOOP elevator_noop
+#define INIT_ELV(queue, elv) elevator_init(queue, elv, ELV_NOOP)
+
+#define REQUEST_LOCK ubd_lock
+
+/* Expands to nothing in this variant. */
+#define INIT_HARDSECT(arr, maj, sizes)
+
+#define IS_WRITE(req) (rq_data_dir(req) == WRITE)
+#define IS_READ(req) (rq_data_dir(req) == READ)
+
+#define CPU(task) ((task)->cpu)
+
+/* Does nothing in this variant. */
+#define SET_PRI(task) do { } while(0)
+
+#endif
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/chan_kern.h linux_umopenmosix/arch/um/include/chan_kern.h
--- linux-2.4.17/arch/um/include/chan_kern.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/chan_kern.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __CHAN_KERN_H__
+#define __CHAN_KERN_H__
+
+#include "linux/tty.h"
+#include "linux/list.h"
+#include "chan_user.h"
+
+/* One channel.  'ops' points to the struct chan_ops (declared in
+ * chan_user.h) for the channel and 'pri' is one of the chan_init_pri
+ * values from the same header.
+ * NOTE(review): presumably channels are chained through 'list' into the
+ * lists that the functions below receive as 'chans', and 'data' is the
+ * per-channel state passed to the ops -- confirm against the
+ * implementation, which is not part of this header.
+ */
+struct chan {
+	struct list_head list;
+	unsigned int primary:1;
+	unsigned int input:1;
+	unsigned int output:1;
+	int opened;
+	int fd;
+	enum chan_init_pri pri;
+	struct chan_ops *ops;
+	void *data;
+};
+
+/* Operations on a list of channels; each takes the list head as 'chans'. */
+extern void chan_interrupt(struct list_head *chans, struct tty_struct *tty);
+extern int parse_chan_pair(char *str, struct list_head *chans, int pri, 
+			   int device, struct chan_opts *opts);
+extern int open_chan(struct list_head *chans);
+extern int write_chan(struct list_head *chans, const char *buf, int len);
+extern int console_write_chan(struct list_head *chans, const char *buf, 
+			      int len);
+extern void close_chan(struct list_head *chans);
+/* 'irq_setup' is a callback taking (fd, input, output, data). */
+extern void enable_chan(struct list_head *chans, 
+			int (*irq_setup)(int fd, int input, int output, 
+					 void *data),
+			void *data);
+extern void disable_chan(struct list_head *chans);
+/* The window size is returned through 'rows_out' and 'cols_out'. */
+extern int chan_window_size(struct list_head *chans, 
+			     unsigned short *rows_out, 
+			     unsigned short *cols_out);
+extern int chan_out_fd(struct list_head *chans);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/chan_user.h linux_umopenmosix/arch/um/include/chan_user.h
--- linux-2.4.17/arch/um/include/chan_user.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/chan_user.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __CHAN_USER_H__
+#define __CHAN_USER_H__
+
+#include "init.h"
+
+/* Options handed to a channel's init operation (see chan_ops.init). */
+struct chan_opts {
+	void (*announce)(char *dev_name, int dev);
+	char *xterm_title;
+	int raw;
+};
+
+enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE };
+
+/* Table of operations implemented by each channel type; one such table
+ * exists for each of the types declared below (pty_ops, pts_ops, ...).
+ * NOTE(review): the int arguments are unnamed here; judging by the
+ * generic_* prototypes below the leading int of close/read/write is the
+ * file descriptor and the trailing void * the channel's private data --
+ * confirm against the implementations.
+ */
+struct chan_ops {
+	void *(*init)(char *, int, struct chan_opts *);
+	int (*open)(int, int, void *);
+	void (*close)(int, void *);
+	int (*read)(int, void *);
+	int (*write)(int, const char *, int, void *);
+	int (*console_write)(int, const char *, int, void *);
+	int (*window_size)(int, void *, unsigned short *, unsigned short *);
+	void (*free)(void *);
+};
+
+extern struct chan_ops pty_ops, pts_ops, tty_ops, xterm_ops, fd_ops, 
+	port_ops;
+
+/* Generic implementations with the signatures of the chan_ops members. */
+extern void generic_close(int fd, void *unused);
+extern int generic_read(int fd, void *unused);
+extern int generic_write(int fd, const char *buf, int n, void *unused);
+extern int generic_console_write(int fd, const char *buf, int n, void *state);
+extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
+			       unsigned short *cols_out);
+extern void generic_free(void *data);
+extern int getmaster(char *line);
+extern void run_winch_handlers(void);
+
+/* Register the help text for a channel command-line switch through
+ * __uml_help() (from init.h).  'prefix' must be a string literal, since
+ * it is concatenated with the literals below.  The expansion already ends
+ * in ";".
+ */
+#define __channel_help(fn, prefix) \
+__uml_help(fn, prefix "[0-9]*=\n" \
+" Attach a console or serial line to a host channel. See\n" \
+" http://user-mode-linux.sourceforge.net/input.html for a complete\n" \
+" description of this switch.\n\n" \
+);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/debug.h linux_umopenmosix/arch/um/include/debug.h
--- linux-2.4.17/arch/um/include/debug.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/debug.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) and
+ * Lars Brinkhoff.
+ * Licensed under the GPL
+ */
+#ifndef __DEBUG_H
+#define __DEBUG_H
+
+/* Prototypes of the debugger proxy functions.
+ * This header includes nothing itself, so pid_t must already be declared
+ * wherever it is included.
+ */
+extern int debugger_proxy(int status, pid_t pid);
+extern void child_proxy(pid_t pid, int status);
+extern void init_proxy (pid_t pid, int waiting, int status);
+/* 'debugger_fd' is an out parameter. */
+extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd);
+extern void fake_child_exit(void);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/frame.h linux_umopenmosix/arch/um/include/frame.h
--- linux-2.4.17/arch/um/include/frame.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/frame.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __FRAME_H_
+#define __FRAME_H_
+
+#include "sysdep/frame.h"
+
+/* Description of a signal frame: a block of 'len' bytes at 'data' plus a
+ * set of *_index members.  struct arch_frame_data comes from
+ * sysdep/frame.h.
+ * NOTE(review): presumably the *_index members are offsets into 'data'
+ * of the corresponding frame fields, as recorded by
+ * capture_signal_stack() -- confirm against the implementation.
+ */
+struct sc_frame {
+	void *data;
+	int len;
+	int sig_index;
+	int sc_index;
+	int sr_index;
+	int sr_relative;
+	int sp_index;
+	struct arch_frame_data arch;
+};
+
+extern struct sc_frame signal_frame_sc;
+
+/* Same as struct sc_frame, but with 'sip_index' and 'si_index' in place
+ * of 'sc_index' and without the architecture-specific part.
+ */
+struct si_frame {
+	void *data;
+	int len;
+	int sig_index;
+	int sip_index;
+	int si_index;
+	int sr_index;
+	int sr_relative;
+	int sp_index;
+};
+
+extern struct si_frame signal_frame_si;
+
+extern void capture_signal_stack(void);
+extern void set_sc_ip_sp(void *sc_ptr, unsigned long ip, unsigned long sp);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/frame_kern.h linux_umopenmosix/arch/um/include/frame_kern.h
--- linux-2.4.17/arch/um/include/frame_kern.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/frame_kern.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __FRAME_KERN_H_
+#define __FRAME_KERN_H_
+
+/* The two prototypes differ only in their trailing arguments: the _sc
+ * variant takes two context pointers, the _si variant a siginfo_t.
+ * This header includes nothing itself, so struct sys_pt_regs and
+ * siginfo_t must already be declared wherever it is included.
+ */
+extern int setup_signal_stack_sc(unsigned long stack_top, int sig, 
+				 unsigned long handler,
+				 void (*restorer)(void), 
+				 struct sys_pt_regs *regs, void *sc, 
+				 void *context_sc);
+extern int setup_signal_stack_si(unsigned long stack_top, int sig, 
+				 unsigned long handler, 
+				 void (*restorer)(void), 
+				 struct sys_pt_regs *regs, siginfo_t *info);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/hostaudio.h linux_umopenmosix/arch/um/include/hostaudio.h
--- linux-2.4.17/arch/um/include/hostaudio.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/hostaudio.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2002 Steve Schmidtke
+ * Licensed under the GPL
+ */
+
+#ifndef HOSTAUDIO_H
+#define HOSTAUDIO_H
+
+/* Device paths.
+ * NOTE(review): presumably the defaults passed as the 'dsp' and 'mixer'
+ * arguments of the open functions below -- confirm against the callers.
+ */
+#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp"
+#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer"
+
+/* Per-open state of the dsp device: just a file descriptor. */
+struct hostaudio_state {
+	int fd;
+};
+
+/* Per-open state of the mixer device: just a file descriptor. */
+struct hostmixer_state {
+	int fd;
+};
+
+/* UML user-side prototypes */
+extern ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer,
+				   size_t count, loff_t *ppos);
+extern ssize_t hostaudio_write_user(struct hostaudio_state *state, 
+				    const char *buffer, size_t count, 
+				    loff_t *ppos);
+extern int hostaudio_ioctl_user(struct hostaudio_state *state, 
+				unsigned int cmd, unsigned long arg);
+extern int hostaudio_open_user(struct hostaudio_state *state, int r, int w, 
+			       char *dsp);
+extern int hostaudio_release_user(struct hostaudio_state *state);
+extern int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, 
+				unsigned int cmd, unsigned long arg);
+extern int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, 
+				      int w, char *mixer);
+extern int hostmixer_release_mixdev_user(struct hostmixer_state *state);
+
+#endif /* HOSTAUDIO_H */
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/init.h linux_umopenmosix/arch/um/include/init.h
--- linux-2.4.17/arch/um/include/init.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/init.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,106 @@
+#ifndef _LINUX_UML_INIT_H
+#define _LINUX_UML_INIT_H
+
+/* These macros are used to mark some functions or
+ * initialized data (doesn't apply to uninitialized data)
+ * as `initialization' functions. The kernel can take this
+ * as a hint that the function is used only during the initialization
+ * phase and free up used memory resources afterwards.
+ *
+ * Usage:
+ * For functions:
+ *
+ * You should add __init immediately before the function name, like:
+ *
+ * static void __init initme(int x, int y)
+ * {
+ *    extern int z; z = x * y;
+ * }
+ *
+ * If the function has a prototype somewhere, you can also add
+ * __init between the closing parenthesis of the prototype and the
+ * semicolon:
+ *
+ * extern int initialize_foobar_device(int, int, int) __init;
+ *
+ * For initialized data:
+ * You should insert __initdata between the variable name and equal
+ * sign followed by value, e.g.:
+ *
+ * static int init_variable __initdata = 0;
+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ *
+ * Don't forget to initialize data not at file scope, i.e. within a function,
+ * as gcc otherwise puts the data into the bss section and not into the init
+ * section.
+ *
+ * Also note, that this data cannot be "const".
+ */
+
+/* Provide the basic types and markers only when linux/init.h has not
+ * already been included.
+ */
+#ifndef _LINUX_INIT_H
+typedef int (*initcall_t)(void);
+typedef void (*exitcall_t)(void);
+
+#define __init          __attribute__ ((__section__ (".text.init")))
+#define __exit          __attribute__ ((unused, __section__(".text.exit")))
+#define __initdata      __attribute__ ((__section__ (".data.init")))
+
+#endif
+/* One command-line switch: the switch string and its handler. */
+struct uml_param {
+        const char *str;
+        int (*setup_func)(char *, int *);
+};
+
+/* NOTE(review): presumably these mark the bounds of the sections that
+ * the macros below place their objects in and are provided by the linker
+ * script -- confirm.
+ */
+extern initcall_t __uml_initcall_start, __uml_initcall_end;
+extern initcall_t __uml_postsetup_start, __uml_postsetup_end;
+extern const char *__uml_help_start, *__uml_help_end;
+
+/* Place a pointer to 'fn' in the section named by __uml_init_call. */
+#define __uml_initcall(fn)					  	\
+	static initcall_t __uml_initcall_##fn __uml_init_call = fn
+
+/* Place a pointer to 'fn' in the section named by __uml_exit_call. */
+#define __uml_exitcall(fn)						\
+	static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn
+
+extern struct uml_param __uml_setup_start, __uml_setup_end;
+
+/* Place a pointer to 'fn' in the section named by __uml_postsetup_call. */
+#define __uml_postsetup(fn)						\
+	static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn
+
+/* Compile-time check that 'string' is not empty: for "" the array size
+ * sizeof("") - 2 is negative, which the compiler rejects.  The struct is
+ * only declared, never instantiated.
+ */
+#define __non_empty_string(dummyname,string)				\
+	struct __uml_non_empty_string_struct_##dummyname		\
+	{								\
+		char _string[sizeof(string)-2];				\
+	}
+
+/* Define the switch 'str' handled by 'fn', with 'help' as its help text.
+ * Both strings must be non-empty literals.  Expands to several
+ * declarations, the last one without a trailing semicolon.
+ */
+#define __uml_setup(str, fn, help...)					\
+	__non_empty_string(fn ##_setup, str);				\
+	__uml_help(fn, help);						\
+	static char __uml_setup_str_##fn[] __initdata = str;		\
+	static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn }
+
+/* Store the non-empty help text 'help' and place a pointer to it in the
+ * section named by __uml_setup_help.  'fn' is only used to build unique
+ * names.
+ */
+#define __uml_help(fn, help...)						\
+	__non_empty_string(fn ##__help, help);				\
+	static char __uml_help_str_##fn[] __initdata = help;		\
+	static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+/* Each attribute places the object in the named section and marks it
+ * 'unused' so that gcc does not warn about the otherwise unreferenced
+ * static objects.
+ */
+#define __uml_init_setup __attribute__ ((unused,__section__ (".uml.setup.init")))
+#define __uml_setup_help __attribute__ ((unused,__section__ (".uml.help.init")))
+#define __uml_init_call __attribute__ ((unused,__section__ (".uml.initcall.init")))
+#define __uml_postsetup_call __attribute__ ((unused,__section__ (".uml.postsetup.init")))
+#define __uml_exit_call __attribute__ ((unused,__section__ (".uml.exitcall.exit")))
+
+#endif /* _LINUX_UML_INIT_H */
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/initrd.h linux_umopenmosix/arch/um/include/initrd.h
--- linux-2.4.17/arch/um/include/initrd.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/initrd.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __INITRD_USER_H__
+#define __INITRD_USER_H__
+
+/* NOTE(review): presumably reads the file 'filename' into the 'size'-byte
+ * buffer 'buf' -- confirm against the implementation.
+ */
+extern int load_initrd(char *filename, void *buf, int size);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/irq_user.h linux_umopenmosix/arch/um/include/irq_user.h
--- linux-2.4.17/arch/um/include/irq_user.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/irq_user.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __IRQ_USER_H__
+#define __IRQ_USER_H__
+
+/* Prototypes for the file-descriptor based interrupt support.  Only
+ * built-in types are used, so no other header is needed.
+ */
+extern void sigio_handler(int sig, void *sc, int usermode);
+extern int activate_fd(int irq, int fd, void *dev_id);
+extern void free_irq_by_dev(void *dev_id);
+extern void free_irq_by_fd(int fd);
+extern void reactivate_fd(int fd);
+extern void forward_interrupts(int pid);
+extern void init_irq_signals(int on_sigstack);
+extern void forward_ipi(int fd, int pid);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/kern.h linux_umopenmosix/arch/um/include/kern.h
--- linux-2.4.17/arch/um/include/kern.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/kern.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __KERN_H__
+#define __KERN_H__
+
+/* These are all user-mode things which are convenient to call directly
+ * from kernel code and for which writing a wrapper is too much of a pain.
+ * The regular include files can't be included because this file is included
+ * only into kernel code, and user-space includes conflict with kernel
+ * includes.
+ */
+
+extern int errno;
+
+extern int getpid(void);
+extern int clone(int (*proc)(void *), void *sp, int flags, void *data);
+extern int sleep(int);
+extern int printf(char *fmt, ...);
+extern char *strerror(int errnum);
+extern char *ptsname(int __fd);
+extern int munmap(void *, int);
+extern void *sbrk(int increment);
+extern void *malloc(int size);
+extern void perror(char *err);
+extern int kill(int pid, int sig);
+extern int getuid(void);
+extern int pause(void);
+extern int write(int, const void *, int);
+extern int exit(int);
+extern int close(int);
+extern int read(unsigned int, char *, int);
+extern int pipe(int *);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/kern_util.h linux_umopenmosix/arch/um/include/kern_util.h
--- linux-2.4.17/arch/um/include/kern_util.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/kern_util.h Wed Jun 26 23:45:14 2002
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __KERN_UTIL_H__
+#define __KERN_UTIL_H__
+
+#include "sysdep/ptrace.h"
+
+extern int ncpus;
+extern char *linux_prog;
+extern char *gdb_init;
+extern int kmalloc_ok;
+
+/* Round an address down/up to a page boundary; the result is a void *.
+ * PAGE_MASK and PAGE_SIZE must be defined by the includer.  The argument
+ * is parenthesised so that any expression, e.g. a conditional one, can be
+ * passed; it is evaluated exactly once.
+ */
+#define ROUND_DOWN(addr) ((void *)(((unsigned long) (addr)) & PAGE_MASK))
+#define ROUND_UP(addr) ROUND_DOWN(((unsigned long) (addr)) + PAGE_SIZE - 1)
+
+/* Kernel-side helpers.  Tasks and signal contexts are passed as void *.
+ * NOTE(review): presumably so that code which cannot include the kernel
+ * headers can call these as well -- confirm.
+ */
+extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg);
+extern unsigned long stack_sp(unsigned long page);
+extern int kernel_thread_proc(void *data);
+extern long execute_syscall(struct sys_pt_regs regs);
+extern void syscall_segv(int sig);
+extern int current_pid(void);
+extern void set_init_pid(int pid);
+extern unsigned long alloc_stack(void);
+extern int do_signal(int error);
+extern int is_stack_fault(unsigned long sp);
+extern unsigned long segv(unsigned long address, unsigned long ip, 
+			  int is_write, int is_user);
+extern int set_user_mode(void *task, int restore_state, int protect_mem);
+extern void syscall_ready(void);
+extern void set_tracing(void *t, int tracing);
+extern int is_tracing(void *task);
+extern int segv_syscall(void);
+extern void ret_from_sys_call(void);
+extern void kern_finish_exec(void *task, int new_pid, unsigned long stack);
+extern int page_size(void);
+extern int page_mask(void);
+extern int need_finish_fork(void);
+extern int do_proc_op(void *t, int proc_id);
+extern void free_stack(unsigned long stack);
+extern void add_input_request(int op, void (*proc)(int), void *arg);
+extern int sys_execve(char *file, char **argv, char **env);
+extern char *current_cmd(void);
+extern void timer_handler(int sig, void *sc, int usermode);
+extern int set_signals(int enable);
+extern void force_sigbus(void);
+/* Declared once; the list used to repeat this prototype three times. */
+extern int pid_to_processor_id(int pid);
+extern void block_signals(void);
+extern void unblock_signals(void);
+extern void deliver_signals(void *t);
+extern void lock_syscall(void);
+extern void unlock_syscall(void);
+extern void lock_trap(void);
+extern void unlock_trap(void);
+extern void lock_pid(void);
+extern void unlock_pid(void);
+extern void cpu_idle(void);
+extern void finish_fork(void);
+extern void paging_init(void);
+extern unsigned long um_virt_to_phys(void *t, unsigned long addr);
+extern void init_flush_vm(void);
+extern void *process_state(void *t);
+extern void *syscall_sp(void *t);
+extern void syscall_trace(void);
+extern int hz(void);
+extern void idle_timer(void);
+extern unsigned int do_IRQ(int irq, int user_mode);
+extern int external_pid(void *t);
+extern void boot_timer_handler(int sig);
+extern void interrupt_end(void);
+extern void tracing_reboot(void);
+extern void tracing_halt(void);
+extern void tracing_cb(void (*proc)(void *), void *arg);
+extern int debugger_signal(int status, int pid);
+extern void child_signal(int pid, int status);
+extern int init_ptrace_proxy(int idle_pid, int startup, int stop);
+extern void check_stack_overflow(void *ptr);
+extern void relay_signal(int sig, void *sc, int usermode);
+extern int singlestepping(void *t);
+extern void not_implemented(void);
+extern void finish_fork_handler(int sig);
+extern int user_context(unsigned long sp);
+extern void timer_irq(int user_mode);
+extern void unprotect_stack(unsigned long stack);
+extern void kern_start_exec(int new_pid);
+extern void do_exitcalls(void);
+extern void do_uml_exitcalls(void);
+extern int attach_debugger(int idle_pid, int pid, int stop);
+extern void *round_up(unsigned long addr);
+extern void *round_down(unsigned long addr);
+extern void bad_segv(unsigned long address, unsigned long ip, int is_write);
+extern int config_gdb(char *str);
+extern int remove_gdb(void);
+extern char *uml_strdup(char *string);
+extern void unprotect_kernel_mem(int delay_signals);
+extern void protect_kernel_mem(int delay_signals);
+extern unsigned long get_kmem_end(void);
+extern void set_kmem_end(unsigned long);
+extern void set_task_sizes(int arg);
+extern void uml_cleanup(void);
+extern void set_current(void *t);
+extern void lock_signalled_task(void *t);
+extern void IPI_handler(int cpu);
+extern int jail_setup(char *line, int *add);
+extern void *get_init_task(void);
+extern int copy_to_user_proc(void *to, void *from, int size);
+extern int copy_from_user_proc(void *to, void *from, int size);
+extern void set_thread_sc(void *sc);
+extern int get_restore_state(void *t);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. 
This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/line.h linux_umopenmosix/arch/um/include/line.h --- linux-2.4.17/arch/um/include/line.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/line.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __LINE_H__ +#define __LINE_H__ + +#include "linux/list.h" +#include "asm/semaphore.h" +#include "chan_user.h" + +struct line { + char *init_str; + int init_pri; + struct list_head chan_list; + int count; + struct tty_struct *tty; + struct semaphore sem; + int initialized; +}; + +#define LINE_INIT(str) \ + { init_str : str, \ + init_pri : INIT_STATIC, \ + chan_list : { }, \ + count : 0, \ + tty : NULL, \ + sem : { } } + +struct winch_lines { + struct list_head list; + struct line *lines; + int nlines; +}; + +extern void line_interrupt(int irq, void *data, struct pt_regs *unused); +extern void line_close(struct line *lines, int n); +extern int line_open(struct line *lines, int n, struct tty_struct *tty, + int (*setup_irq)(int fd, int input, int output, + void *data), + struct chan_opts *opts); +extern void line_setup(struct line *lines, int num, char *init); +extern void run_winch_handlers(void); +extern void register_winch(struct winch_lines *lines); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/mconsole.h linux_umopenmosix/arch/um/include/mconsole.h --- linux-2.4.17/arch/um/include/mconsole.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/mconsole.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Licensed under the GPL + */ + +#ifndef __MCONSOLE_H__ +#define __MCONSOLE_H__ + +#define MCONSOLE_MAGIC (0xcafebabe) +#define MCONSOLE_MAX_DATA (512) +#define MCONSOLE_VERSION (1) + +struct mconsole_request { + unsigned long magic; + int version; + int len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mconsole_reply { + int err; + int more; + int len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mc_request; + +struct mconsole_command +{ + char *command; + void (*handler)(struct mc_request *req); + int as_interrupt; +}; + +struct mc_request +{ + int len; + int as_interrupt; + + int originating_fd; + int originlen; + unsigned char origin[128]; /* sockaddr_un */ + + struct mconsole_request request; + struct mconsole_command *cmd; +}; + +extern char mconsole_socket_name[]; + +extern int mconsole_unlink_socket(void); +extern int mconsole_reply(struct mc_request *req, char *reply, int err, + int more); +extern void mconsole_version(struct mc_request *req); +extern void mconsole_help(struct mc_request *req); +extern void mconsole_halt(struct mc_request *req); +extern void mconsole_reboot(struct mc_request *req); +extern void mconsole_config(struct mc_request *req); +extern void mconsole_remove(struct mc_request *req); +extern void mconsole_sysrq(struct mc_request *req); +extern void mconsole_cad(struct mc_request *req); +extern int mconsole_create_listening_socket(void); +extern int mconsole_get_request(int fd, struct mc_request *req); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/mconsole_kern.h linux_umopenmosix/arch/um/include/mconsole_kern.h --- linux-2.4.17/arch/um/include/mconsole_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/mconsole_kern.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MCONSOLE_KERN_H__ +#define __MCONSOLE_KERN_H__ + +#include "linux/config.h" +#include "linux/list.h" +#include "mconsole.h" + +struct mconsole_entry { + struct list_head list; + struct mc_request request; +}; + +struct mc_device { + struct list_head list; + char *name; + int (*config)(char *); + int (*remove)(char *); +}; + +#ifdef CONFIG_MCONSOLE + +extern void mconsole_register_dev(struct mc_device *new); + +#else + +static inline void mconsole_register_dev(struct mc_device *new) +{ +} + +#endif + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/mem_user.h linux_umopenmosix/arch/um/include/mem_user.h --- linux-2.4.17/arch/um/include/mem_user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/mem_user.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,61 @@ +/* + * arch/um/include/mem_user.h + * + * BRIEF MODULE DESCRIPTION + * user side memory interface for support IO memory inside user mode linux + * + * Copyright (C) 2001 RidgeRun, Inc. + * Author: RidgeRun, Inc. + * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#ifndef _MEM_USER_H
+#define _MEM_USER_H
+
+#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) /* NOTE(review): an already 4M-aligned n is advanced by a full 4M - confirm intended */
+
+extern unsigned long host_task_size;
+extern unsigned long task_size;
+
+extern int init_mem_user(void);
+extern int create_mem_file(unsigned long len);
+extern void setup_range(int fd, char *driver, unsigned long start,
+			unsigned long usable, unsigned long total);
+extern void map(unsigned long virt, void *p, unsigned long len,
+		int r, int w, int x);
+extern int parse_iomem(char *str, int *add);
+extern void setup_memory(void);
+extern unsigned long find_iomem(char *driver, unsigned long *len_out);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/process.h linux_umopenmosix/arch/um/include/process.h
--- linux-2.4.17/arch/um/include/process.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/process.h	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __PROCESS_H__
+#define __PROCESS_H__
+
+#include <asm/sigcontext.h>	/* struct sigcontext is passed by value below */
+
+extern void sig_handler(int sig, struct sigcontext sc);
+extern void irq_handler(int sig, struct sigcontext sc);
+extern void alarm_handler(int sig, struct sigcontext sc);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sigcontext.h linux_umopenmosix/arch/um/include/sigcontext.h --- linux-2.4.17/arch/um/include/sigcontext.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sigcontext.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UML_SIGCONTEXT_H__ +#define __UML_SIGCONTEXT_H__ + +extern int copy_sigcontext_to_user(void *sc, struct sys_pt_regs *regs, + unsigned long cr2, int err); +extern void fill_in_regs(struct sys_pt_regs *regs, void *sc_ptr); +extern int copy_sc_to_user(void *to_ptr, void *from_ptr, void *data); +extern int copy_sc_from_user(void *to_ptr, void *from_ptr); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/signal_kern.h linux_umopenmosix/arch/um/include/signal_kern.h --- linux-2.4.17/arch/um/include/signal_kern.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/signal_kern.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SIGNAL_KERN_H__ +#define __SIGNAL_KERN_H__ + +#include "sysdep/ptrace.h" + +extern void signal_deliverer(int sig); +extern int probe_stack(unsigned long sp, int delta); +extern int have_signals(void *t); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/signal_user.h linux_umopenmosix/arch/um/include/signal_user.h --- linux-2.4.17/arch/um/include/signal_user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/signal_user.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SIGNAL_USER_H__ +#define __SIGNAL_USER_H__ + +extern int signal_stack_size; + +extern int change_sig(int signal, int on); +extern void set_sigstack(void *stack, int size); +extern void set_handler(int sig, void (*handler)(int), int flags, ...); +extern void setup_stack(unsigned long stack_top, struct sys_pt_regs *regs_out); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/syscall_user.h linux_umopenmosix/arch/um/include/syscall_user.h
--- linux-2.4.17/arch/um/include/syscall_user.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/syscall_user.h	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYSCALL_USER_H__
+#define __SYSCALL_USER_H__
+
+#include <asm/sigcontext.h>	/* struct sigcontext is passed by value below */
+
+extern void syscall_handler(int sig, struct sigcontext sc);
+extern int exit_kernel(int pid, void *task);
+extern int do_syscall(void *task, int pid);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/sysdep-i386/frame.h linux_umopenmosix/arch/um/include/sysdep-i386/frame.h
--- linux-2.4.17/arch/um/include/sysdep-i386/frame.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/sysdep-i386/frame.h	Wed Jun 26 23:45:14 2002
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __FRAME_I386_H
+#define __FRAME_I386_H
+
+#include <asm/page.h>	/* NOTE(review): header name was lost in extraction; chosen for PAGE_SIZE/PAGE_MASK - confirm against upstream */
+
+/* This stuff is to calculate the size of the fp state struct at runtime
+ * because it has changed between 2.2 and 2.4 and it would be good for a
+ * UML compiled on one to work on the other.
+ * So, setup_arch_frame_raw fills in the arch struct of the raw data, which
+ * just contains the address of the end of the sigcontext. This is invoked
+ * from the signal handler.
+ * setup_arch_frame uses that data to figure out what
+ * arch_frame_data.fpstate_size should be. It really has no idea, since it's
+ * not allowed to do sizeof(struct fpstate) but it's safe to consider that it's
+ * everything from the end of the sigcontext up to the top of the stack. So,
+ * it masks off the page number to get the offset within the page and subtracts
+ * that from the page size, and that's how big the fpstate struct will be
+ * considered to be.
+ */
+
+struct arch_frame_data_raw {
+	unsigned long sc_end;	/* address just past the sigcontext */
+};
+
+static inline void setup_arch_frame_raw(struct arch_frame_data_raw *data,
+					struct sigcontext *sc)
+{
+	/* sc + 1 is the first byte after the sigcontext */
+	data->sc_end = (unsigned long) (sc + 1);
+}
+
+struct arch_frame_data {
+	int fpstate_size;	/* bytes from sc_end to the end of its page */
+};
+
+static inline void setup_arch_frame(struct arch_frame_data_raw *in,
+				    struct arch_frame_data *out)
+{
+	/* keep only the offset of sc_end within its page */
+	unsigned long page_offset = in->sc_end & ~PAGE_MASK;
+
+	out->fpstate_size = PAGE_SIZE - page_offset;
+}
+
+/* Locate the stack slot that holds the SA_RESTORER function address.
+ * For i386 that is the signal handler return address, so it is
+ * located next to the frame pointer.
+ * This helper is inlined, so __builtin_frame_address(0) is correct.
+ * Out of line it would have to be __builtin_frame_address(1).
+ */
+
+static inline unsigned long frame_restorer(void)
+{
+	unsigned long *frame = __builtin_frame_address(0);
+
+	/* one word above the frame address */
+	return (unsigned long) &frame[1];
+}
+
+/* Likewise, report the value sp had when the handler was first
+ * entered. It is used to work out the proper sp when delivering
+ * signals.
+ */
+
+static inline unsigned long frame_sp(void)
+{
+	unsigned long *frame = __builtin_frame_address(0);
+
+	/* one word above the frame address */
+	return (unsigned long) &frame[1];
+}
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. 
This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-i386/ptrace.h linux_umopenmosix/arch/um/include/sysdep-i386/ptrace.h --- linux-2.4.17/arch/um/include/sysdep-i386/ptrace.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-i386/ptrace.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_PTRACE_H +#define __SYSDEP_I386_PTRACE_H + +#define UM_MAX_REG (17) +#define UM_MAX_FP_REG (27) + +#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(long)) + +struct sys_pt_regs { + unsigned long regs[UM_MAX_REG]; +}; + +#define EMPTY_REGS { { [ 0 ... UM_MAX_REG - 1 ] = 0 } } + +#define UM_REG(r, n) ((r)->regs[n]) + +#define UM_IP(r) UM_REG(r, EIP) +#define UM_SP(r) UM_REG(r, UESP) +#define UM_ELF_ZERO(r) UM_REG(r, EDX) + +#define UM_SYSCALL_RET(r) UM_REG(r, EAX) +#define UM_SYSCALL_NR(r) UM_REG(r, ORIG_EAX) +#define UM_ORIG_SYSCALL(r) UM_REG(r, EAX) + +#define UM_SYSCALL_ARG1(r) UM_REG(r, EBX) +#define UM_SYSCALL_ARG2(r) UM_REG(r, ECX) +#define UM_SYSCALL_ARG3(r) UM_REG(r, EDX) +#define UM_SYSCALL_ARG4(r) UM_REG(r, ESI) +#define UM_SYSCALL_ARG5(r) UM_REG(r, EDI) +#define UM_SYSCALL_ARG6(r) UM_REG(r, EBP) + +#define UM_IP_OFFSET (EIP * sizeof(long)) +#define UM_SP_OFFSET (UESP * sizeof(long)) +#define UM_ELF_ZERO_OFFSET (EDX * sizeof(long)) + +#define UM_SYSCALL_RET_OFFSET (EAX * sizeof(long)) +#define UM_SYSCALL_NR_OFFSET (ORIG_EAX * sizeof(long)) + +#define UM_SYSCALL_ARG1_OFFSET (EBX * sizeof(long)) +#define UM_SYSCALL_ARG2_OFFSET (ECX * sizeof(long)) +#define UM_SYSCALL_ARG3_OFFSET (EDX * sizeof(long)) +#define UM_SYSCALL_ARG4_OFFSET (ESI * sizeof(long)) +#define UM_SYSCALL_ARG5_OFFSET (EDI * sizeof(long)) +#define UM_SYSCALL_ARG6_OFFSET (EBP * sizeof(long)) + +#define 
UM_SET_SYSCALL_RETURN(r, result) UM_REG(r, EAX) = (result) + +#define UM_FIX_EXEC_STACK(sp) do ; while(0) + +#define UM_RESTART_SYSCALL(regs) (UM_IP(regs) -= 2) + +#define UM_HAVE_GETREGS +#define UM_HAVE_GETFPREGS +#define UM_HAVE_SETREGS +#define UM_HAVE_SETFPREGS + + + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + + + + + + + + + + + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-i386/sigcontext.h linux_umopenmosix/arch/um/include/sysdep-i386/sigcontext.h --- linux-2.4.17/arch/um/include/sysdep-i386/sigcontext.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-i386/sigcontext.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYS_SIGCONTEXT_I386_H +#define __SYS_SIGCONTEXT_I386_H + +#define SC_FAULT_ADDR(sc) ((sc)->cr2) +#define SC_FAULT_WRITE(sc) (((sc)->err) & 2) +#define SC_IP(sc) ((sc)->eip) +#define SC_SP(sc) ((sc)->esp) + +#define SC_STACK_SIZE (sizeof(struct sigcontext) + sizeof(struct _fpstate)) + +/* These are General Protection and Page Fault */ +#define SEGV_IS_FIXABLE(sc) (((sc)->trapno == 13) || ((sc)->trapno == 14)) +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. 
This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-i386/syscalls.h linux_umopenmosix/arch/um/include/sysdep-i386/syscalls.h --- linux-2.4.17/arch/um/include/sysdep-i386/syscalls.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-i386/syscalls.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "asm/unistd.h" + +typedef long syscall_handler_t(struct sys_pt_regs regs); + +#define EXECUTE_SYSCALL(syscall, regs) (*sys_call_table[syscall])(regs) + +extern syscall_handler_t sys_modify_ldt; +extern syscall_handler_t old_mmap_i386; +extern syscall_handler_t old_select; +extern syscall_handler_t sys_ni_syscall; + +#define ARCH_SYSCALLS \ + [ __NR_mmap ] = old_mmap_i386, \ + [ __NR_select ] = old_select, \ + [ __NR_vm86old ] = sys_ni_syscall, \ + [ __NR_modify_ldt ] = sys_modify_ldt, \ + [ __NR_lchown32 ] = sys_lchown, \ + [ __NR_getuid32 ] = sys_getuid, \ + [ __NR_getgid32 ] = sys_getgid, \ + [ __NR_geteuid32 ] = sys_geteuid, \ + [ __NR_getegid32 ] = sys_getegid, \ + [ __NR_setreuid32 ] = sys_setreuid, \ + [ __NR_setregid32 ] = sys_setregid, \ + [ __NR_getgroups32 ] = sys_getgroups, \ + [ __NR_setgroups32 ] = sys_setgroups, \ + [ __NR_fchown32 ] = sys_fchown, \ + [ __NR_setresuid32 ] = sys_setresuid, \ + [ __NR_getresuid32 ] = sys_getresuid, \ + [ __NR_setresgid32 ] = sys_setresgid, \ + [ __NR_getresgid32 ] = sys_getresgid, \ + [ __NR_chown32 ] = sys_chown, \ + [ __NR_setuid32 ] = sys_setuid, \ + [ __NR_setgid32 ] = sys_setgid, \ + [ __NR_setfsuid32 ] = sys_setfsuid, \ + [ __NR_setfsgid32 ] = sys_setfsgid, \ + [ __NR_pivot_root ] = sys_pivot_root, \ + [ __NR_mincore ] = sys_mincore, \ + [ __NR_madvise ] = sys_madvise, \ + [ 222 ] = sys_ni_syscall, + +/* 222 doesn't yet have a name in 
include/asm-i386/unistd.h */ + +#define LAST_ARCH_SYSCALL 222 + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-ia64/ptrace.h linux_umopenmosix/arch/um/include/sysdep-ia64/ptrace.h --- linux-2.4.17/arch/um/include/sysdep-ia64/ptrace.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-ia64/ptrace.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_PTRACE_H +#define __SYSDEP_IA64_PTRACE_H + +struct sys_pt_regs { + int foo; +}; + +#define EMPTY_REGS { 0 } + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-ia64/sigcontext.h linux_umopenmosix/arch/um/include/sysdep-ia64/sigcontext.h --- linux-2.4.17/arch/um/include/sysdep-ia64/sigcontext.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-ia64/sigcontext.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SIGCONTEXT_H +#define __SYSDEP_IA64_SIGCONTEXT_H + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-ia64/syscalls.h linux_umopenmosix/arch/um/include/sysdep-ia64/syscalls.h --- linux-2.4.17/arch/um/include/sysdep-ia64/syscalls.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-ia64/syscalls.h Wed Jun 26 23:45:14 2002 @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SYSCALLS_H +#define __SYSDEP_IA64_SYSCALLS_H + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/sysdep-ppc/ptrace.h linux_umopenmosix/arch/um/include/sysdep-ppc/ptrace.h
--- linux-2.4.17/arch/um/include/sysdep-ppc/ptrace.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/sysdep-ppc/ptrace.h	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,104 @@
+/*
+ * Licensed under the GPL
+ */
+
+#ifndef __SYS_PTRACE_PPC_H
+#define __SYS_PTRACE_PPC_H
+
+#include "linux/config.h"
+#include "linux/types.h"
+
+/* the following taken from <asm-ppc/ptrace.h> */
+
+#ifdef CONFIG_PPC64
+#define PPC_REG unsigned long /*long*/
+#else
+#define PPC_REG unsigned long
+#endif
+struct sys_pt_regs_s {
+	PPC_REG gpr[32];
+	PPC_REG nip;
+	PPC_REG msr;
+	PPC_REG orig_gpr3;	/* Used for restarting system calls */
+	PPC_REG ctr;
+	PPC_REG link;
+	PPC_REG xer;
+	PPC_REG ccr;
+	PPC_REG mq;		/* 601 only (not used at present) */
+				/* Used on APUS to hold IPL value. */
+	PPC_REG trap;		/* Reason for being here */
+	PPC_REG dar;		/* Fault registers */
+	PPC_REG dsisr;
+	PPC_REG result;		/* Result of a system call */
+};
+
+#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG))
+
+struct sys_pt_regs {
+	PPC_REG regs[NUM_REGS];	/* one slot per sys_pt_regs_s member */
+};
+
+#define UM_MAX_REG (PT_FPR0)
+#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG))
+
+#define EMPTY_REGS { { [ 0 ... NUM_REGS - 1] = 0 } }
+
+#define UM_REG(r, n) ((r)->regs[n])
+
+#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3)
+#define UM_SP(r) UM_REG(r, PT_R1)
+#define UM_IP(r) UM_REG(r, PT_NIP)
+#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR)
+#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0)
+#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3)
+#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4)
+#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5)
+#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6)
+#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7)
+#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8)
+
+#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG))
+#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG))
+#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG))
+#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG))
+#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG))
+#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG))
+
+#define UM_SET_SYSCALL_RETURN(_regs, result)		\
+do {	/* result is evaluated exactly once */		\
+	__typeof__(result) _um_res = (result);		\
+	if (_um_res < 0) {				\
+		(_regs)->regs[PT_CCR] |= 0x10000000;	\
+		_um_res = -_um_res;			\
+	}						\
+	UM_SYSCALL_RET((_regs)) = _um_res;		\
+} while(0)
+
+extern void shove_aux_table(unsigned long sp);
+#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp)	/* caller supplies the ';' */
+
+/* These aren't actually defined. The undefs are just to make sure
+ * everyone's clear on the concept.
+ */
+#undef UM_HAVE_GETREGS
+#undef UM_HAVE_GETFPREGS
+#undef UM_HAVE_SETREGS
+#undef UM_HAVE_SETFPREGS
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/include/sysdep-ppc/sigcontext.h linux_umopenmosix/arch/um/include/sysdep-ppc/sigcontext.h
--- linux-2.4.17/arch/um/include/sysdep-ppc/sigcontext.h	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/include/sysdep-ppc/sigcontext.h	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __SYS_SIGCONTEXT_PPC_H
+#define __SYS_SIGCONTEXT_PPC_H
+
+#define DSISR_WRITE 0x02000000	/* bit tested in dsisr to detect a write fault */
+
+#define SC_FAULT_ADDR(sc) ({ \
+		struct sigcontext_struct *_sc = (sc); \
+		long retval = -1; \
+		switch (_sc->regs->trap) { \
+		case 0x300: \
+			/* data exception */ \
+			retval = _sc->regs->dar; \
+			break; \
+		case 0x400: \
+			/* instruction exception */ \
+			retval = _sc->regs->nip; \
+			break; \
+		default: \
+			panic("SC_FAULT_ADDR: unhandled trap type\n"); \
+		} \
+		retval; \
+	})
+
+#define SC_FAULT_WRITE(sc) ({ \
+		struct sigcontext_struct *_sc = (sc); \
+		long retval = -1; \
+		switch (_sc->regs->trap) { \
+		case 0x300: \
+			/* data exception */ \
+			retval = !!(_sc->regs->dsisr & DSISR_WRITE); \
+			break; \
+		case 0x400: \
+			/* instruction exception: not a write */ \
+			retval = 0; \
+			break; \
+		default: \
+			panic("SC_FAULT_WRITE: unhandled trap type\n"); \
+		} \
+		retval; \
+	})
+
+#define SC_IP(sc) ((sc)->regs->nip)
+#define SC_SP(sc) ((sc)->regs->gpr[1])
+#define SEGV_IS_FIXABLE(sc) (1)
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysdep-ppc/syscalls.h linux_umopenmosix/arch/um/include/sysdep-ppc/syscalls.h --- linux-2.4.17/arch/um/include/sysdep-ppc/syscalls.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysdep-ppc/syscalls.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5, unsigned long arg6); +/* Call entry 'syscall' of sys_call_table with the six argument slots of 'regs'; 'regs' is the register set itself, its address is taken here. */ +#define EXECUTE_SYSCALL(syscall, regs) \ + (*sys_call_table[syscall])(UM_SYSCALL_ARG1(&regs), \ + UM_SYSCALL_ARG2(&regs), \ + UM_SYSCALL_ARG3(&regs), \ + UM_SYSCALL_ARG4(&regs), \ + UM_SYSCALL_ARG5(&regs), \ + UM_SYSCALL_ARG6(&regs)) + +extern syscall_handler_t sys_mincore; +extern syscall_handler_t sys_madvise; + +/* old_mmap needs the correct prototype since syscall_kern.c includes + * this file. + */ +int old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long offset); + +#define ARCH_SYSCALLS \ + [ __NR_modify_ldt ] = sys_ni_syscall, \ + [ __NR_pciconfig_read ] = sys_ni_syscall, \ + [ __NR_pciconfig_write ] = sys_ni_syscall, \ + [ __NR_pciconfig_iobase ] = sys_ni_syscall, \ + [ __NR_pivot_root ] = sys_ni_syscall, \ + [ __NR_multiplexer ] = sys_ni_syscall, \ + [ __NR_mmap ] = old_mmap, \ + [ __NR_madvise ] = sys_madvise, \ + [ __NR_mincore ] = sys_mincore, + +#define LAST_ARCH_SYSCALL __NR_mincore + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/sysrq.h linux_umopenmosix/arch/um/include/sysrq.h --- linux-2.4.17/arch/um/include/sysrq.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/sysrq.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SYSRQ_H +#define __UM_SYSRQ_H + +extern void show_trace(unsigned long *stack); + +#endif diff -urN linux-2.4.17/arch/um/include/tlb.h linux_umopenmosix/arch/um/include/tlb.h --- linux-2.4.17/arch/um/include/tlb.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/tlb.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __TLB_H__ +#define __TLB_H__ + +extern void mprotect_kernel_vm(int w); +extern void force_flush_all(void); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/ubd_user.h linux_umopenmosix/arch/um/include/ubd_user.h --- linux-2.4.17/arch/um/include/ubd_user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/ubd_user.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#ifndef __UM_UBD_USER_H +#define __UM_UBD_USER_H + +enum ubd_req { UBD_READ, UBD_WRITE }; + +struct io_thread_req { + enum ubd_req op; + int fds[2]; + unsigned long offsets[2]; + unsigned long long offset; + unsigned long length; + char *buffer; + int sectorsize; + unsigned long sector_mask; + unsigned long cow_offset; + unsigned long bitmap_words[2]; + int error; +}; + +extern int open_ubd_file(char *file, int *openflags, char **backing_file_out, + int *bitmap_offset_out, unsigned long *bitmap_len_out, + int *data_offset_out, int *create_cow_out); +extern int create_cow_file(char *cow_file, char *backing_file, int sectorsize, + int *bitmap_offset_out, + unsigned long *bitmap_len_out, + int *data_offset_out); +extern int read_cow_bitmap(int fd, void *buf, int offset, int len); +extern int read_ubd_fs(int fd, void *buffer, int len); +extern int write_ubd_fs(int fd, char *buffer, int len); +extern int start_io_thread(unsigned long sp, int *fds_out); +extern void do_io(struct io_thread_req *req); +extern int ubd_is_dir(char *file); +/* Return nonzero if bit number 'bit' is set in the unsigned long array 'data'. The mask is built from 1UL so the shift happens at the width of data[n], not of int. */ +static inline int ubd_test_bit(int bit, unsigned long *data) +{ + int bits, n, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + return((data[n] & (1UL << off)) != 0); +} +/* Set bit number 'bit' in the unsigned long array 'data'. The mask is built from 1UL so bits at or above the width of int land in the right place. */ +static inline void ubd_set_bit(int bit, unsigned long *data) +{ + int bits, n, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + data[n] |= (1UL << off); +} + + +#endif + +/* + *
Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/umid.h linux_umopenmosix/arch/um/include/umid.h --- linux-2.4.17/arch/um/include/umid.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/umid.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +extern int umid_file_name(char *name, char *buf, int len); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/umn.h linux_umopenmosix/arch/um/include/umn.h --- linux-2.4.17/arch/um/include/umn.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/umn.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UMN_H +#define __UMN_H + +extern int open_umn_tty(int *slave_out, int *slipno_out); +extern void close_umn_tty(int master, int slave); +extern int umn_send_packet(int fd, void *data, int len); +extern int set_umn_addr(int fd, char *addr, char *ptp_addr); +extern void slip_unesc(unsigned char s); +extern void umn_read(int fd); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. 
This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/user.h linux_umopenmosix/arch/um/include/user.h --- linux-2.4.17/arch/um/include/user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/user.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __USER_H__ +#define __USER_H__ + +extern void panic(char *fmt, ...); +extern int printk(char *fmt, ...); +extern void schedule(void); +extern void *um_kmalloc(int size); +extern void kfree(void *ptr); +extern int in_aton(char *str); +extern int open_gdb_chan(void); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/include/user_util.h linux_umopenmosix/arch/um/include/user_util.h --- linux-2.4.17/arch/um/include/user_util.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/include/user_util.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __USER_UTIL_H__ +#define __USER_UTIL_H__ + +#include "sysdep/ptrace.h" + +extern int grantpt(int __fd); +extern int unlockpt(int __fd); +extern char *ptsname(int __fd); + +enum { OP_NONE, OP_EXEC, OP_THREAD, OP_FORK, OP_TRACE_ON, OP_REBOOT, + OP_HALT, OP_CB }; + +struct cpu_task { + int pid; + void *task; +}; + +extern struct cpu_task cpu_tasks[]; + +extern unsigned long low_physmem; +extern unsigned long high_physmem; +extern unsigned long uml_physmem; +extern unsigned long end_vm; +extern unsigned long start_vm; + +extern int tracing_pid; +extern int honeypot; + +extern char host_info[]; + +extern char saved_command_line[]; +extern char command_line[]; + +extern int gdb_pid; + +extern char *tempdir; + +extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; +extern unsigned long _unprotected_end; +extern void *brk_start; + +extern void *open_maps(void); +extern void close_maps(void *fd); +extern unsigned long get_brk(void); +extern void stop(void); +extern int proc_start_thread(unsigned long ip, unsigned long sp); +extern void stack_protections(unsigned long address); +extern void task_protections(unsigned long address); +extern void abandon_proc_space(int (*proc)(void *), unsigned long sp); +extern int signals(int (*init_proc)(void *), void *sp); +extern int unmap(unsigned long address, unsigned long len); +extern int protect(unsigned long addr, unsigned long len, int r, int w, + int x, int must_succeed); +extern void stop_pid(int pid); +extern 
void kill_pid(int pid); +extern void usr1_pid(int pid); +extern int __personality(int); +extern int wait_for_stop(int pid, int sig, int cont_type); +extern void *add_signal_handler(int sig, void (*handler)(int)); +extern void signal_init(void); +extern void finish_exec(int old_pid, int new_pid, struct sys_pt_regs *regs); +extern int start_fork_tramp(void *arg, unsigned long temp_stack, int clone_vm, + int (*tramp)(void *)); +extern void trace_myself(void); +extern void timer(void); +extern void get_profile_timer(void); +extern void disable_profile_timer(void); +extern void set_timers(int set_signal); +extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags); +extern int input_loop(void); +extern void continue_execing_proc(int pid); +extern int linux_main(int argc, char **argv); +extern void remap_data(void *segment_start, void *segment_end, int w); +extern void set_cmdline(char *cmd); +extern void input_cb(void (*proc)(void *), void *arg, int arg_len); +extern void setup_input(void); +extern int get_pty(void); +extern void save_signal_state(int *sig_ptr); +extern void *um_kmalloc(int size); +extern int raw(int fd, int complain); +extern int switcheroo(int fd, int prot, void *from, void *to, int size); +extern void idle_sleep(int secs); +extern void setup_machinename(char *machine_out); +extern void setup_hostinfo(void); +extern void add_arg(char *cmd_line, char *arg); +extern void init_new_thread(void *sig_stack, void (*usr1_handler)(int)); +extern void start_exec(int old_pid, int new_pid, int *error, + struct sys_pt_regs *regs); +extern void attach_process(int pid); +extern void calc_sigframe_size(void); +extern int fork_tramp(void *sig_stack); +extern void do_exec(int old_pid, int new_pid); +extern void tracer_panic(char *msg, ...); +extern void close_fd(int); +extern int make_tempfile(const char *template, char **tempname, int do_unlink); +extern char *get_umid(void); + + +#ifdef CONFIG_MOSIX +extern void ptrace_getregs(unsigned long *); 
+#else +extern int ptrace_getregs(long pid, struct sys_pt_regs *regs_out); +#endif /* CONFIG_MOSIX */ + + +extern int ptrace_setregs(long pid, struct sys_pt_regs *regs_in); +extern void do_longjmp(void *p); +extern void term_handler(int sig); +extern void suspend_new_thread(int fd); +extern int detach(int pid, int sig); +extern int attach(int pid); +extern void kill_child_dead(int pid); +extern int cont(int pid); +extern void check_ptrace(void); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/Makefile linux_umopenmosix/arch/um/kernel/Makefile --- linux-2.4.17/arch/um/kernel/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/Makefile Wed Jun 26 23:45:15 2002 @@ -0,0 +1,66 @@ +OBJ = um.o + +OBJS = exec_kern.o exec_user.o frame_kern.o frame_user.o init_task.o irq.o \ + irq_user.o mem.o mem_user.o process.o ptrace.o reboot.o resource.o \ + setup.o signal_user.o smp.o syscall_kern.o syscall_user.o sysrq.o \ + sys_call_table.o time.o time_kern.o tlb.o trap_kern.o trap_user.o \ + uaccess_user.o um_arch.o umid.o user_util.o i387.o + +ifeq ($(CONFIG_BLK_DEV_INITRD), y) + OBJS += initrd_kern.o initrd_user.o +endif + +# user_syms.o not included here because Rules.make has its own ideas about +# building anything in export-objs + +USER_OBJS = $(filter %_user.o,$(OBJS)) process.o time.o umid.o user_util.o + +export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o + +UNMAP_CFLAGS := $(patsubst -pg -DPROFILING,,$(USER_CFLAGS)) +UNMAP_CFLAGS := $(patsubst -fprofile-arcs -ftest-coverage,,$(UNMAP_CFLAGS)) + +ifeq ($(CONFIG_MODULES), y) + DMODULES = -D__CONFIG_MODULES__ +endif + +ifeq 
($(CONFIG_MODVERSIONS), y) + DMODVERSIONS = -D__CONFIG_MODVERSIONS__ +endif + +ifeq ($(CONFIG_GPROF), y) + OBJS += gprof_syms.o + export-objs += gprof_syms.o +endif + +ifeq ($(CONFIG_GCOV), y) + OBJS += gmon_syms.o + export-objs += gmon_syms.o +endif + +CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES) $(DMODVERSIONS) -I- \ + -I../include + +all: $(OBJ) unmap_fin.o + +$(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< + +unmap.o: unmap.c + $(CC) $(UNMAP_CFLAGS) -c -o $@ $< + +unmap_fin.o : unmap.o + ld -r -o $@ $< -lc -L/usr/lib + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +clean: + rm -f $(OBJS) $(export-objs) + +modules: + +fastdep: + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/arch/um/kernel/exec_kern.c linux_umopenmosix/arch/um/kernel/exec_kern.c --- linux-2.4.17/arch/um/kernel/exec_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/exec_kern.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/slab.h" +#include "linux/smp_lock.h" +#include "asm/ptrace.h" +#include "asm/pgtable.h" +#include "asm/pgalloc.h" +#include "asm/uaccess.h" +#include "user_util.h" +#include "kern_util.h" +#include "kern.h" +#include "irq_user.h" +#include "tlb.h" +#include "2_5compat.h" + +/* See comment above fork_tramp for why sigstop is defined and used like + * this + */ + +static int sigstop = SIGSTOP; + +static int exec_tramp(void *sig_stack) +{ + int sig = sigstop; + + block_signals(); + init_new_thread(sig_stack, NULL); + kill(getpid(), sig); + return(0); +} + +void flush_thread(void) +{ + unsigned long stack; + int new_pid; + + stack = alloc_stack(); + new_pid = start_fork_tramp((void *) current->thread.kernel_stack, + stack, 0, exec_tramp); + if(new_pid < 0){ + printk(KERN_ERR + "flush_thread : new thread failed, errno = %d\n", + errno); + do_exit(SIGKILL); + } + 
+ + if(CPU(current) == 0) + forward_interrupts(new_pid); + current->thread.request.op = OP_EXEC; + current->thread.request.u.exec.pid = new_pid; + unprotect_stack((unsigned long) current); + usr1_pid(getpid()); + + free_page(stack); + protect(uml_physmem, high_physmem - uml_physmem, 1, 1, 0, 1); + task_protections((unsigned long) current); + force_flush_all(); + unblock_signals(); +} +/* Point the current task's saved process registers at the new program: ip = eip, sp = esp, clear the ELF-zero register, then run the arch exec-stack fixup. */ +void start_thread(struct pt_regs * regs, unsigned long eip, unsigned long esp) +{ + set_fs(USER_DS); + flush_tlb_mm(current->mm); + UM_IP(&current->thread.process_regs) = eip; + UM_SP(&current->thread.process_regs) = esp; + UM_ELF_ZERO(&current->thread.process_regs) = 0; + UM_FIX_EXEC_STACK(esp); +} +/* Run do_execve() against the current task's saved process registers; on success clear PT_DTRACE and refresh the command line. Returns do_execve()'s result. */ +static int execve1(char *file, char **argv, char **env) +{ + int error; + + error = do_execve(file, argv, env, + (struct pt_regs *) &current->thread.process_regs); + if (error == 0){ + current->ptrace &= ~PT_DTRACE; + set_cmdline(current_cmd()); + } + return(error); +} + +int um_execve(char *file, char **argv, char **env) +{ + if(execve1(file, argv, env) == 0) set_user_mode(current, 1, 1); + return(-1); +} + +int sys_execve(char *file, char **argv, char **env) +{ + int error; + char *filename; + + lock_kernel(); + filename = getname((char *) file); + error = PTR_ERR(filename); + if (IS_ERR(filename)) goto out; + error = execve1(filename, argv, env); + putname(filename); + out: + unlock_kernel(); + return(error); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/exec_user.c linux_umopenmosix/arch/um/kernel/exec_user.c --- linux-2.4.17/arch/um/kernel/exec_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/exec_user.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +/* Attach to new_pid, copy old_pid's registers into it and kill old_pid; any failing step ends in tracer_panic(). */ +void do_exec(int old_pid, int new_pid) +{ + struct sys_pt_regs regs; + + if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) || + (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0) || + (waitpid(new_pid, 0, WUNTRACED) < 0)) + tracer_panic("do_exec failed to attach proc"); + + if(ptrace_getregs(old_pid, &regs) < 0) + tracer_panic("do_exec failed to get registers"); + + kill(old_pid, SIGKILL); + + if(ptrace_setregs(new_pid, &regs) < 0) + tracer_panic("do_exec failed to start new proc"); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/frame_kern.c linux_umopenmosix/arch/um/kernel/frame_kern.c --- linux-2.4.17/arch/um/kernel/frame_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/frame_kern.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "asm/ptrace.h" +#include "asm/uaccess.h" +#include "frame.h" +#include "frame_kern.h" +#include "sigcontext.h" +#include "sysdep/ptrace.h" + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. 
*/ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); /* falls through to default */ + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + } + return err; + } +} + +static int copy_restorer(void (*restorer)(void), unsigned long start, + unsigned long sr_index, int sr_relative) +{ + if(restorer != 0){ + if(copy_to_user((void *) (start + sr_index), &restorer, + sizeof(restorer))) + return(1); + } + else if(sr_relative){ + unsigned long *sr = (unsigned long *) (start + sr_index); + *sr += (unsigned long) sr; + } + return(0); +} +/* Copy the captured siginfo-style frame to stack_top - signal_frame_si.len, fill in sig, the siginfo, its pointer and the restorer, then set regs' ip to handler and sp to the frame's sp slot. Returns 1 if any copy fails, 0 otherwise. */ +int setup_signal_stack_si(unsigned long stack_top, int sig, + unsigned long handler, void (*restorer)(void), + struct sys_pt_regs *regs, siginfo_t *info) +{ + unsigned long start = stack_top - signal_frame_si.len; + void *sip = (void *) (start + signal_frame_si.si_index); + + if(copy_to_user((void *) start, signal_frame_si.data, + signal_frame_si.len) || + copy_to_user((void *) (start + signal_frame_si.sig_index), &sig, + sizeof(sig)) || + copy_siginfo_to_user(sip, info) || + copy_to_user((void *) (start + signal_frame_si.sip_index), &sip, + sizeof(sip)) || + copy_restorer(restorer, start, signal_frame_si.sr_index, + signal_frame_si.sr_relative)) + return(1); + + UM_IP(regs) = handler; + UM_SP(regs) = start + signal_frame_si.sp_index; + return(0); +} + +int setup_signal_stack_sc(unsigned long stack_top, int sig, + unsigned long handler, void (*restorer)(void), + struct sys_pt_regs *regs, void *sc, void *context_sc) +{ + unsigned long start = stack_top - signal_frame_sc.len; + void *user_sc = (void *) (start + signal_frame_sc.sc_index); + + if(copy_to_user((void *) start, signal_frame_sc.data, + signal_frame_sc.len) || + copy_to_user((void *) (start + signal_frame_sc.sig_index), &sig, + sizeof(sig)) || +
copy_sc_to_user(user_sc, sc, &signal_frame_sc.arch) || + copy_to_user(context_sc, &user_sc, sizeof(user_sc)) || + copy_restorer(restorer, start, signal_frame_sc.sr_index, + signal_frame_sc.sr_relative)) + return(1); + + UM_IP(regs) = handler; + UM_SP(regs) = start + signal_frame_sc.sp_index; + + set_sc_ip_sp(sc, handler, start + signal_frame_sc.sp_index); + return(0); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/frame_user.c linux_umopenmosix/arch/um/kernel/frame_user.c --- linux-2.4.17/arch/um/kernel/frame_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/frame_user.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,286 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysdep/ptrace.h" +#include "sysdep/frame.h" +#include "sysdep/sigcontext.h" +#include "frame.h" +#include "kern_util.h" + +static int capture_stack(int (*child)(void *arg), void *arg, void *sp, + unsigned long top, void **data_out) +{ + struct sys_pt_regs regs; + int pid, status, n, len; + + /* Start the child as a thread */ + pid = clone(child, sp, CLONE_VM | SIGCHLD, arg); + if(pid < 0){ + printf("setup_stack : clone failed - errno = %d\n", errno); + exit(1); + } + + /* Wait for it to stop itself and continue it with a SIGUSR1 to force + * it into the signal handler. 
+ */ + n = waitpid(-1, &status, WUNTRACED); + if(n < 0){ + printf("setup_stack : waitpid failed - errno = %d\n", errno); + exit(1); + } + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ + fprintf(stderr, "Expected SIGSTOP, got status = 0x%x\n", + status); + exit(1); + } + if(ptrace(PTRACE_CONT, pid, 0, SIGUSR1) < 0){ + printf("setup_stack : PTRACE_CONT failed - errno = %d\n", + errno); + exit(1); + } + + /* Wait for it to stop itself again and grab its registers again. + * At this point, the handler has stuffed the addresses of + * sig, sc, and SA_RESTORER in raw. + */ + n = waitpid(pid, &status, WUNTRACED); + if(n < 0){ + printf("setup_stack : waitpid failed - errno = %d\n", errno); + exit(1); + } + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ + fprintf(stderr, "Expected SIGSTOP, got status = 0x%x\n", + status); + exit(1); + } + if(ptrace(PTRACE_GETREGS, pid, 0, &regs) < 0){ + printf("setup_stack : PTRACE_GETREGS failed - errno = %d\n", + errno); + exit(1); + } + + /* It has outlived its usefulness, so continue it so it can exit */ + if(ptrace(PTRACE_CONT, pid, 0, 0) < 0){ + printf("setup_stack : PTRACE_CONT failed - errno = %d\n", errno); + exit(1); + } + if(waitpid(pid, &status, 0) < 0){ + printf("setup_stack : waitpid failed - errno = %d\n", errno); + exit(1); + } + if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ + printf("setup_stack : Expected exit status 0, " + "got status = 0x%x\n", status); + exit(1); + } + + /* The frame that we want is the top of the signal stack */ + len = top - UM_SP(&regs); + *data_out = malloc(len); + if(*data_out == NULL){ + printf("setup_stack : malloc failed - errno = %d\n", errno); + exit(1); + } + memcpy(*data_out, (void *) UM_SP(&regs), len); + + return(len); +} + +static void child_common(void *sp, int size, sighandler_t handler, int flags) +{ + stack_t ss; + struct sigaction sa; + + ptrace(PTRACE_TRACEME, 0, 0, 0); + ss.ss_sp = sp; + ss.ss_flags = 0; + ss.ss_size = size; + if(sigaltstack(&ss, NULL) < 0){ +
printf("sigaltstack failed - errno = %d\n", errno); + exit(1); + } + + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | flags; + if(sigaction(SIGUSR1, &sa, NULL) < 0){ + printf("sigaction failed - errno = %d\n", errno); + exit(1); + } + + kill(getpid(), SIGSTOP); +} + +struct sc_frame signal_frame_sc; + +struct sc_frame_raw { + void *stack; + int size; + unsigned long sig; + unsigned long sc; + unsigned long sr; + unsigned long sp; + struct arch_frame_data_raw arch; +}; + +static struct sc_frame_raw *raw_sc = NULL; +/* SIGUSR1 handler for the sigcontext-style frame: record the addresses of its own sig and sc arguments plus the restorer and sp values in *raw_sc, then stop itself so the parent can read the stack. */ +static void sc_handler(int sig, struct sigcontext sc) +{ + raw_sc->sig = (unsigned long) &sig; + raw_sc->sc = (unsigned long) &sc; + raw_sc->sr = frame_restorer(); + raw_sc->sp = frame_sp(); + setup_arch_frame_raw(&raw_sc->arch, &sc); + kill(getpid(), SIGSTOP); + exit(0); +} + +static int sc_child(void *arg) +{ + raw_sc = arg; + child_common(raw_sc->stack, raw_sc->size, (sighandler_t) sc_handler, + 0); + return(0); +} + +struct si_frame signal_frame_si; + +struct si_frame_raw { + void *stack; + int size; + unsigned long sig; + unsigned long sip; + unsigned long si; + unsigned long sr; + unsigned long sp; +}; + +static struct si_frame_raw *raw_si = NULL; + +static void si_handler(int sig, siginfo_t *si) +{ + raw_si->sig = (unsigned long) &sig; + raw_si->sip = (unsigned long) &si; + raw_si->si = (unsigned long) si; + raw_si->sr = frame_restorer(); + raw_si->sp = frame_sp(); + kill(getpid(), SIGSTOP); + exit(0); +} + +static int si_child(void *arg) +{ + raw_si = arg; + child_common(raw_si->stack, raw_si->size, (sighandler_t) si_handler, + SA_SIGINFO); + return(0); +} + +void capture_signal_stack(void) +{ + struct sc_frame_raw raw_sc; + struct si_frame_raw raw_si; + void *stack, *sigstack; + unsigned long top, sig_top, base; + + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0); + if((stack == MAP_FAILED) || (sigstack == MAP_FAILED)){ + printf("setup_stack : mmap failed - errno = %d\n", errno); + exit(1); + } + + top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); + sig_top = (unsigned long) sigstack + PAGE_SIZE; + + raw_sc.stack = sigstack; + raw_sc.size = PAGE_SIZE; + signal_frame_sc.len = capture_stack(sc_child, &raw_sc, (void *) top, + sig_top, &signal_frame_sc.data); + + /* These are the offsets within signal_frame_sc.data (counting from + * the bottom) of sig, sc, SA_RESTORER, and the initial sp. + */ + base = sig_top - signal_frame_sc.len; + signal_frame_sc.sig_index = raw_sc.sig - base; + signal_frame_sc.sc_index = raw_sc.sc - base; + signal_frame_sc.sr_index = raw_sc.sr - base; + if((*((unsigned long *) raw_sc.sr) & PAGE_MASK) == + (unsigned long) sigstack){ + unsigned long *sr = (unsigned long *) raw_sc.sr; + unsigned long frame = (unsigned long) signal_frame_sc.data; + + signal_frame_sc.sr_relative = 1; + *sr -= raw_sc.sr; + *((unsigned long *) (frame + signal_frame_sc.sr_index)) = *sr; + } + else signal_frame_sc.sr_relative = 0; + signal_frame_sc.sp_index = raw_sc.sp - base; + setup_arch_frame(&raw_sc.arch, &signal_frame_sc.arch); + + /* Repeat for the siginfo variant */ + + raw_si.stack = sigstack; + raw_si.size = PAGE_SIZE; + signal_frame_si.len = capture_stack(si_child, &raw_si, (void *) top, + sig_top, &signal_frame_si.data); + base = sig_top - signal_frame_si.len; + signal_frame_si.sig_index = raw_si.sig - base; + signal_frame_si.sip_index = raw_si.sip - base; + signal_frame_si.si_index = raw_si.si - base; + signal_frame_si.sr_index = raw_si.sr - base; + if((*((unsigned long *) raw_si.sr) & PAGE_MASK) == + (unsigned long) sigstack){ + unsigned long *sr = (unsigned long *) raw_si.sr; + unsigned long frame = (unsigned long) signal_frame_si.data; + /* the siginfo frame's own flag; the sc frame's flag was settled above */ + signal_frame_si.sr_relative = 1; + *sr -= raw_si.sr; + *((unsigned long *) (frame + signal_frame_si.sr_index)) = *sr; + } + else
signal_frame_si.sr_relative = 0; + signal_frame_si.sp_index = raw_si.sp - base; + + if((munmap(stack, PAGE_SIZE) < 0) || + (munmap(sigstack, PAGE_SIZE) < 0)){ + printf("setup_stack : munmap failed - errno = %d\n", errno); + exit(1); + } +} + +void set_sc_ip_sp(void *sc_ptr, unsigned long ip, unsigned long sp) +{ + struct sigcontext *sc = sc_ptr; + + SC_IP(sc) = ip; + SC_SP(sc) = sp; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/gmon_syms.c linux_umopenmosix/arch/um/kernel/gmon_syms.c --- linux-2.4.17/arch/um/kernel/gmon_syms.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/gmon_syms.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/module.h" + +extern void __bb_init_func(void *); +EXPORT_SYMBOL(__bb_init_func); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/gprof_syms.c linux_umopenmosix/arch/um/kernel/gprof_syms.c --- linux-2.4.17/arch/um/kernel/gprof_syms.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/gprof_syms.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/module.h" + +extern void mcount(void); +EXPORT_SYMBOL(mcount); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/i387.c linux_umopenmosix/arch/um/kernel/i387.c --- linux-2.4.17/arch/um/kernel/i387.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/i387.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,596 @@ +/* + * linux/arch/i386/kernel/i387.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MATH_EMULATION +#define HAVE_HWFP (boot_cpu_data.hard_math) +#else +#define HAVE_HWFP 1 +#endif + +/* + * The _current_ task is using the FPU for the first time + * so initialize it and set the mxcsr to its default + * value at reset if we support XMM instructions and then + * remember the current task has used the FPU. + */ +void init_fpu(void) +{ + __asm__("fninit"); + if ( cpu_has_xmm ) + load_mxcsr(0x1f80); + + current->used_math = 1; +} + +/* + * FPU lazy state save handling.
+ */ + +static inline void __save_init_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxsave %0 ; fnclex" + : "=m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "fnsave %0 ; fwait" + : "=m" (tsk->thread.i387.fsave) ); + } + tsk->flags &= ~PF_USEDFPU; +} + +void save_init_fpu( struct task_struct *tsk ) +{ + __save_init_fpu(tsk); + stts(); +} + +void kernel_fpu_begin(void) +{ + struct task_struct *tsk = current; + + if (tsk->flags & PF_USEDFPU) { + __save_init_fpu(tsk); + return; + } + clts(); +} + +void restore_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxrstor %0" + : : "m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "frstor %0" + : : "m" (tsk->thread.i387.fsave) ); + } +} + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. 
*/ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); + + for ( i = 0 ; i < 8 ; i++ ) { + if ( twd & 0x1 ) { + st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); + + switch ( st->exponent & 0x7fff ) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if ( st->significand[3] & 0x8000 ) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* + * FPU state interaction. 
+ */ + +unsigned short get_fpu_cwd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.cwd; + } else { + return (unsigned short)tsk->thread.i387.fsave.cwd; + } +} + +unsigned short get_fpu_swd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.swd; + } else { + return (unsigned short)tsk->thread.i387.fsave.swd; + } +} + +unsigned short get_fpu_twd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.twd; + } else { + return (unsigned short)tsk->thread.i387.fsave.twd; + } +} + +unsigned short get_fpu_mxcsr( struct task_struct *tsk ) +{ + if ( cpu_has_xmm ) { + return tsk->thread.i387.fxsave.mxcsr; + } else { + return 0x1f80; + } +} + +void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.cwd = cwd; + } else { + tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000); + } +} + +void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.swd = swd; + } else { + tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000); + } +} + +void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); + } else { + tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000); + } +} + +void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) +{ + if ( cpu_has_xmm ) { + tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf); + } +} + +/* + * FXSR floating point environment conversions. 
+ */ + +static inline int convert_fxsr_to_user( struct _fpstate *buf, + struct i387_fxsave_struct *fxsave ) +{ + unsigned long env[7]; + struct _fpreg *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000; + env[1] = (unsigned long)fxsave->swd | 0xffff0000; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_to_user( to, from, sizeof(*to) ) ) + return 1; + } + return 0; +} + +static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate *buf ) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg *from; + int i; + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_from_user( to, from, sizeof(*from) ) ) + return 1; + } + return 0; +} + +/* + * Signal frame handlers. 
+ */
+
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+
+	unlazy_fpu( tsk );
+	tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+	if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+			     sizeof(struct i387_fsave_struct) ) )
+		return -1;
+	return 1;
+}
+
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	unlazy_fpu( tsk );
+
+	if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+		return -1;
+
+	err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+	err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+	if ( err )
+		return -1;
+
+	if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+			     sizeof(struct i387_fxsave_struct) ) )
+		return -1;
+	return 1;
+}
+
+int save_i387( struct _fpstate *buf )
+{
+	if ( !current->used_math )
+		return 0;
+
+	/* This will cause a "finit" to be triggered by the next
+	 * attempted FPU operation by the 'current' process.
+	 */
+	current->used_math = 0;
+
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return save_i387_fxsave( buf );
+		} else {
+			return save_i387_fsave( buf );
+		}
+	} else {
+		return save_i387_soft( &current->thread.i387.soft, buf );
+	}
+}
+
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	clear_fpu( tsk );
+	return __copy_from_user( &tsk->thread.i387.fsave, buf,
+				 sizeof(struct i387_fsave_struct) );
+}
+
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+	struct task_struct *tsk = current;
+	clear_fpu( tsk );
+	if ( __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+			       sizeof(struct i387_fxsave_struct) ) )
+		return 1;
+	/* mxcsr bit 6 and 31-16 must be zero for security reasons */
+	tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+	return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate *buf )
+{
+	int err;
+
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			err = restore_i387_fxsave( buf );
+		} else {
+			err = restore_i387_fsave( buf );
+		}
+	} else {
+		err = restore_i387_soft( &current->thread.i387.soft, buf );
+	}
+	current->used_math = 1;
+	return err;
+}
+
+/*
+ * ptrace request handlers.
+ */
+
+static inline int get_fpregs_fsave( struct user_i387_struct *buf,
+				    struct task_struct *tsk )
+{
+	return __copy_to_user( buf, &tsk->thread.i387.fsave,
+			       sizeof(struct user_i387_struct) );
+}
+
+static inline int get_fpregs_fxsave( struct user_i387_struct *buf,
+				     struct task_struct *tsk )
+{
+	return convert_fxsr_to_user( (struct _fpstate *)buf,
+				     &tsk->thread.i387.fxsave );
+}
+
+int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk )
+{
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return get_fpregs_fxsave( buf, tsk );
+		} else {
+			return get_fpregs_fsave( buf, tsk );
+		}
+	} else {
+		return save_i387_soft( &tsk->thread.i387.soft,
+				       (struct _fpstate *)buf );
+	}
+}
+
+static inline int set_fpregs_fsave( struct task_struct *tsk,
+				    struct user_i387_struct *buf )
+{
+	return __copy_from_user( &tsk->thread.i387.fsave, buf,
+				 sizeof(struct user_i387_struct) );
+}
+
+static inline int set_fpregs_fxsave( struct task_struct *tsk,
+				     struct user_i387_struct *buf )
+{
+	return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
+				       (struct _fpstate *)buf );
+}
+
+int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf )
+{
+	if ( HAVE_HWFP ) {
+		if ( cpu_has_fxsr ) {
+			return set_fpregs_fxsave( tsk, buf );
+		} else {
+			return set_fpregs_fsave( tsk, buf );
+		}
+	} else {
+		return restore_i387_soft( &tsk->thread.i387.soft,
+					  (struct _fpstate *)buf );
+	}
+}
+
+int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk )
+{
+	if ( cpu_has_fxsr ) {
+		if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave,
+				    sizeof(struct user_fxsr_struct) ))
+			return -EFAULT;
+		return 0;
+	} else {
+		return -EIO;
+	}
+}
+
+int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf )
+{
+	if ( cpu_has_fxsr ) {
+		int err = __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf,
+					    sizeof(struct user_fxsr_struct) );
+		/* mxcsr bit 6 and 31-16 must be zero for security reasons */
+		tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+		return err ? -EFAULT : 0;	/* a faulting user buffer is an error, as in get_fpxregs() */
+	} else {
+		return -EIO;
+	}
+}
+
+/*
+ * FPU state for core dumps.
+ */
+
+static inline void copy_fpu_fsave( struct task_struct *tsk,
+				   struct user_i387_struct *fpu )
+{
+	memcpy( fpu, &tsk->thread.i387.fsave,
+		sizeof(struct user_i387_struct) );
+}
+
+static inline void copy_fpu_fxsave( struct task_struct *tsk,
+				   struct user_i387_struct *fpu )
+{
+	unsigned short *to;
+	unsigned short *from;
+	int i;
+
+	memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
+
+	to = (unsigned short *)&fpu->st_space[0];
+	from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
+	for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
+		memcpy( to, from, 5 * sizeof(unsigned short) );
+	}
+}
+
+
+
+
+/*
+int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+{
+	int fpvalid;
+	struct task_struct *tsk = current;
+
+	fpvalid = tsk->used_math;
+	if ( fpvalid ) {
+		unlazy_fpu( tsk );
+		if ( cpu_has_fxsr ) {
+			copy_fpu_fxsave( tsk, fpu );
+		} else {
+			copy_fpu_fsave( tsk, fpu );
+		}
+	}
+
+	return fpvalid;
+}
+*/
+
+
+
+
+
+
+
+int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu )
+{
+	int fpvalid;
+	struct task_struct *tsk = current;
+
+	fpvalid = tsk->used_math && cpu_has_fxsr;
+	if ( fpvalid ) {
+		unlazy_fpu( tsk );
+		memcpy( fpu, &tsk->thread.i387.fxsave,
+			sizeof(struct user_fxsr_struct) );
+	}
+
+	return fpvalid;
+}
+
+#ifdef CONFIG_MOSIX
+
+int
+has_fxsr(void)
+{
+	return(cpu_has_fxsr);
+}
+
+void
+fsave_to_fxsave(union i387_union *from, union i387_union *to)
+{
+	int i;
+	long *fcp, *tcp;
+
+	to->fxsave.cwd = from->fsave.cwd;
+	to->fxsave.swd = from->fsave.swd;
+	to->fxsave.twd = twd_i387_to_fxsr(from->fsave.twd);
+	to->fxsave.fop = from->fxsave.padding[0];
+	to->fxsave.fip = from->fsave.fip;
+	to->fxsave.fcs = from->fsave.fcs;
+	to->fxsave.foo = from->fsave.foo;
+	to->fxsave.mxcsr = from->fxsave.padding[1];
+	to->fxsave.fos = from->fsave.fos;
+	for(fcp = from->fsave.st_space , tcp = to->fxsave.st_space ,
+							i = 0 ; i < 8 ; i++)
+	{
+		*tcp++ = *fcp++;
+		*tcp++ = *fcp++;
+		*tcp = *((unsigned short *)fcp)++;
+		tcp += 2;
+	}
+	memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space,
+			sizeof(from->fxsave.xmm_space));
+}
+
+void
+fxsave_to_fsave(union i387_union *from, union i387_union *to)
+{
+	int i;
+	long *fcp, *tcp;
+
+	to->fsave.cwd = from->fxsave.cwd;
+	to->fsave.swd = from->fxsave.swd;
+	to->fsave.twd = twd_fxsr_to_i387(&from->fxsave);
+	to->fsave.fip = from->fxsave.fip;
+	to->fsave.fcs = from->fxsave.fcs;
+	to->fsave.foo = from->fxsave.foo;
+	to->fsave.fos = from->fxsave.fos;
+	to->fxsave.padding[0] = from->fxsave.fop;
+	to->fxsave.padding[1] = from->fxsave.mxcsr;
+	for(fcp = from->fxsave.st_space , tcp = to->fsave.st_space ,
+							i = 0 ; i < 8 ; i++)
+	{
+		*tcp++ = *fcp++;
+		*tcp++ = *fcp++;
+		*((unsigned short *)tcp)++ = *fcp;
+		fcp += 2;
+	}
+	memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space,
+			sizeof(to->fxsave.xmm_space));
+}
+#endif /* CONFIG_MOSIX */
diff -urN linux-2.4.17/arch/um/kernel/init_task.c linux_umopenmosix/arch/um/kernel/init_task.c
--- linux-2.4.17/arch/um/kernel/init_task.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/init_task.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/mm.h"
+#include "linux/sched.h"
+#include "linux/version.h"
+#include "asm/uaccess.h"
+#include "asm/pgtable.h"
+#include "user_util.h"
+
+static struct fs_struct init_fs = INIT_FS;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0)
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+#endif
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+
+union task_union init_task_union
+__attribute__((__section__(".data.init_task"))) =
+{ INIT_TASK(init_task_union.task) };
+
+struct task_struct *alloc_task_struct(void){
+	struct task_struct *task;
+
+	task = (struct task_struct *) __get_free_pages(GFP_KERNEL, 2);
+	if(task == NULL) return(NULL);
+	return(task);
+}
+
+void unprotect_stack(unsigned long stack)
+{
+	protect(stack, 4 * PAGE_SIZE, 1, 1, 0, 1);
+}
+
+void free_task_struct(struct task_struct *task)
+{
+	/* free_pages decrements the page counter and only actually frees
+	 * the pages if they are now not accessed by anything.
+	 */
+	free_pages((unsigned long) task, 2);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/initrd_kern.c linux_umopenmosix/arch/um/kernel/initrd_kern.c
--- linux-2.4.17/arch/um/kernel/initrd_kern.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/initrd_kern.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/init.h"
+#include "linux/bootmem.h"
+#include "linux/blk.h"
+#include "asm/types.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "initrd.h"
+#include "init.h"
+
+extern __u64 file_size(char *file);
+
+static char *initrd __initdata = NULL;
+
+static int __init read_initrd(void)
+{
+	void *area;
+	int size;
+
+	if(initrd == NULL) return 0;
+	size = file_size(initrd);
+	if(size < 0) return 0;
+	area = alloc_bootmem(size);
+	if(area == NULL) return 0;
+	if(load_initrd(initrd, area, size) == -1) return 0;
+	initrd_start = (unsigned long) area;
+	initrd_end = initrd_start + size;
+	return 0;
+}
+
+__uml_postsetup(read_initrd);
+
+static int __init uml_initrd_setup(char *line, int *add)
+{
+	initrd = line;
+	return 0;
+}
+
+__uml_setup("initrd=", uml_initrd_setup,
+"initrd=<initrd image>\n"
+" This is used to boot UML from an initrd image. The argument is the\n"
+" name of the file containing the image\n"
+);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/initrd_user.c linux_umopenmosix/arch/um/kernel/initrd_user.c
--- linux-2.4.17/arch/um/kernel/initrd_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/initrd_user.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "initrd.h"
+
+/*
+ * Read exactly 'size' bytes of the host file 'filename' into 'buf'.
+ * Returns 0 on success and -1 if the file cannot be opened or the read
+ * comes up short. The descriptor is closed on every path.
+ */
+int load_initrd(char *filename, void *buf, int size)
+{
+	int fd, n, err;
+
+	if((fd = open(filename, O_RDONLY)) == -1){
+		printk("Opening '%s' failed - errno = %d\n", filename, errno);
+		return(-1);
+	}
+	n = read(fd, buf, size);
+	err = errno;	/* close() may overwrite errno */
+	close(fd);
+	if(n != size){
+		printk("Read of %d bytes from '%s' returned %d, errno = %d\n",
+		       size, filename, n, err);
+		return(-1);
+	}
+	return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/irq.c linux_umopenmosix/arch/um/kernel/irq.c --- linux-2.4.17/arch/um/kernel/irq.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/irq.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,814 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + */ + +#include "linux/config.h" +#include "linux/kernel.h" +#include "linux/smp.h" +#include "linux/irq.h" +#include "linux/kernel_stat.h" +#include "linux/interrupt.h" +#include "linux/random.h" +#include "linux/slab.h" +#include "linux/file.h" +#include "linux/proc_fs.h" +#include "linux/init.h" +#include "linux/seq_file.h" +#include "asm/irq.h" +#include "asm/hw_irq.h" +#include "asm/hardirq.h" +#include "asm/atomic.h" +#include "asm/signal.h" +#include "asm/system.h" +#include "asm/errno.h" +#include "asm/uaccess.h" +#include "user_util.h" +#include "irq_user.h" + +static void register_irq_proc (unsigned int irq); + +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. + */ +#if CONFIG_X86 + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. 
+ * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +volatile unsigned long irq_err_count; + +/* + * Generic, controller-independent functions: + */ + +int get_irq_list(char *buf) +{ + int i, j; + struct irqaction * action; + char *p = buf; + + p += sprintf(p, " "); + for (j=0; jtypename); + p += sprintf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + p += sprintf(p, ", %s", action->name); + *p++ = '\n'; + } + p += sprintf(p, "\n"); +#ifdef notdef +#if CONFIG_SMP + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + p += sprintf(p, "\n"); +#endif +#endif + p += sprintf(p, "ERR: %10lu\n", irq_err_count); + return p - buf; +} + + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. 
+ */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, + struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables of an interrupt + * stack. Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +void inline disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables of an interrupt + * stack. That is for two disables you need two enables. This + * function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. 
+ */
+
+void disable_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+
+	if (!local_irq_count(smp_processor_id())) {
+		do {
+			barrier();
+		} while (irq_desc[irq].status & IRQ_INPROGRESS);
+	}
+}
+
+/**
+ *	enable_irq - enable interrupt handling on an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Re-enables the processing of interrupts on this IRQ line
+ *	providing no disable_irq calls are now in effect.
+ *
+ *	This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	switch (desc->depth) {
+	case 1: {
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+		desc->status = status;
+		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+			desc->status = status | IRQ_REPLAY;
+			hw_resend_irq(desc->handler,irq);
+		}
+		desc->handler->enable(irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;
+		break;
+	case 0:
+		printk(KERN_ERR "enable_irq() unbalanced from %p\n",
+		       __builtin_return_address(0));
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+unsigned int do_IRQ(int irq, int user_mode)
+{
+	/*
+	 * 0 return value means that this irq is already being
+	 * handled by some other CPU. (or is disabled)
+	 */
+	int cpu = smp_processor_id();
+	irq_desc_t *desc = irq_desc + irq;
+	struct irqaction * action;
+	struct pt_regs regs;
+	unsigned int status;
+
+	regs.user_mode = user_mode;
+	kstat.irqs[cpu][irq]++;
+	spin_lock(&desc->lock);
+	desc->handler->ack(irq);
+	/*
+	   REPLAY is when Linux resends an IRQ that was dropped earlier
+	   WAITING is used by probe to mark irqs that are being tested
+	   */
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+	status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+	 */
+	action = NULL;
+	if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+		action = desc->action;
+		status &= ~IRQ_PENDING; /* we commit to handling */
+		status |= IRQ_INPROGRESS; /* we are handling it */
+	}
+	desc->status = status;
+
+	/*
+	 * If there is no IRQ handler or it was disabled, exit early.
+	   Since we set PENDING, if another processor is handling
+	   a different instance of this same irq, the other processor
+	   will take care of it.
+	 */
+	if (!action)
+		goto out;
+
+	/*
+	 * Edge triggered interrupts need to remember
+	 * pending events.
+	 * This applies to any hw interrupts that allow a second
+	 * instance of the same irq to arrive while we are in do_IRQ
+	 * or in the handler. But the code here only handles the _second_
+	 * instance of the irq, not the third or fourth. So it is mostly
+	 * useful for irq hardware that does not mask cleanly in an
+	 * SMP environment.
+	 */
+	for (;;) {
+		spin_unlock(&desc->lock);
+		handle_IRQ_event(irq, &regs, action);
+		spin_lock(&desc->lock);
+
+		if (!(desc->status & IRQ_PENDING))
+			break;
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+out:
+	/*
+	 * The ->end() handler has to deal with interrupts which got
+	 * disabled while the handler was running.
+	 */
+	desc->handler->end(irq);
+	spin_unlock(&desc->lock);
+
+	if (softirq_pending(cpu))
+		do_softirq();
+	return 1;
+}
+
+/**
+ *	request_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. From the point this
+ *	call is made your handler function may be invoked.
Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). 
+ */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk(KERN_ERR "Bad boy: %s (at 0x%x) called us " + "without a dev_id!\n", devname, (&irq)[-1]); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; +} + +int um_request_irq(unsigned int irq, int fd, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id) +{ + int retval; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if(retval) return(retval); + return(activate_fd(irq, fd, dev_id)); +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. 
+ */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~IRQ_DISABLED; + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); + return 0; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. 
+ */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + free_irq_by_dev(dev_id); + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) + barrier(); +#endif + kfree(action); + return; + } + printk(KERN_ERR "Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; + +#define HEX_DIGITS 8 + +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", irq_affinity[(long)data]); +} + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. 
+	 */
+	value = 0;
+
+	for (i = 0; i < count; i++) {
+		unsigned int c = hexnum[i];
+
+		switch (c) {
+			case '0' ... '9': c -= '0'; break;
+			case 'a' ... 'f': c -= 'a'-10; break;
+			case 'A' ... 'F': c -= 'A'-10; break;
+		default:
+			goto out;
+		}
+		value = (value << 4) | c;
+	}
+out:
+	*ret = value;
+	return 0;
+}
+
+/*
+ * write_proc handler for /proc/irq/<n>/smp_affinity.
+ *
+ * @data carries the IRQ number.  The user buffer is parsed as a hex CPU
+ * mask by parse_hex_value(); the mask is stored in irq_affinity[] and
+ * handed to the IRQ handler's set_affinity hook.
+ *
+ * Returns the full write count on success, -EIO if the handler has no
+ * set_affinity hook, the parse_hex_value() error if the buffer could not
+ * be parsed, or (CONFIG_SMP only) -EINVAL if the mask selects no online
+ * CPU.
+ */
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	int irq = (long) data, full_count = count, err;
+	unsigned long new_value;
+
+	if (!irq_desc[irq].handler->set_affinity)
+		return -EIO;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	/*
+	 * parse_hex_value() only stores into new_value on success, so
+	 * bail out here rather than install an uninitialised mask.
+	 */
+	if (err)
+		return err;
+
+#if CONFIG_SMP
+	/*
+	 * Do not allow disabling IRQs completely - it's a too easy
+	 * way to make the system unusable accidentally :-) At least
+	 * one online CPU still has to be targeted.
+	 */
+	if (!(new_value & cpu_online_map))
+		return -EINVAL;
+#endif
+
+	irq_affinity[irq] = new_value;
+	irq_desc[irq].handler->set_affinity(irq, new_value);
+
+	return full_count;
+}
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	unsigned long *mask = (unsigned long *) data;
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	unsigned long *mask = (unsigned long *) data, full_count = count, err;
+	unsigned long new_value;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)
+		return err;
+
+	*mask = new_value;
+	return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+	struct proc_dir_entry *entry;
+	char name [MAX_NAMELEN];
+
+	if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+			irq_dir[irq])
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	sprintf(name, "%d", irq);
+
+	/* create /proc/irq/1234 */
+	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+	/* create
/proc/irq/1234/smp_affinity */
+	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+	/*
+	 * Only fill in the entry if it was actually created; a NULL
+	 * result is still recorded in smp_affinity_entry[] below.
+	 */
+	if (entry) {
+		entry->nlink = 1;
+		entry->data = (void *)(long)irq;
+		entry->read_proc = irq_affinity_read_proc;
+		entry->write_proc = irq_affinity_write_proc;
+	}
+
+	smp_affinity_entry[irq] = entry;
+}
+
+unsigned long prof_cpu_mask = -1;
+
+/*
+ * Create /proc/irq, the /proc/irq/prof_cpu_mask file (backed by
+ * prof_cpu_mask) and, via register_irq_proc(), one directory per IRQ
+ * in 0..NR_IRQS-1.
+ */
+void init_irq_proc (void)
+{
+	struct proc_dir_entry *entry;
+	int i;
+
+	/* create /proc/irq */
+	root_irq_dir = proc_mkdir("irq", 0);
+
+	/* create /proc/irq/prof_cpu_mask */
+	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+	/* skip the setup instead of dereferencing NULL if creation failed */
+	if (entry) {
+		entry->nlink = 1;
+		entry->data = (void *)&prof_cpu_mask;
+		entry->read_proc = prof_cpu_mask_read_proc;
+		entry->write_proc = prof_cpu_mask_write_proc;
+	}
+
+	/*
+	 * Create entries for all existing IRQs.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		register_irq_proc(i);
+}
+
+unsigned long probe_irq_on(void)
+{
+	return(0);
+}
+
+int probe_irq_off(unsigned long val)
+{
+	return(0);
+}
+
+static unsigned int startup_SIGIO_irq(unsigned int irq)
+{
+	return(0);
+}
+
+static void shutdown_SIGIO_irq(unsigned int irq)
+{
+}
+
+static void enable_SIGIO_irq(unsigned int irq)
+{
+}
+
+static void disable_SIGIO_irq(unsigned int irq)
+{
+}
+
+static void mask_and_ack_SIGIO(unsigned int irq)
+{
+}
+
+static void end_SIGIO_irq(unsigned int irq)
+{
+}
+
+static unsigned int startup_SIGVTALRM_irq(unsigned int irq)
+{
+	return(0);
+}
+
+static void shutdown_SIGVTALRM_irq(unsigned int irq)
+{
+}
+
+static void enable_SIGVTALRM_irq(unsigned int irq)
+{
+}
+
+static void disable_SIGVTALRM_irq(unsigned int irq)
+{
+}
+
+static void mask_and_ack_SIGVTALRM(unsigned int irq)
+{
+}
+
+static void end_SIGVTALRM_irq(unsigned int irq)
+{
+}
+
+static struct hw_interrupt_type SIGIO_irq_type = {
+	"SIGIO",
+	startup_SIGIO_irq,
+	shutdown_SIGIO_irq,
+	enable_SIGIO_irq,
+	disable_SIGIO_irq,
+	mask_and_ack_SIGIO,
+	end_SIGIO_irq,
+	NULL
+};
+
+static struct hw_interrupt_type SIGVTALRM_irq_type = {
+	"SIGVTALRM",
+	startup_SIGVTALRM_irq,
+
shutdown_SIGVTALRM_irq, + enable_SIGVTALRM_irq, + disable_SIGVTALRM_irq, + mask_and_ack_SIGVTALRM, + end_SIGVTALRM_irq, + NULL +}; + +void __init init_IRQ(void) +{ + int i; + + irq_desc[TIMER_IRQ].status = IRQ_DISABLED; + irq_desc[TIMER_IRQ].action = 0; + irq_desc[TIMER_IRQ].depth = 1; + irq_desc[TIMER_IRQ].handler = &SIGVTALRM_irq_type; + enable_irq(TIMER_IRQ); + for(i=1;i +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "process.h" +#include "signal_user.h" + +struct irq_fd { + struct irq_fd *next; + void *id; + int fd; + int irq; + int pid; + int events; + int current_events; +}; + +static struct irq_fd *active_fds = NULL; +static struct irq_fd **last_irq_ptr = &active_fds; + +static struct pollfd *pollfds = NULL; +static int pollfds_num = 0; +static int pollfds_size = 0; + +extern int io_count, intr_count; + +void sigio_handler(int sig, void *sc, int usermode) +{ + struct irq_fd *irq_fd, *next; + int i, n; + +#ifdef CONFIG_SMP + IPI_handler(hard_smp_processor_id()); + if (hard_smp_processor_id() != 0) return; +#endif + while(1){ + if((n = poll(pollfds, pollfds_num, 0)) < 0){ + if(errno == EINTR) continue; + printk("sigio_handler : poll returned %d, " + "errno = %d\n", n, errno); + break; + } + if(n == 0) break; + + irq_fd = active_fds; + for(i = 0; i < pollfds_num; i++){ + if(pollfds[i].revents != 0){ + irq_fd->current_events = pollfds[i].revents; + pollfds[i].events = 0; + } + irq_fd = irq_fd->next; + } + + for(irq_fd = active_fds; irq_fd != NULL; irq_fd = next){ + /* This mysterious assignment protects us against + * the irq handler freeing the irq from under us. 
+			 */
+			next = irq_fd->next;
+			if(irq_fd->current_events != 0){
+				irq_fd->current_events = 0;
+				do_IRQ(irq_fd->irq, usermode);
+			}
+		}
+	}
+}
+
+static int prepare_fd_async(int fd, int pid)
+{
+	int retval;
+
+	if((retval = fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK)) < 0){
+		printk("Failed to set O_ASYNC and O_NONBLOCK on fd # %d, "
+		       "errno = %d\n", fd, errno);
+		return(-retval);
+	}
+
+	if(((retval = fcntl(fd, F_SETSIG, SIGIO)) < 0) ||
+	   ((retval = fcntl(fd, F_SETOWN, pid)) < 0)){
+		printk("Failed to fcntl F_SETOWN (or F_SETSIG) "
+		       "fd %d to pid %d, errno = %d\n", fd, pid, errno);
+		return(-retval);
+	}
+
+	return(0);
+}
+
+int activate_ipi(int fd, int pid)
+{
+	return prepare_fd_async(fd, pid);
+}
+
+/*
+ * Register fd as an interrupt source for irq.
+ *
+ * The descriptor is switched to async, non-blocking, SIGIO-driven mode
+ * owned by cpu_tasks[0].pid, a new irq_fd record is appended to the
+ * active_fds list, and a matching slot is appended to pollfds[] (the
+ * array is grown by one element when it is full).
+ *
+ * Returns 0 on success, -EIO if fd is already registered, the
+ * prepare_fd_async() result if that fails, or -ENOMEM if either
+ * allocation fails.
+ */
+int activate_fd(int irq, int fd, void *dev_id)
+{
+	struct irq_fd *new_fd;
+	int pid, retval, events = POLLIN | POLLPRI;
+
+	for(new_fd = active_fds;new_fd;new_fd = new_fd->next){
+		if(new_fd->fd == fd){
+			printk("Registering fd %d twice\n", fd);
+			printk("Irqs : %d, %d\n", new_fd->irq, irq);
+			printk("Ids : 0x%x, 0x%x\n", new_fd->id, dev_id);
+			return(-EIO);
+		}
+	}
+	pid = cpu_tasks[0].pid;
+	if ((retval = prepare_fd_async(fd, pid)) != 0)
+		return(retval);
+	new_fd = um_kmalloc(sizeof(*new_fd));
+	if(new_fd == NULL) return(-ENOMEM);
+	pollfds_num++;
+	if(pollfds_num > pollfds_size){
+		struct pollfd *tmp_pfd;
+
+		tmp_pfd = um_kmalloc(pollfds_num * sizeof(pollfds[0]));
+		if(tmp_pfd == NULL){
+			pollfds_num--;
+			/* new_fd is not on any list yet - don't leak it */
+			kfree(new_fd);
+			return(-ENOMEM);
+		}
+		if(pollfds != NULL){
+			memcpy(tmp_pfd, pollfds,
+			       sizeof(pollfds[0]) * pollfds_size);
+			kfree(pollfds);
+		}
+		pollfds = tmp_pfd;
+		pollfds_size = pollfds_num;
+	}
+	*new_fd = ((struct irq_fd) { next : NULL,
+				     id : dev_id,
+				     fd : fd,
+				     irq : irq,
+				     pid : pid,
+				     events : events,
+				     current_events: 0 } );
+
+	/* append to the tail of active_fds, in step with pollfds[] */
+	*last_irq_ptr = new_fd;
+	last_irq_ptr = &new_fd->next;
+
+	pollfds[pollfds_num - 1].fd = fd;
+	pollfds[pollfds_num - 1].events = events;
+	pollfds[pollfds_num - 1].revents = 0;
+	return(0);
+}
+
+static void free_irq_by_cb(int
(*test)(struct irq_fd *, void *), void *arg)
+{
+	/*
+	 * Remove every active_fds entry for which test(entry, arg) is
+	 * non-zero, together with its slot in pollfds[].  The list and the
+	 * array are walked in step (i is the array index of *prev); if the
+	 * two disagree about the fd at a position, the walk is abandoned.
+	 */
+	struct irq_fd **prev;
+	int i = 0;
+
+	prev = &active_fds;
+	while(*prev != NULL){
+		if((*test)(*prev, arg)){
+			struct irq_fd *old_fd = *prev;
+			if(pollfds[i].fd != (*prev)->fd){
+				printk("free_irq_fd - mismatch between "
+				       "active_fds and pollfds, fd %d vs %d\n",
+				       (*prev)->fd, pollfds[i].fd);
+				return;
+			}
+			/*
+			 * Source and destination overlap whenever more
+			 * than one element is shifted down, so this has
+			 * to be memmove, not memcpy.
+			 */
+			memmove(&pollfds[i], &pollfds[i + 1],
+				(pollfds_num - i - 1) * sizeof(pollfds[0]));
+			pollfds_num--;
+			/* removing the tail: the append point moves back */
+			if(last_irq_ptr == &old_fd->next)
+				last_irq_ptr = prev;
+			*prev = (*prev)->next;
+			kfree(old_fd);
+			continue;
+		}
+		prev = &(*prev)->next;
+		i++;
+	}
+}
+
+static int same_dev(struct irq_fd *irq, void *dev)
+{
+	return(irq->id == dev);
+}
+
+void free_irq_by_dev(void *dev)
+{
+	free_irq_by_cb(same_dev, dev);
+}
+
+static int same_fd(struct irq_fd *irq, void *fd)
+{
+	return(irq->fd == *((int *) fd));
+}
+
+void free_irq_by_fd(int fd)
+{
+	free_irq_by_cb(same_fd, &fd);
+}
+
+static struct irq_fd *find_irq_by_fd(int fd, int *index_out)
+{
+	struct irq_fd *irq;
+	int i = 0;
+
+	for(irq=active_fds; irq != NULL; irq = irq->next){
+		if(irq->fd == fd) break;
+		i++;
+	}
+	if(irq == NULL){
+		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
+		return(NULL);
+	}
+	if(pollfds[i].fd != fd){
+		printk("find_irq_by_fd - mismatch between active_fds and "
+		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
+		       pollfds[i].fd, fd);
+		return(NULL);
+	}
+	*index_out = i;
+	return(irq);
+}
+
+void reactivate_fd(int fd)
+{
+	struct irq_fd *irq;
+	int i;
+
+	irq = find_irq_by_fd(fd, &i);
+	if(irq == NULL) return;
+	pollfds[i].events = irq->events;
+}
+
+void forward_ipi(int fd, int pid)
+{
+	if(fcntl(fd, F_SETOWN, pid) < 0)
+		printk("forward_ipi: F_SETOWN failed, errno = %d\n", errno);
+}
+
+void forward_interrupts(int pid)
+{
+	struct irq_fd *irq;
+
+	for(irq=active_fds;irq != NULL;irq = irq->next){
+		if(fcntl(irq->fd, F_SETOWN, pid) < 0){
+			int save_errno = errno;
+			if(fcntl(irq->fd, F_GETOWN, 0) != pid){
+				/* XXX Just remove the
irq rather than + * print out an infinite stream of these + */ + printk("Failed to forward %d to pid %d, " + "errno = %d\n", irq->fd, pid, + save_errno); + } + } + irq->pid = pid; + } +} + +void init_irq_signals(int on_sigstack) +{ + int flags; + + flags = on_sigstack ? SA_ONSTACK : 0; + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + flags | SA_NODEFER | SA_RESTART, SIGUSR1, SIGIO, + SIGWINCH, -1); + set_handler(SIGIO, (__sighandler_t) irq_handler, flags | SA_RESTART, + SIGUSR1, SIGIO, SIGWINCH, -1); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/ksyms.c linux_umopenmosix/arch/um/kernel/ksyms.c --- linux-2.4.17/arch/um/kernel/ksyms.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/ksyms.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,29 @@ +#include "linux/module.h" +#include "linux/string.h" +#include "asm/current.h" +#include "asm/delay.h" +#include "asm/processor.h" +#include "asm/unistd.h" +#include "asm/pgalloc.h" +#include "kern_util.h" +#include "user_util.h" + +EXPORT_SYMBOL(stop); +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(uml_physmem); +EXPORT_SYMBOL(set_signals); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(sys_waitpid); +EXPORT_SYMBOL(task_size); +EXPORT_SYMBOL(__do_copy_from_user); +EXPORT_SYMBOL(__do_strncpy_from_user); +EXPORT_SYMBOL(flush_tlb_range); +EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(honeypot); +EXPORT_SYMBOL(host_task_size); + +/* This is here because UML expands open to sys_open, not to a system + * call instruction. 
+ */ +EXPORT_SYMBOL(sys_open); diff -urN linux-2.4.17/arch/um/kernel/mem.c linux_umopenmosix/arch/um/kernel/mem.c --- linux-2.4.17/arch/um/kernel/mem.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/mem.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/config.h" +#include "linux/types.h" +#include "linux/mm.h" +#include "linux/fs.h" +#include "linux/init.h" +#include "linux/bootmem.h" +#include "linux/swap.h" +#include "asm/page.h" +#include "asm/pgtable.h" +#include "asm/pgalloc.h" +#include "asm/bitops.h" +#include "asm/uaccess.h" +#include "user_util.h" +#include "kern_util.h" +#include "mem_user.h" +#include "kern.h" +#include "init.h" + +unsigned long high_physmem; + +unsigned long low_physmem; + +unsigned long vm_start; + +unsigned long vm_end; + +pgd_t swapper_pg_dir[1024]; + +unsigned long *empty_zero_page = NULL; + +unsigned long *empty_bad_page = NULL; + +const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; + +static unsigned long totalram_pages = 0; + +extern char __init_begin, __init_end; +extern long physmem_size; + +int kmalloc_ok = 0; + +void mem_init(void) +{ + max_mapnr = num_physpages = max_low_pfn; + + /* clear the zero-page */ + memset((void *) empty_zero_page, 0, PAGE_SIZE); + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + printk(KERN_INFO "Memory: %luk available\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); + kmalloc_ok = 1; +} + +void paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + int i; + + empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + for(i=0;i> PAGE_SHIFT) - + (uml_physmem >> PAGE_SHIFT) - zones_size[0]; + free_area_init(zones_size); +} + +static int meminfo_22 = 0; + +static int meminfo_compat(char *str) +{ + meminfo_22 = 1; + 
return(1); +} + +__setup("22_meminfo", meminfo_compat); + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = 0; + val->freehigh = 0; + val->mem_unit = PAGE_SIZE; + if(meminfo_22){ + val->freeram <<= PAGE_SHIFT; + val->bufferram <<= PAGE_SHIFT; + val->totalram <<= PAGE_SHIFT; + val->sharedram <<= PAGE_SHIFT; + } +} + +pte_t __bad_page(void) +{ + clear_page(empty_bad_page); + return pte_mkdirty(mk_pte((struct page *) empty_bad_page, + PAGE_SHARED)); +} + +/* This can't do anything because nothing in the kernel image can be freed + * since it's not in kernel physical memory. + */ + +void free_initmem(void) +{ +} + +#ifdef CONFIG_BLK_DEV_INITRD + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", + (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} + +#endif + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + if(pgtable_cache_size > high) { + do { + if (pgd_quicklist) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (pmd_quicklist) { + pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); + freed++; + } + if (pte_quicklist) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } + } while(pgtable_cache_size > low); + } + return freed; +} + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageHighMem(mem_map+i)) + highmem++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += 
page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); +} + +unsigned long kmem_top = 0; + +unsigned long get_kmem_end(void) +{ + if(kmem_top == 0) kmem_top = host_task_size - ABOVE_KMEM; + return(kmem_top); +} + +void set_kmem_end(unsigned long new) +{ + kmem_top = new; +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=",uml_mem_setup, +"mem=\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the physical\n" +" machine. It can be more, and the excess, if it's ever used, will\n" +" just be swapped out.\n Example: mem=64M\n\n" +); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/mem_user.c linux_umopenmosix/arch/um/kernel/mem_user.c --- linux-2.4.17/arch/um/kernel/mem_user.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/mem_user.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,240 @@ +/* + * arch/um/kernel/mem_user.c + * + * BRIEF MODULE DESCRIPTION + * user side memory routines for supporting IO memory inside user mode linux + * + * Copyright (C) 2001 RidgeRun, Inc. + * Author: RidgeRun, Inc. 
+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kern_util.h" +#include "user.h" +#include "user_util.h" +#include "init.h" + +struct mem_region { + struct mem_region *next; + char *driver; + unsigned long start; + unsigned long usable; + unsigned long total; + int fd; +}; + +struct mem_region physmem_region; + +struct mem_region *mem_list = &physmem_region; + +#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" + +int create_mem_file(unsigned long len) +{ + int fd; + char zero; + + fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); + if (fchmod(fd, 0777) < 0){ + perror("fchmod"); + exit(1); + } + if(lseek(fd, len, SEEK_SET) < 0){ + perror("lseek"); + exit(1); + } + zero = 0; + if(write(fd, &zero, 1) != 1){ + perror("write"); + exit(1); + } + if(fcntl(fd, F_SETFD, 1) != 0) + perror("Setting FD_CLOEXEC failed"); + return(fd); +} + +void setup_range(int fd, char *driver, unsigned long start, + unsigned long usable, unsigned long total) +{ + struct mem_region *region, *next; + + if(fd == -1){ + fd = create_mem_file(usable); + region = &physmem_region; + next = physmem_region.next; + } + else { + region = malloc(sizeof(*region)); + if(region == NULL){ + perror("Allocating iomem struct"); + exit(1); + } + next = physmem_region.next; + } + *region = ((struct mem_region) { next, driver, start, usable, + total, fd } ); + if(region != &physmem_region) physmem_region.next = region; +} + +void setup_memory(void) +{ + struct mem_region *region; + void *loc; + unsigned long start; + int page; + + start = -1; + region = mem_list; + page = page_size(); + while(region){ + if(region->start != -1) start = region->start; + else region->start = start; + loc = mmap((void *) region->start, region->usable, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, + region->fd, 0); + if(loc != (void *) region->start){ + perror("Mapping memory"); + exit(1); + } + start += region->total; + start = (start + page - 1) & ~(page - 1); + region = 
region->next; + } +} + +static int __init parse_iomem(char *str, int *add) +{ + struct stat buf; + char *file, *driver; + int fd; + + driver = str; + file = strchr(str,','); + if(file == NULL){ + printk(__FUNCTION__ " failed to parse iomem\n"); + return 1; + } + *file = '\0'; + file++; + fd = open(file, O_RDWR); + if(fd < 0){ + perror("Couldn't open io file"); + return 1; + } + if(fstat(fd, &buf) < 0) { + perror(__FUNCTION__ "fstat - cannot fstat file"); + exit(1); + } + setup_range(fd, driver, -1, buf.st_size, buf.st_size); + return 0; +} +__uml_setup("iomem=",parse_iomem, +"iomem=,\n" +" Configure as a named IO memory region named .\n\n" +); + +#ifdef notdef +int logging = 0; +int logging_fd = -1; + +int logging_line = 0; +char logging_buf[256]; + +void log(char *fmt, ...) +{ + va_list ap; + struct timeval tv; + + if(logging == 0) return; + if(logging_fd == -1) + logging_fd = open("log", O_RDWR | O_CREAT | O_TRUNC, 0644); + gettimeofday(&tv, NULL); + sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, + tv.tv_usec); + va_start(ap, fmt); + vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); + va_end(ap); + write(logging_fd, logging_buf, strlen(logging_buf)); +} +#endif + +void map(unsigned long virt, void *p, unsigned long len, + int r, int w, int x) +{ + struct mem_region *region; + unsigned long phys = (unsigned long) p; + void *loc; + int prot; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? 
PROT_EXEC : 0); + for(region = mem_list; region ; region = region->next) { + if((phys < region->start) || + (phys >= region->start + region->usable)) + continue; + phys -= region->start; + loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + region->fd, phys); + if(loc != (void *) virt){ + panic("Error mapping a page - errno = %d", errno); + } + return; + } + panic("No physical or IO memory region for address 0x%x\n", phys); +} + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct mem_region *region; + + for(region = mem_list; region ; region = region->next) { + if((region->driver != NULL) && + !strcmp(region->driver, driver)){ + *len_out = region->usable; + return(region->start); + } + } + *len_out = 0; + return 0; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/mprot.h linux_umopenmosix/arch/um/kernel/mprot.h --- linux-2.4.17/arch/um/kernel/mprot.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/mprot.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,6 @@ +#ifndef __MPROT_H__ +#define __MPROT_H__ + +extern void no_access(unsigned long addr, unsigned int len); + +#endif diff -urN linux-2.4.17/arch/um/kernel/process.c linux_umopenmosix/arch/um/kernel/process.c --- linux-2.4.17/arch/um/kernel/process.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/process.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#ifdef PROFILING +#include +#endif +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "process.h" +#include "signal_kern.h" +#include "signal_user.h" +#include "sysdep/ptrace.h" +#include "sysdep/sigcontext.h" +#include "irq_user.h" +#include "syscall_user.h" + +void stop_pid(int pid) +{ + kill(pid, SIGSTOP); +} + +void kill_pid(int pid) +{ + kill(pid, SIGKILL); +} + +void usr1_pid(int pid) +{ + kill(pid, SIGUSR1); +} + +void init_new_thread(void *sig_stack, void (*usr1_handler)(int)) +{ + int flags = 0; + + if(sig_stack != NULL){ + set_sigstack(sig_stack, 2 * page_size()); + flags = SA_ONSTACK; + } + set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGILL, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, -1); + set_handler(SIGUSR2, (__sighandler_t) syscall_handler, + SA_NOMASK | flags, -1); + if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); + signal(SIGCHLD, SIG_IGN); + signal(SIGHUP, SIG_IGN); + set_timers(1); /* XXX A bit of a race here */ + init_irq_signals(sig_stack != NULL); +} + +struct tramp { + int (*tramp)(void *); + void *tramp_data; + unsigned long temp_stack; + int flags; + int pid; +}; + +/* See above for why sigkill is here */ + +int sigkill = SIGKILL; + +int outer_tramp(void *arg) +{ + struct tramp *t; + int sig = sigkill; + + t = arg; + t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, + t->flags, t->tramp_data); + if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT); + kill(getpid(), sig); + exit(0); +} + +int start_fork_tramp(void 
*thread_arg, unsigned long temp_stack, int clone_vm,
+		     int (*tramp)(void *))
+{
+	/*
+	 * Clone outer_tramp() onto the temporary stack, wait for it to
+	 * terminate, and return the pid it stored in arg.pid.  Returns
+	 * -errno if the clone fails; panics if the wait fails or if the
+	 * outer trampoline was not terminated by SIGKILL.
+	 */
+	struct tramp arg;
+	unsigned long sp;
+	int new_pid, flags, status, err;
+
+	/* The trampoline will run on the temporary stack */
+	sp = stack_sp(temp_stack);
+
+	flags = CLONE_FILES | SIGCHLD;
+	if(clone_vm) flags |= CLONE_VM;
+
+	arg.tramp = tramp;
+	arg.tramp_data = thread_arg;
+	arg.temp_stack = temp_stack;
+	arg.flags = flags;
+
+	/* Start the process and wait for it to stop itself */
+	new_pid = clone(outer_tramp, (void *) sp, flags, &arg);
+	if(new_pid < 0) return(-errno);
+	/*
+	 * The assignment must be parenthesised: '<' binds tighter than
+	 * '=', so without the parentheses err would receive the 0/1
+	 * result of the comparison and the failure check below could
+	 * never trigger.
+	 */
+	while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ;
+	if(err < 0) panic("Waiting for outer trampoline failed - errno = %d",
+			  errno);
+	if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
+		panic("outer trampoline didn't exit with SIGKILL");
+
+	return(arg.pid);
+}
+
+void trace_myself(void)
+{
+	if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
+		panic("ptrace failed in trace_myself");
+}
+
+void attach_process(int pid)
+{
+	if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) ||
+	   (ptrace(PTRACE_CONT, pid, 0, 0) < 0))
+		tracer_panic("OP_FORK failed to attach pid");
+	wait_for_stop(pid, SIGSTOP, PTRACE_CONT);
+	if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+		tracer_panic("OP_FORK failed to continue process");
+}
+
+void tracer_panic(char *format, ...)
+{ + va_list ap; + + va_start(ap, format); + vprintf(format, ap); + printf("\n"); + while(1) sleep(10); +} + +void suspend_new_thread(int fd) +{ + char c; + + kill(getpid(), SIGSTOP); + + if(read(fd, &c, sizeof(c)) != sizeof(c)) + panic("read failed in suspend_new_thread"); +} + +static int ptrace_child(void *arg) +{ + int pid = getpid(); + + if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ + perror("ptrace"); + exit(1); + } + kill(pid, SIGSTOP); + exit(getpid() == pid); +} + +void check_ptrace(void) +{ + void *stack; + unsigned long sp; + int status, pid, n, syscall; + + printk("Checking that the host ptrace works..."); + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(stack == MAP_FAILED) + panic("check_ptrace : mmap failed, errno = %d", errno); + sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); + pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); + if(pid < 0) + panic("check_ptrace : clone failed, errno = %d", errno); + n = waitpid(pid, &status, WUNTRACED); + if(n < 0) + panic("check_ptrace : wait failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + panic("check_ptrace : expected SIGSTOP, got status = %d", + status); + while(1){ + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + panic("check_ptrace : ptrace failed, errno = %d", + errno); + n = waitpid(pid, &status, WUNTRACED); + if(n < 0) + panic("check_ptrace : wait failed, errno = %d", errno); + if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + panic("check_ptrace : expected SIGTRAP, " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_NR_OFFSET, + 0); + if(syscall == __NR_getpid){ + n = ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_NR_OFFSET, + __NR_getppid); + if(n < 0) + panic("check_ptrace : failed to modify system " + "call, errno = %d", errno); + break; + } + } + if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) + panic("check_ptrace : ptrace failed, errno = %d", errno); + n = 
waitpid(pid, &status, 0); + if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + panic("check_ptrace : child exited with status %d", status); + + if(munmap(stack, PAGE_SIZE) < 0) + panic("check_ptrace : munmap failed, errno = %d", errno); + printk("OK\n"); +} + + + + +int user_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? */ + "je 1f\n\t" /* parent - jump */ + "subl %7,%%esp\n\t" /* space for user-registers */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. */ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | SIGCHLD), + "i" (sizeof(struct pt_regs)) + : "memory"); + return retval; +} + + + + + + + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/process_kern.c linux_umopenmosix/arch/um/kernel/process_kern.c
--- linux-2.4.17/arch/um/kernel/process_kern.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/process_kern.c Fri Jun 28 00:29:35 2002
@@ -0,0 +1,751 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/config.h"
+#include "linux/kernel.h"
+#include "linux/sched.h"
+#include "linux/interrupt.h"
+#include "linux/mm.h"
+#include "linux/slab.h"
+#include "linux/utsname.h"
+#include "linux/fs.h"
+#include "linux/utime.h"
+#include "linux/smp_lock.h"
+#include "linux/module.h"
+#include "linux/init.h"
+#include "linux/capability.h"
+#include "asm/unistd.h"
+#include "asm/mman.h"
+#include "asm/segment.h"
+#include "asm/stat.h"
+#include "asm/pgtable.h"
+#include "asm/processor.h"
+#include "asm/pgalloc.h"
+#include "asm/spinlock.h"
+#include "asm/uaccess.h"
+#include "asm/user.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "kern.h"
+#include "signal_kern.h"
+#include "signal_user.h"
+#include "init.h"
+#include "irq_user.h"
+#include "tlb.h"
+#include "frame.h"
+#include "2_5compat.h"
+
+/* Per-CPU record of the host pid and task currently installed by set_current(). */
+struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
+
+/*
+ * Walk the task list under tasklist_lock and return the task whose pid
+ * matches, or NULL.  When require is non-zero a miss is fatal (panic).
+ */
+static struct task_struct *get_task(int pid, int require)
+{
+	struct task_struct *task, *ret;
+
+	ret = NULL;
+	read_lock(&tasklist_lock);
+	for_each_task(task){
+		if(task->pid == pid){
+			ret = task;
+			break;
+		}
+	}
+	read_unlock(&tasklist_lock);
+	if(require && (ret == NULL)) panic("get_task couldn't find a task\n");
+	return(ret);
+}
+
+/* Return thread.extern_pid of t, or of current when t is NULL. */
+int external_pid(void *t)
+{
+	struct task_struct *task = t ? t : current;
+
+	return(task->thread.extern_pid);
+}
+
+/* Return the index of the cpu_tasks slot holding pid, or -1 if none does. */
+int pid_to_processor_id(int pid)
+{
+	int i;
+
+	for(i = 0; i < smp_num_cpus; i++){
+		if(cpu_tasks[i].pid == pid) return(i);
+	}
+	return(-1);
+}
+
+void free_stack(unsigned long stack)
+{
+	free_page(stack);
+}
+
+/* Record pid as init_task's extern_pid and create its switch pipe. */
+void set_init_pid(int pid)
+{
+	init_task.thread.extern_pid = pid;
+	if(pipe(init_task.thread.switch_pipe) < 0)
+		panic("Can't create switch pipe for init_task");
+}
+
+/*
+ * Post an OP_TRACE_ON request for t (current when t is NULL) and signal
+ * ourselves through usr1_pid().  Returns 1 without doing anything if the
+ * task is already marked as tracing, 0 otherwise.
+ */
+int set_user_mode(void *t, int restore_state, int protect_mem)
+{
+	struct task_struct *task;
+
+	task = t ? t : current;
+	if(task->thread.tracing) return(1);
+	task->thread.request.op = OP_TRACE_ON;
+	task->thread.request.u.trace_on.restore_state = restore_state;
+	if(protect_mem) protect_kernel_mem(1);
+	usr1_pid(getpid());
+	return(0);
+}
+
+void set_tracing(void *task, int tracing)
+{
+	((struct task_struct *) task)->thread.tracing = tracing;
+}
+
+int is_tracing(void *t)
+{
+	return (((struct task_struct *) t)->thread.tracing);
+}
+
+extern void schedule_tail(struct task_struct *prev);
+
+/*
+ * Entry point of a freshly cloned kernel thread (see start_kernel_thread).
+ * It publishes its host pid in task->thread.extern_pid, waits in
+ * suspend_new_thread() on the task's switch pipe, then runs fn(arg) and
+ * exits.
+ */
+static int new_thread_proc(void *t)
+{
+	struct task_struct *task;
+	int (*fn)(void *), pid;
+	void *arg;
+
+	task = t;
+	trace_myself();
+	init_new_thread(NULL, NULL);
+	pid = getpid();
+	fn = task->thread.request.u.thread.proc;
+	arg = task->thread.request.u.thread.arg;
+	task->thread.extern_pid = pid;
+
+	suspend_new_thread(task->thread.switch_pipe[0]);
+
+	set_cmdline("(kernel thread)");
+	force_flush_all();
+	if(current->thread.prev_sched != NULL)
+		schedule_tail(current->thread.prev_sched);
+	current->thread.prev_sched = NULL;
+	(*fn)(arg);
+	do_exit(0);
+	return(0);
+}
+
+/* Allocate one page, apply stack_protections() to it; panics on failure. */
+unsigned long alloc_stack(void)
+{
+	unsigned long page;
+
+	if((page = __get_free_page(GFP_KERNEL)) == 0)
+		panic("Couldn't allocate new stack");
+	stack_protections(page);
+	return(page);
+}
+
+extern int inited_cpus;
+
+/*
+ * Clone new_thread_proc for task with its stack pointer just below
+ * task + 4 * PAGE_SIZE, and return the value of clone_and_wait().  The
+ * new thread must have stored its pid in thread.extern_pid by the time
+ * clone_and_wait() returns, otherwise tracer_panic() is called.  With
+ * CONFIG_SMP and a real cpu number the task is also registered as that
+ * cpu's task and a byte is written to its switch pipe.
+ */
+static int start_kernel_thread(struct task_struct *task, int (*fn)(void *),
+			       void *arg, int cpu)
+{
+	int extern_pid;
+	unsigned long sp;
+
+	sp = ((unsigned long) task) + 4 * PAGE_SIZE - sizeof(void *);
+	task->thread.request.u.thread.proc = fn;
+	task->thread.request.u.thread.arg = arg;
+	task->thread.extern_pid = -1;
+	extern_pid = clone_and_wait(new_thread_proc, task, (void *) sp,
+				    CLONE_FILES | SIGCHLD);
+	if(task->thread.extern_pid == -1)
+		tracer_panic("task didn't set its pid");
+	task->mm = NULL;
+	task->active_mm = NULL;
+#ifdef CONFIG_SMP
+	if(cpu != NO_PROC_ID){
+		/* NOTE(review): c is written uninitialized and the write()
+		 * result is ignored; _switch_to zeroes c and checks it. */
+		unsigned char c;
+
+		cpu_tasks[cpu].pid = extern_pid;
+		cpu_tasks[cpu].task = task;
+		inited_cpus++;
+		init_tasks[cpu] = task;
+		task->processor = cpu;
+		write(task->thread.switch_pipe[1], &c, sizeof(c));
+	}
+#endif
+	return(extern_pid);
+}
+
+/*
+ * Create a kernel thread: do_fork() with CLONE_VM, then post an OP_THREAD
+ * request describing fn/arg/flags/cpu and call usr1_pid().  The host pid
+ * is read back from request.u.thread.new_pid; a negative value is logged
+ * and returned as the error.  On success the kernel pid is returned and
+ * the host pid is stored in *extern_pid_out when that pointer is non-NULL.
+ */
+int kernel_thread1(int (*fn)(void *), void * arg, unsigned long flags,
+		   int cpu, int *extern_pid_out)
+{
+	struct task_struct *new_task;
+	int pid, extern_pid;
+
+	pid = do_fork(CLONE_VM | flags, 0, NULL, 0);
+	if(pid < 0) panic("do_fork failed in kernel_thread");
+	new_task = get_task(pid, 1);
+	current->thread.request.op = OP_THREAD;
+	current->thread.request.u.thread.proc = fn;
+	current->thread.request.u.thread.arg = arg;
+	current->thread.request.u.thread.flags = flags;
+	current->thread.request.u.thread.new_task = new_task;
+	current->thread.request.u.thread.cpu = cpu;
+	usr1_pid(getpid());
+	extern_pid = current->thread.request.u.thread.new_pid;
+	if(extern_pid < 0){
+		printk(KERN_ERR "Kernel thread failed : errno = %d\n",
+		       -extern_pid);
+		return(extern_pid);
+	}
+	if(extern_pid_out != NULL) *extern_pid_out = extern_pid;
+	current->thread.prev_sched = NULL;
+	return(pid);
+}
+
+/* kernel_thread1() with cpu == -1 and no interest in the host pid. */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	return(kernel_thread1(fn, arg, flags, -1, NULL));
+}
+
+/* Move this cpu's bit from prev's cpu_vm_mask to next's. */
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	       struct task_struct *tsk, unsigned cpu)
+{
+	if (prev != next)
+		clear_bit(cpu, &prev->cpu_vm_mask);
+	set_bit(cpu, &next->cpu_vm_mask);
+}
+
+/* Record t and its host pid in the cpu_tasks slot of t's cpu. */
+void set_current(void *t)
+{
+	struct task_struct *task = t;
+
+	cpu_tasks[CPU(task)] = ((struct cpu_task)
+		{ task->thread.extern_pid, task });
+}
+
+/*
+ * Hand the cpu from prev to next: with signals blocked and the two timer
+ * signals turned off, write one byte to next's switch pipe and then block
+ * reading prev's own switch pipe until somebody switches back to it.  A
+ * zombie prev kills its host process instead of waiting.  Returns
+ * current->thread.prev_sched as seen after being resumed.
+ */
+void *_switch_to(void *prev, void *next)
+{
+	struct task_struct *from, *to;
+	int vtalrm, alrm;
+	char c;
+
+	from = prev;
+	to = next;
+
+	to->thread.prev_sched = from;
+
+	if(CPU(from) == 0) forward_interrupts(to->thread.extern_pid);
+	forward_ipi(cpu_data[CPU(from)].ipi_pipe[0], to->thread.extern_pid);
+	block_signals();
+
+	vtalrm = change_sig(SIGVTALRM, 0);
+	alrm = change_sig(SIGALRM, 0);
+
+	c = 0;
+	set_current(to);
+	if(write(to->thread.switch_pipe[1], &c, sizeof(c)) != sizeof(c))
+		panic("write of switch_pipe failed, errno = %d", errno);
+
+	if(from->state == TASK_ZOMBIE) kill_pid(getpid());
+	if(read(from->thread.switch_pipe[0], &c, sizeof(c)) != sizeof(c))
+		panic("read of switch_pipe failed, errno = %d", errno);
+
+	change_sig(SIGVTALRM, vtalrm);
+	change_sig(SIGALRM, alrm);
+
+	flush_tlb_all();
+	unblock_signals();
+
+	return(current->thread.prev_sched);
+}
+
+/* Reschedule if requested, then deliver pending signals, if any. */
+void ret_from_sys_call(void)
+{
+	if(current->need_resched) schedule();
+	if(current->sigpending != 0) do_signal(0);
+}
+
+
+/* Close both ends of the task's switch pipe and kill its host process. */
+void release_thread(struct task_struct *task)
+{
+	close(task->thread.switch_pipe[0]);
+	close(task->thread.switch_pipe[1]);
+	kill_pid(task->thread.extern_pid);
+}
+
+void exit_thread(void)
+{
+	unprotect_stack((unsigned long) current);
+}
+
+/* This sigusr1 business works around a bug in gcc's -pg support.
+ * Normally a procedure's mcount call comes after esp has been copied to
+ * ebp and the new frame is constructed. With procedures with no locals,
+ * the mcount comes before, as the first thing that the procedure does.
+ * When that procedure is main for a thread, ebp comes in as NULL. So,
+ * when mcount dereferences it, it segfaults. So, UML works around this
+ * by adding a non-optimizable local to the various trampolines, fork_tramp
+ * and outer_tramp below, and exec_tramp. 
+ */
+
+static int sigusr1 = SIGUSR1;
+
+/*
+ * Entry point of the host process created for a forked task: blocks
+ * signals, installs finish_fork_handler through init_new_thread() and then
+ * sends itself the signal held in sigusr1.
+ */
+int fork_tramp(void *stack)
+{
+	int sig = sigusr1;
+
+	block_signals();
+	init_new_thread(stack, finish_fork_handler);
+
+	kill(getpid(), sig);
+	return(0);
+}
+
+/*
+ * Initialise the thread state of a new task p: reset p->thread, copy the
+ * parent's registers and create the switch pipe.  When the parent is
+ * forking, a temporary stack is allocated, the host process is started
+ * through start_fork_tramp() and an OP_FORK request is posted.  Returns 0,
+ * or the negative value returned by start_fork_tramp().
+ */
+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
+		unsigned long stack_top, struct task_struct * p,
+		struct pt_regs *regs)
+{
+	int new_pid, clone_vm;
+	unsigned long stack;
+
+	p->thread = (struct thread_struct) INIT_THREAD;
+	p->thread.kernel_stack = (unsigned long) p + 2 * PAGE_SIZE;
+	p->thread.process_regs = current->thread.process_regs;
+	if(pipe(p->thread.switch_pipe) < 0)
+		panic("copy_thread : pipe failed");
+	if(current->thread.forking){
+		stack = alloc_stack();
+		clone_vm = (p->mm == current->mm);
+		p->thread.temp_stack = stack;
+		new_pid = start_fork_tramp((void *) p->thread.kernel_stack,
+					   stack, clone_vm, fork_tramp);
+		if(new_pid < 0){
+			/* NOTE(review): the temp stack and the switch pipe
+			 * are not released on this path - confirm whether
+			 * the caller's error handling does it. */
+			printk(KERN_ERR "copy_thread : clone failed - "
+			       "errno = %d\n", -new_pid);
+			return(new_pid);
+		}
+
+		UM_SET_SYSCALL_RETURN(&p->thread.process_regs, 0);
+		if(sp != 0) UM_SP(&p->thread.process_regs) = sp;
+		p->thread.extern_pid = new_pid;
+
+		current->thread.request.op = OP_FORK;
+		current->thread.request.u.fork.pid = new_pid;
+		usr1_pid(getpid());
+	}
+	current->need_resched = 1;
+	return(0);
+}
+
+/* Post an OP_REBOOT request and signal ourselves. */
+void tracing_reboot(void)
+{
+	current->thread.request.op = OP_REBOOT;
+	usr1_pid(getpid());
+}
+
+/* Post an OP_HALT request and signal ourselves. */
+void tracing_halt(void)
+{
+	current->thread.request.op = OP_HALT;
+	usr1_pid(getpid());
+}
+
+/*
+ * Run proc(arg) in the process whose pid is tracing_pid: directly if that
+ * is us, otherwise by posting an OP_CB request.
+ */
+void tracing_cb(void (*proc)(void *), void *arg)
+{
+	if(getpid() == tracing_pid){
+		(*proc)(arg);
+	}
+	else {
+		current->thread.request.op = OP_CB;
+		current->thread.request.u.cb.proc = proc;
+		current->thread.request.u.cb.arg = arg;
+		usr1_pid(getpid());
+	}
+}
+
+/*
+ * Execute the request stored in task t's thread.request, reset it to
+ * OP_NONE and return the op that was handled.  Unknown ops are fatal
+ * (tracer_panic).
+ */
+int do_proc_op(void *t, int proc_id)
+{
+	struct task_struct *task;
+	struct thread_struct *thread;
+	int op, pid;
+
+	task = t;
+	thread = &task->thread;
+	op = thread->request.op;
+	switch(op){
+	case OP_NONE:
+	case OP_TRACE_ON:
+		break;
+	case OP_EXEC:
+		{
+			int new_pid = thread->request.u.exec.pid;
+			do_exec(thread->extern_pid, new_pid);
+			thread->extern_pid = new_pid;
+			cpu_tasks[CPU(task)].pid = new_pid;
+			break;
+		}
+	case OP_THREAD:
+		pid = start_kernel_thread(thread->request.u.thread.new_task,
+					  thread->request.u.thread.proc,
+					  thread->request.u.thread.arg,
+					  thread->request.u.thread.cpu);
+		thread->request.u.thread.new_pid = pid;
+		break;
+	case OP_FORK:
+		attach_process(thread->request.u.fork.pid);
+		break;
+	case OP_CB:
+		(*thread->request.u.cb.proc)(thread->request.u.cb.arg);
+		break;
+	case OP_REBOOT:
+	case OP_HALT:
+		break;
+	default:
+		tracer_panic("Bad op in do_proc_op");
+		break;
+	}
+	thread->request.op = OP_NONE;
+	return(op);
+}
+
+/* Address of the last pointer-sized slot in the page starting at page. */
+unsigned long stack_sp(unsigned long page)
+{
+	return(page + PAGE_SIZE - sizeof(void *));
+}
+
+int current_pid(void)
+{
+	return(current->pid);
+}
+
+/* Idle loop: adopt init_mm, then reschedule when asked and sleep otherwise. */
+void cpu_idle(void)
+{
+	if(CPU(current) == 0) idle_timer();
+
+	atomic_inc(&init_mm.mm_count);
+	current->mm = &init_mm;
+	current->active_mm = &init_mm;
+
+	while(1){
+		/* endless idle loop with no priority at all */
+		SET_PRI(current);
+
+		/*
+		 * although we are an idle CPU, we do not want to
+		 * get into the scheduler unnecessarily.
+		 */
+		if (current->need_resched) {
+			schedule();
+			check_pgt_cache();
+		}
+		idle_sleep(10);
+	}
+}
+
+int page_size(void)
+{
+	return(PAGE_SIZE);
+}
+
+int page_mask(void)
+{
+	return(PAGE_MASK);
+}
+
+/*
+ * Translate addr through task t's page tables.  Returns the page frame
+ * from the pte plus the offset within the page, or 0xffffffff when the
+ * task has no mm or the pmd/pte is not present.
+ */
+unsigned long um_virt_to_phys(void *t, unsigned long addr)
+{
+	struct task_struct *task;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	task = t;
+	if(task->mm == NULL) return(0xffffffff);
+	pgd = pgd_offset(task->mm, addr);
+	pmd = pmd_offset(pgd, addr);
+	if(!pmd_present(*pmd)) return(0xffffffff);
+	pte = pte_offset(pmd, addr);
+	if(!pte_present(*pte)) return(0xffffffff);
+	return((pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK));
+}
+
+/*
+ * Return a pointer to the current task's argument area, or "(Unknown)"
+ * when it cannot be resolved (always the case with CONFIG_SMP).
+ */
+char *current_cmd(void)
+{
+#ifdef CONFIG_SMP
+	return("(Unknown)");
+#else
+	unsigned long addr;
+
+	/* current->mm must be checked here: it is dereferenced to build
+	 * the argument before um_virt_to_phys() gets to do its own check,
+	 * and tasks started by start_kernel_thread() have mm == NULL. */
+	if(current->mm == NULL)
+		return("(Unknown)");
+	addr = um_virt_to_phys(current, current->mm->arg_start);
+	if(addr == 0xffffffff)
+		return("(Unknown)");
+	return((char *) addr);
+#endif
+}
+
+/* Log the failure, mark SIGBUS pending on current and exit the task. */
+void force_sigbus(void)
+{
+	printk(KERN_ERR "Killing pid %d because of a lack of memory\n",
+	       current->pid);
+	lock_kernel();
+	sigaddset(&current->pending.signal, SIGBUS);
+	recalc_sigpending(current);
+	current->flags |= PF_SIGNALED;
+	do_exit(SIGBUS | 0x80);
+}
+
+/*
+ * Handler installed by fork_tramp: waits on the switch pipe until the new
+ * task is first scheduled, finishes the scheduler hand-over, frees the
+ * temporary stack and enters user mode.
+ */
+void finish_fork_handler(int sig)
+{
+	suspend_new_thread(current->thread.switch_pipe[0]);
+
+	force_flush_all();
+	if(current->mm != current->p_pptr->mm)
+		protect(uml_physmem, high_physmem - uml_physmem, 1, 1, 0, 1);
+	task_protections((unsigned long) current);
+	if(current->thread.prev_sched != NULL)
+		schedule_tail(current->thread.prev_sched);
+	current->thread.prev_sched = NULL;
+
+	free_page(current->thread.temp_stack);
+	change_sig(SIGUSR1, 1);
+	unblock_signals();
+	set_user_mode(current, 1, 1);
+}
+
+void *process_state(void *t)
+{
+	struct task_struct *task = t ? 
t : current;
+
+	return(&task->thread.process_regs);
+}
+
+void dump_thread(struct pt_regs *regs, struct user *u)
+{
+}
+
+void enable_hlt(void)
+{
+	panic("enable_hlt");
+}
+
+void disable_hlt(void)
+{
+	panic("disable_hlt");
+}
+
+extern int signal_frame_size;
+
+/* Reschedule if requested, then run signal delivery. */
+void interrupt_end(void)
+{
+	if(current->need_resched) schedule();
+	do_signal(0);
+}
+
+void *um_kmalloc(int size)
+{
+	return(kmalloc(size, GFP_KERNEL));
+}
+
+unsigned long get_fault_addr(void)
+{
+	return((unsigned long) current->thread.fault_addr);
+}
+
+EXPORT_SYMBOL(get_fault_addr);
+
+/*
+ * Return the task's PT_DTRACE bit (non-zero if set) and clear it, so a
+ * second call returns 0 until it is set again.
+ */
+int singlestepping(void *t)
+{
+	struct task_struct *task;
+	int ret;
+
+	task = (struct task_struct *) t;
+	ret = (task->ptrace & PT_DTRACE);
+	task->ptrace &= ~PT_DTRACE;
+	return(ret);
+}
+
+void not_implemented(void)
+{
+	printk(KERN_DEBUG "Something isn't implemented in here\n");
+}
+
+EXPORT_SYMBOL(not_implemented);
+
+/* Non-zero when sp, masked with PAGE_MASK << 1, is not current's kernel_stack. */
+int user_context(unsigned long sp)
+{
+	return((sp & (PAGE_MASK << 1)) != current->thread.kernel_stack);
+}
+
+extern void remove_umid_dir(void);
+__uml_exitcall(remove_umid_dir);
+
+extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;
+
+/* Run the UML exitcalls from the last table entry down to the first. */
+void do_uml_exitcalls(void)
+{
+	exitcall_t *call;
+
+	call = &__uml_exitcall_end;
+	while (--call >= &__uml_exitcall_begin)
+		(*call)();
+}
+
+extern exitcall_t __exitcall_begin, __exitcall_end;
+
+/* Run the exitcalls in reverse table order, then the UML exitcalls. */
+void do_exitcalls(void)
+{
+	exitcall_t *call;
+
+	call = &__exitcall_end;
+	while (--call >= &__exitcall_begin)
+		(*call)();
+	do_uml_exitcalls();
+}
+
+void *round_up(unsigned long addr)
+{
+	return(ROUND_UP(addr));
+}
+
+void *round_down(unsigned long addr)
+{
+	return(ROUND_DOWN(addr));
+}
+
+/* kmalloc'ed copy of string; returns NULL when the allocation fails. */
+char *uml_strdup(char *string)
+{
+	char *new;
+
+	new = kmalloc(strlen(string) + 1, GFP_KERNEL);
+	if(new == NULL) return(NULL);
+	strcpy(new, string);
+	return(new);
+}
+
+int jail = 0;
+
+/*
+ * Handler for the "jail" command line switch.  Refuses to continue
+ * (exit(1)) when the kernel was configured with SMP, HOSTFS or MODULES;
+ * otherwise drops CAP_SYS_RAWIO from the bounding set and sets jail.
+ * A repeated switch is ignored.  Always returns 0.
+ */
+int __init jail_setup(char *line, int *add)
+{
+	int ok = 1;
+
+	if(jail) return(0);
+#ifdef CONFIG_SMP
+	printf("'jail' may not be used in a kernel with CONFIG_SMP "
+	       "enabled\n");
+	ok = 0;
+#endif
+#ifdef CONFIG_HOSTFS
+	printf("'jail' may not be used in a kernel with CONFIG_HOSTFS "
+	       "enabled\n");
+	ok = 0;
+#endif
+#ifdef CONFIG_MODULES
+	printf("'jail' may not be used in a kernel with CONFIG_MODULES "
+	       "enabled\n");
+	ok = 0;
+#endif
+	if(!ok) exit(1);
+
+	/* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
+	 * Removing it from the bounding set eliminates the ability of anything
+	 * to acquire it, and thus read or write kernel memory.
+	 */
+	cap_lower(cap_bset, CAP_SYS_RAWIO);
+	jail = 1;
+	return(0);
+}
+
+__uml_setup("jail", jail_setup,
+"jail\n"
+" Enables the protection of kernel memory from processes\n\n"
+);
+
+/*
+ * Apply protect() with write permission w to the kernel's memory: physical
+ * memory around the current task's area (everything except the three
+ * pages from current + PAGE_SIZE to current + 4 * PAGE_SIZE), the text,
+ * the data from _unprotected_end, the bss up to brk_start, and whatever
+ * mprotect_kernel_vm() covers.  Does nothing unless jail is set, and never
+ * for init_task.  With delay_signals, interrupts and the two timer signals
+ * are held off for the duration.
+ */
+static void mprotect_kernel_mem(int w, int delay_signals)
+{
+	unsigned long start, end, flags = 0;
+	int alrm = 0, vtalrm = 0;
+
+	if(!jail || (current == &init_task)) return;
+
+	if(delay_signals){
+		local_irq_save(flags);
+		alrm = change_sig(SIGALRM, 0);
+		vtalrm = change_sig(SIGVTALRM, 0);
+	}
+
+	start = (unsigned long) current + PAGE_SIZE;
+	end = (unsigned long) current + PAGE_SIZE * 4;
+	protect(uml_physmem, start - uml_physmem, 1, w, 1, 1);
+	protect(end, high_physmem - end, 1, w, 1, 1);
+
+	start = (unsigned long) ROUND_DOWN(&_stext);
+	end = (unsigned long) ROUND_UP(&_etext);
+	protect(start, end - start, 1, w, 1, 1);
+
+	start = (unsigned long) ROUND_DOWN(&_unprotected_end);
+	end = (unsigned long) ROUND_UP(&_edata);
+	protect(start, end - start, 1, w, 1, 1);
+
+	start = (unsigned long) ROUND_DOWN(&__bss_start);
+	end = (unsigned long) ROUND_UP(brk_start);
+	protect(start, end - start, 1, w, 1, 1);
+
+	mprotect_kernel_vm(w);
+
+	if(delay_signals){
+		local_irq_restore(flags);
+		change_sig(SIGALRM, alrm);
+		change_sig(SIGVTALRM, vtalrm);
+	}
+}
+
+/* Make kernel memory writable again (jail mode only). */
+void unprotect_kernel_mem(int delay_signals)
+{
+	mprotect_kernel_mem(1, delay_signals);
+}
+
+/* Make kernel memory read-only (jail mode only). */
+void protect_kernel_mem(int delay_signals)
+{
+	mprotect_kernel_mem(0, delay_signals);
+}
+
+void *get_init_task(void)
+{
+	
return(&init_task_union.task);
+}
+
+/* Out-of-line wrapper around copy_to_user(). */
+int copy_to_user_proc(void *to, void *from, int size)
+{
+	return(copy_to_user(to, from, size));
+}
+
+/* Out-of-line wrapper around copy_from_user(). */
+int copy_from_user_proc(void *to, void *from, int size)
+{
+	return(copy_from_user(to, from, size));
+}
+
+void set_thread_sc(void *sc)
+{
+	current->thread.sc = sc;
+}
+
+/* Return the restore_state value stored by set_user_mode() for task t. */
+int get_restore_state(void *t)
+{
+	struct task_struct *task = t;
+
+	return(task->thread.request.u.trace_on.restore_state);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/ptrace.c linux_umopenmosix/arch/um/kernel/ptrace.c
--- linux-2.4.17/arch/um/kernel/ptrace.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/ptrace.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,521 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/mm.h"
+#include "linux/errno.h"
+#include "linux/smp_lock.h"
+#include "asm/ptrace.h"
+#include "asm/uaccess.h"
+#include "kern_util.h"
+#include "asm/i387.h"
+
+
+#ifdef CONFIG_MOSIX
+/* NOTE(review): the header name of the #include below is missing in this
+ * copy of the patch - restore it from the original. */
+#include
+#endif /* CONFIG_MOSIX */
+
+#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
+#define TRAP_FLAG 0x100
+
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ */
+void ptrace_disable(struct task_struct *child)
+{
+}
+
+
+/*
+ * Syscall stop for a task that is both PT_PTRACED and PT_TRACESYS: stop
+ * with SIGTRAP (or-ed with 0x80 when PT_TRACESYSGOOD is set), notify the
+ * parent and schedule.  If the tracer left a value in exit_code it is sent
+ * to the task as a signal on resumption.
+ */
+asmlinkage void do_syscall_trace(void)
+{
+	if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) !=
+			(PT_PTRACED|PT_TRACESYS))
+		return;
+	/* the 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+					? 0x80 : 0);
+	current->state = TASK_STOPPED;
+	notify_parent(current, SIGCHLD);
+	schedule();
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use. strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP. -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+
+/*
+ * ptrace system call.  PTRACE_TRACEME is handled for the caller itself;
+ * every other request looks the child up by pid (taking a reference that
+ * is dropped at out_tsk), refuses pid 1, and - apart from PTRACE_ATTACH -
+ * requires ptrace_check_attach() to succeed before dispatching on the
+ * request.  Runs under the big kernel lock.  Returns 0 or the value
+ * produced by the request on success, a negative errno otherwise.
+ */
+int sys_ptrace(long request, long pid, long addr, long data)
+{
+	struct task_struct *child;
+	int i, ret;
+
+	lock_kernel();
+	ret = -EPERM;
+	if (request == PTRACE_TRACEME) {
+		/* are we already being traced? */
+		if (current->ptrace & PT_PTRACED)
+			goto out;
+		/* set the ptrace bit in the process flags. */
+		current->ptrace |= PT_PTRACED;
+		ret = 0;
+		goto out;
+	}
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	if (!child)
+		goto out;
+
+	ret = -EPERM;
+	if (pid == 1)		/* you may not mess with init */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		goto out_tsk;
+	}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret < 0)
+		goto out_tsk;
+
+	switch (request) {
+	/* when I and D space are separate, these will need to be fixed. */
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
+	case PTRACE_PEEKDATA: {
+		unsigned long tmp;
+		int copied;
+
+		ret = -EIO;
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp,(unsigned long *) data);
+		break;
+	}
+
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR: {
+		unsigned long tmp;
+
+		ret = -EIO;
+		if ((addr & 3) || addr < 0)
+			break;
+
+		/* NOTE(review): for addr >= UM_MAX_REG_OFFSET nothing is
+		 * stored and -EIO is returned, so the default below is
+		 * never used - confirm this is intended. */
+		tmp = 0; /* Default return condition */
+		if(addr < UM_MAX_REG_OFFSET){
+			tmp = getreg(child, addr);
+			ret = put_user(tmp,(unsigned long *) data);
+		}
+		break;
+	}
+
+	/* when I and D space are separate, this will have to be fixed. */
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = -EIO;
+		if (access_process_vm(child, addr, &data, sizeof(data),
+				      1) != sizeof(data))
+			break;
+		ret = 0;
+		break;
+
+	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+		ret = -EIO;
+		if ((addr & 3) || addr < 0)
+			break;
+
+		if (addr < UM_MAX_REG_OFFSET) {
+			ret = putreg(child, addr, data);
+			break;
+		}
+
+		break;
+
+	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+	case PTRACE_CONT: { /* restart after signal. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL)
+			child->ptrace |= PT_TRACESYS;
+		else
+			child->ptrace &= ~PT_TRACESYS;
+		child->exit_code = data;
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+/*
+ * make the child exit. Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+	case PTRACE_KILL: {
+		ret = 0;
+		if (child->state == TASK_ZOMBIE) /* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		wake_up_process(child);
+		break;
+	}
+
+	case PTRACE_SINGLESTEP: { /* set the trap flag. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		child->ptrace &= ~PT_TRACESYS;
+		child->ptrace |= PT_DTRACE;
+		child->exit_code = data;
+		/* give it a chance to run. */
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_DETACH:
+		/* detach a process that was attached. */
+		ret = ptrace_detach(child, data);
+		break;
+
+#ifdef PTRACE_GETREGS
+	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+		if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+			       UM_MAX_REG_OFFSET)) {
+			ret = -EIO;
+			break;
+		}
+		for ( i = 0; i < UM_MAX_REG_OFFSET; i += sizeof(long) ) {
+			__put_user(getreg(child, i),(unsigned long *) data);
+			data += sizeof(long);
+		}
+		ret = 0;
+		break;
+	}
+#endif
+#ifdef PTRACE_SETREGS
+	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+		unsigned long tmp = 0;
+		if (!access_ok(VERIFY_READ, (unsigned *)data,
+			       UM_MAX_REG_OFFSET)) {
+			ret = -EIO;
+			break;
+		}
+		for ( i = 0; i < UM_MAX_REG_OFFSET; i += sizeof(long) ) {
+			__get_user(tmp, (unsigned long *) data);
+			putreg(child, i, tmp);
+			data += sizeof(long);
+		}
+		ret = 0;
+		break;
+	}
+#endif
+#ifdef PTRACE_GETFPREGS
+	case PTRACE_GETFPREGS: { /* Get the child FPU state. */
+		ret = -EIO;
+		break;
+	}
+#endif
+#ifdef PTRACE_SETFPREGS
+	case PTRACE_SETFPREGS: { /* Set the child FPU state. */
+		ret = -EIO;
+		break;
+	}
+#endif
+	default:
+		ret = -EIO;
+		break;
+	}
+ out_tsk:
+	free_task_struct(child);
+ out:
+	unlock_kernel();
+	return ret;
+}
+
+/*
+ * Same stop as do_syscall_trace(), except that exit_code is always plain
+ * SIGTRAP (PT_TRACESYSGOOD is not consulted).
+ */
+void syscall_trace(void)
+{
+	if ((current->ptrace & (PT_PTRACED|PT_TRACESYS))
+	    != (PT_PTRACED|PT_TRACESYS))
+		return;
+	current->exit_code = SIGTRAP;
+	current->state = TASK_STOPPED;
+	notify_parent(current, SIGCHLD);
+	schedule();
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use. strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP. 
-brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
+
+
+/*
+ * Read the int found offset bytes from the task's saved register area
+ * (thread.esp0).  With CONFIG_MOSIX the request is forwarded through
+ * request_process() for a DDEPUTY task; otherwise the registers are
+ * obtained first (current) or the task is locked (other tasks), and
+ * the area after mos_to_regs() is used unless thread.saved_esp0 is set.
+ */
+#ifdef CONFIG_MOSIX
+inline int get_stack_long(struct task_struct *task, int offset)
+#else
+static inline int get_stack_long(struct task_struct *task, int offset)
+#endif /* CONFIG_MOSIX */
+{
+	unsigned char *stack;
+
+#ifdef CONFIG_MOSIX
+	if(task != current)
+		lock_mosix();
+	if(task->mosix.dflags & DDEPUTY)
+	{
+		if(task != current)
+			unlock_mosix();
+		return(request_process(task, NULL, PR_PTRACE_GET_STACK_LONG,
+				       offset));
+	}
+	if(task == current)
+		mosix_obtain_registers(ALL_REGISTERS);
+	else
+		task_lock(task);
+	if(!task->thread.saved_esp0)	/* unless in VM86 mode */
+		stack = (unsigned char *)(mos_to_regs(&task->mosix) + 1);
+	else
+#endif /* CONFIG_MOSIX */
+	stack = (unsigned char *)task->thread.esp0;
+	stack += offset;
+#ifdef CONFIG_MOSIX
+	{
+		int res = *((int *)stack);
+		if(task != current)
+		{
+			task_unlock(task);
+			unlock_mosix();
+		}
+		return(res);
+	}
+#else
+	return (*((int *)stack));
+#endif /* CONFIG_MOSIX */
+}
+
+
+
+
+
+
+/*
+ * Counterpart of get_stack_long(): store data offset bytes from the task's
+ * saved register area.  Returns 0, or the result of request_process_arg2()
+ * for a DDEPUTY task under CONFIG_MOSIX.
+ */
+#ifdef CONFIG_MOSIX
+inline int put_stack_long(struct task_struct *task, int offset,
+#else
+static inline int put_stack_long(struct task_struct *task, int offset,
+#endif /* CONFIG_MOSIX */
+	unsigned long data)
+{
+	unsigned char * stack;
+
+#ifdef CONFIG_MOSIX
+	if(task != current)
+		lock_mosix();
+	if(task->mosix.dflags & DDEPUTY)
+	{
+		if(task != current)
+			unlock_mosix();
+		return(request_process_arg2(task, NULL,
+				PR_PTRACE_PUT_STACK_LONG, offset, data));
+	}
+	if(task == current)
+		mosix_obtain_registers(ALL_REGISTERS);
+	else
+		task_lock(task);
+	if(!task->thread.saved_esp0)	/* unless in VM86 mode */
+		stack = (unsigned char *)(mos_to_regs(&task->mosix) + 1);
+	else
+#endif /* CONFIG_MOSIX */
+	stack = (unsigned char *) task->thread.esp0;
+	stack += offset;
+	*(unsigned long *) stack = data;
+#ifdef CONFIG_MOSIX
+	if(task != current)
+	{
+		task_unlock(task);
+		unlock_mosix();
+	}
+#endif /* CONFIG_MOSIX */
+	return 0;
+}
+
+
+
+
+
+#ifdef CONFIG_MOSIX
+/* Store data[0..FRAME_SIZE-1] into current's registers via putreg(). */
+void
+ptrace_putregs(unsigned long *data)
+{
+	register int i;
+
+	for (i = 0; i < FRAME_SIZE ; i++)
+		putreg(current, i << 2, data[i]);
+}
+
+
+
+/*
+ * Read one word of current's USER area: a register for offsets below
+ * FRAME_SIZE * sizeof(long), a debug register for offsets inside
+ * u_debugreg[0..7], and 0 for anything else.
+ */
+unsigned long
+ptrace_peekuser(long addr)
+{
+	struct user * dummy = NULL;
+	unsigned long tmp;
+
+	tmp = 0; /* Default return condition */
+	if(addr < FRAME_SIZE*sizeof(long))
+		tmp = getreg(current, addr);
+	if(addr >= (long) &dummy->u_debugreg[0] &&
+	   addr <= (long) &dummy->u_debugreg[7]){
+		addr -= (long) &dummy->u_debugreg[0];
+		addr = addr >> 2;
+		tmp = current->thread.debugreg[addr];
+	}
+	return(tmp);
+}
+
+/*
+ * Set debug register number addr of the current task to data and, when
+ * debugreg[7] is non-zero, load the values into the hardware registers.
+ * Indices outside 0..7 (the range ptrace_peekuser reads) are ignored
+ * rather than written past the array.
+ */
+void
+ptrace_pokeuser(long addr, long data)
+{
+	if(addr < 0 || addr > 7)
+		return;
+	current->thread.debugreg[addr] = data;
+#define loaddebug(tsk,register) \
+		__asm__("movl %0,%%db" #register \
+			: /* no output */ \
+			:"r" (tsk->thread.debugreg[register]))
+	/* unlike the original ptrace code, we are doing this for ourselves,
+	 * and there may be no "switch_to" before we go to user-mode again,
+	 * so here we go:
+	 */
+	if(current->thread.debugreg[7])
+	{
+		loaddebug(current, 0);
+		loaddebug(current, 1);
+		loaddebug(current, 2);
+		loaddebug(current, 3);
+		loaddebug(current, 6);
+		loaddebug(current, 7);
+	}
+}
+
+/*
+ * Prepare current to continue: clear TRAP_FLAG in the saved eflags, set
+ * PT_TRACESYS only for PTRACE_SYSCALL, and drop PT_PTRACED for
+ * PTRACE_DETACH.
+ */
+void
+ptrace_cont(int request)
+{
+	unsigned long tmp;
+	struct task_struct *tsk = current;
+
+	tmp = get_stack_long(tsk, EFL_OFFSET) & ~TRAP_FLAG;
+	put_stack_long(tsk, EFL_OFFSET, tmp);
+	if (request == PTRACE_SYSCALL)
+		tsk->ptrace |= PT_TRACESYS;
+	else
+		tsk->ptrace &= ~PT_TRACESYS;
+	if (request == PTRACE_DETACH)
+		tsk->ptrace &= ~PT_PTRACED;
+}
+
+
+
+/* Set TRAP_FLAG in current's saved eflags and mark it PT_PTRACED | PT_DTRACE. */
+void
+ptrace_single_step(void)
+{
+	unsigned long tmp;
+
+	tmp = get_stack_long(current, EFL_OFFSET) | TRAP_FLAG;
+	put_stack_long(current, EFL_OFFSET, tmp);
+	current->ptrace |= PT_PTRACED | PT_DTRACE;
+#ifdef CONFIG_MOSIX_DFSA
+	tell_process(current, DREQ_NOTUPTODATE);
+#endif /* CONFIG_MOSIX_DFSA */
+}
+
+
+
+
+
+/* Copy current's FPU state to *to, faking an empty FPU if it was never used. */
+void
+ptrace_getfpregs(struct user_i387_struct *to)
+{
+	struct task_struct *p = current;
+
+	unlazy_fpu(p);
+	if (!p->used_math)
+	{
+		/* Simulate an empty FPU. */
+		set_fpu_cwd(p, 0x037f);
+		set_fpu_swd(p, 0x0000);
+		set_fpu_twd(p, 0xffff);
+	}
+	get_fpregs(to, p);
+}
+
+/* FXSR variant of ptrace_getfpregs(); also defaults mxcsr. */
+void
+ptrace_getfpxregs(struct user_fxsr_struct *to)
+{
+	struct task_struct *p = current;
+
+	unlazy_fpu(p);
+	if (!p->used_math)
+	{
+		/* Simulate an empty FPU. */
+		set_fpu_cwd(p, 0x037f);
+		set_fpu_swd(p, 0x0000);
+		set_fpu_twd(p, 0xffff);
+		set_fpu_mxcsr(p, 0x1f80);
+	}
+	get_fpxregs(to, p);
+}
+
+/* Replace current's FPU state with *from and mark the FPU as used. */
+void
+ptrace_setfpregs(struct user_i387_struct *from)
+{
+	struct task_struct *p = current;
+
+	clear_fpu(p);
+	p->used_math = 1;
+	set_fpregs(p, from);
+}
+
+/* FXSR variant of ptrace_setfpregs(). */
+void
+ptrace_setfpxregs(struct user_fxsr_struct *from)
+{
+	struct task_struct *p = current;
+
+	clear_fpu(p);
+	p->used_math = 1;
+	set_fpxregs(p, from);
+}
+#endif /* CONFIG_MOSIX */
+
+
+
+
+
+
+
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/reboot.c linux_umopenmosix/arch/um/kernel/reboot.c
--- linux-2.4.17/arch/um/kernel/reboot.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/reboot.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "kern.h"
+
+/*
+ * kill_pid() the host process of every task on the task list, and of
+ * init_task, skipping whichever one is the calling process.
+ */
+static void kill_off_processes(void)
+{
+	struct task_struct *task;
+	int self;
+
+	self = getpid();
+	for_each_task(task){
+		if(task->thread.extern_pid == self)
+			continue;
+		kill_pid(task->thread.extern_pid);
+	}
+	if(init_task.thread.extern_pid != self)
+		kill_pid(init_task.thread.extern_pid);
+}
+
+/* Kill the other host processes, then run the UML exitcalls. */
+void uml_cleanup(void)
+{
+	kill_off_processes();
+	do_uml_exitcalls();
+}
+
+/* Run all exitcalls, kill the other processes, request a reboot, then die. */
+void machine_restart(char * __unused)
+{
+	do_exitcalls();
+	kill_off_processes();
+	tracing_reboot();
+	kill_pid(getpid());
+}
+
+/* Same sequence as machine_restart(), but requesting a halt. */
+void machine_power_off(void)
+{
+	do_exitcalls();
+	kill_off_processes();
+	tracing_halt();
+	kill_pid(getpid());
+}
+
+/* Halting is implemented as powering off. */
+void machine_halt(void)
+{
+	machine_power_off();
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/resource.c linux_umopenmosix/arch/um/kernel/resource.c
--- linux-2.4.17/arch/um/kernel/resource.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/resource.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/pci.h"
+
+/* No adjustment is made here: the requested start is returned unchanged. */
+unsigned long resource_fixup(struct pci_dev * dev, struct resource * res,
+			     unsigned long start, unsigned long size)
+{
+	return start;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/setup.c linux_umopenmosix/arch/um/kernel/setup.c
--- linux-2.4.17/arch/um/kernel/setup.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/setup.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "asm/processor.h"
+
+/* All four fields start out as zero. */
+struct cpuinfo_um boot_cpu_data = { 0, 0, 0, 0 };
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/signal_kern.c linux_umopenmosix/arch/um/kernel/signal_kern.c
--- linux-2.4.17/arch/um/kernel/signal_kern.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/signal_kern.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,686 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/config.h"
+#include "linux/stddef.h"
+#include "linux/sys.h"
+#include "linux/sched.h"
+#include "linux/wait.h"
+#include "linux/kernel.h"
+#include "linux/smp_lock.h"
+#include "linux/module.h"
+#include "linux/slab.h"
+#include "asm/signal.h"
+#include "asm/uaccess.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "signal_kern.h"
+#include "signal_user.h"
+#include "kern.h"
+#include "frame_kern.h"
+#include "frame.h"
+#include "sigcontext.h"
+#include "sysdep/sigcontext.h"
+
+/* NOTE(review): the header names of the bare #include lines below are
+ * missing in this copy of the patch - restore them from the original. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_MOSIX
+#include
+#endif /* CONFIG_MOSIX */
+
+
+#define TF_MASK 0x00000100
+
+
+EXPORT_SYMBOL(block_signals);
+EXPORT_SYMBOL(unblock_signals);
+
+
+struct ucontext {
+	unsigned long uc_flags;
+	struct ucontext *uc_link;
+	stack_t uc_stack;
+	struct sigcontext uc_mcontext;
+	sigset_t uc_sigmask;	/* mask last for extensibility */
+};
+
+
+struct rt_sigframe
+{
+	char *pretcode;
+	int sig;
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	struct _fpstate fpstate;
+	char retcode[8];
+};
+
+
+
+struct sigframe
+{
+	char *pretcode;
+	int sig;
+	struct sigcontext sc;
+	struct _fpstate fpstate;
+	unsigned long extramask[_NSIG_WORDS-1];
+	char retcode[8];
+};
+
+
+
+
+
+
+
+
+
+
+
+
+
+/*
+ * Fill the user-space sigcontext *sc from regs, the live %gs/%fs, the
+ * current thread's trap_no/error_code/cr2 and mask, and save the FPU state
+ * to *fpstate.  Returns non-zero if any store to user space failed.
+ */
+static int
+setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int tmp, err = 0;
+
+#ifdef CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID
+	mosix_obtain_registers(ALL_REGISTERS);
+#endif /* CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID */
+	tmp = 0;
+	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int *)&sc->gs);
+	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	err |= __put_user(tmp, (unsigned int *)&sc->fs);
+
+	err |= __put_user(regs->xes, (unsigned int *)&sc->es);
+	err |= __put_user(regs->xds, (unsigned int *)&sc->ds);
+	err |= __put_user(regs->edi, &sc->edi);
+	err |= __put_user(regs->esi, &sc->esi);
+	err |= __put_user(regs->ebp, &sc->ebp);
+	err |= __put_user(regs->esp, &sc->esp);
+	err |= __put_user(regs->ebx, &sc->ebx);
+	err |= __put_user(regs->edx, &sc->edx);
+	err |= __put_user(regs->ecx, &sc->ecx);
+	err |= __put_user(regs->eax, &sc->eax);
+	err |= __put_user(current->thread.trap_no, &sc->trapno);
+	err |= __put_user(current->thread.error_code, &sc->err);
+	err |= __put_user(regs->eip, &sc->eip);
+	err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
+	err |= __put_user(regs->eflags, &sc->eflags);
+	err |= __put_user(regs->esp, &sc->esp_at_signal);
+	err |= __put_user(regs->xss, (unsigned int *)&sc->ss);
+
+	tmp = save_i387(fpstate);
+	if (tmp < 0)
+		err = 1;
+	else
+		err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+	/* non-iBCS2 extensions.. */
+	err |= __put_user(mask, &sc->oldmask);
+	err |= __put_user(current->thread.cr2, &sc->cr2);
+
+	return err;
+}
+
+
+
+/*
+ * Check that the user words at sp and at sp - delta can be both read and
+ * written back.  Returns 0, or -EFAULT if any access faults.
+ */
+int probe_stack(unsigned long sp, int delta)
+{
+	int n;
+
+	if((get_user(n, (int *) sp) != 0) ||
+	   (put_user(n, (int *) sp) != 0) ||
+	   (get_user(n, (int *) (sp - delta)) != 0) ||
+	   (put_user(n, (int *) (sp - delta)) != 0))
+		return(-EFAULT);
+	return(0);
+}
+
+/*
+ * Force a SIGSEGV on current.  If the signal being delivered was itself
+ * SIGSEGV its handler is reset to SIG_DFL first.
+ */
+static void force_segv(int sig)
+{
+	if(sig == SIGSEGV){
+		struct k_sigaction *ka;
+
+		ka = &current->sig->action[SIGSEGV - 1];
+		ka->sa.sa_handler = SIG_DFL;
+	}
+	force_sig(SIGSEGV, current);
+}
+
+#define _S(nr) (1<<((nr)-1))
+
+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+
+/*
+ * OK, we're invoking a handler
+ *
+ * Applies the system call restart rules selected by error, updates the
+ * blocked mask, saves the register state, the old signal mask and the
+ * previous signal context below the user stack pointer (or on the
+ * alternate stack), and builds the handler frame.  Returns 0 on success;
+ * on any failure to write the user stack a SIGSEGV is forced and 1 is
+ * returned.
+ */
+static int handle_signal(unsigned long signr, struct k_sigaction *ka,
+			 siginfo_t *info, sigset_t *oldset, int error)
+{
+	struct signal_context *context;
+	__sighandler_t handler;
+	void (*restorer)(void);
+	unsigned long sp;
+	sigset_t save;
+	int err, ret;
+
+	ret = 0;
+	switch(error){
+	case -ERESTARTNOHAND:
+		ret = -EINTR;
+		break;
+
+	case -ERESTARTSYS:
+		if (!(ka->sa.sa_flags & SA_RESTART)) {
+			ret = -EINTR;
+			break;
+		}
+		/* fallthrough */
+	case -ERESTARTNOINTR:
+		UM_RESTART_SYSCALL(&current->thread.process_regs);
+		UM_ORIG_SYSCALL(&current->thread.process_regs) =
+			UM_SYSCALL_NR(&current->thread.process_regs);
+
+		/* This is because of the UM_SET_SYSCALL_RETURN and the fact
+		 * that on i386 the system call number and return value are
+		 * in the same register. When the system call restarts, %eax
+		 * had better have the system call number in it. Since the
+		 * return value doesn't matter (except that it shouldn't be
+		 * -ERESTART*), we'll stick the system call number there.
+		 */
+		ret = UM_SYSCALL_NR(&current->thread.process_regs);
+		break;
+	}
+
+	handler = ka->sa.sa_handler;
+	save = *oldset;
+
+	if (ka->sa.sa_flags & SA_ONESHOT)
+		ka->sa.sa_handler = SIG_DFL;
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sigmask_lock);
+		sigorsets(&current->blocked, &current->blocked,
+			  &ka->sa.sa_mask);
+		sigaddset(&current->blocked, signr);
+		recalc_sigpending(current);
+		spin_unlock_irq(&current->sigmask_lock);
+	}
+
+	sp = UM_SP(&current->thread.process_regs);
+
+	if((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	sp -= 4 * sizeof(void *) + sizeof(*context);
+	context = (struct signal_context *) sp;
+
+	if(error != 0)
+		UM_SET_SYSCALL_RETURN(&current->thread.process_regs, ret);
+
+	if(copy_to_user(&context->regs, &current->thread.process_regs,
+			sizeof(current->thread.process_regs)) ||
+	   copy_to_user(&context->sigs, &save, sizeof(save)) ||
+	   copy_to_user(&context->prev, &current->thread.signal_context,
+			sizeof(current->thread.signal_context))){
+		force_segv(signr);
+		return(1);
+	}
+
+	if(ka->sa.sa_flags & SA_SIGINFO){
+		sp -= SC_STACK_SIZE;
+		if(copy_sc_to_user((void *) sp, current->thread.sc,
+				   &signal_frame_sc.arch))
+			goto segv;
+		context->sc = (struct sigcontext *) sp;
+	}
+
+	current->thread.signal_context = context;
+	sp -= 4 * sizeof(void *);
+
+	if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer;
+	else restorer = NULL;
+
+	if(ka->sa.sa_flags & SA_SIGINFO)
+		err = setup_signal_stack_si(sp, signr, (unsigned long) handler,
+					    restorer,
+					    &current->thread.process_regs,
+					    info);
+	else
+		err = setup_signal_stack_sc(sp, signr, (unsigned long) handler,
+					    restorer,
+					    &current->thread.process_regs,
+					    current->thread.sc, &context->sc);
+	if(err) goto segv;
+
+	return(0);
+ segv:
+	force_segv(signr);
+	return(1);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+
+static int kern_do_signal(sigset_t *oldset, int error)
+{
+	siginfo_t info;
+	struct k_sigaction *ka;
+	int err;
+
+	if (!oldset)
+		oldset = &current->blocked;	/* default: the current blocked set */
+
+	for (;;) {
+		unsigned long signr;
+
+		spin_lock_irq(&current->sigmask_lock);
+		signr = dequeue_signal(&current->blocked, &info);
+		spin_unlock_irq(&current->sigmask_lock);
+
+		if (!signr)
+			break;
+
+		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
+			/* Let the debugger run. */
+			current->exit_code = signr;
+			current->state = TASK_STOPPED;
+			notify_parent(current, SIGCHLD);
+			schedule();
+
+			/* We're back. Did the debugger cancel the sig? */
+			if (!(signr = current->exit_code))
+				continue;
+			current->exit_code = 0;
+
+			/* The debugger continued. Ignore SIGSTOP. */
+			if (signr == SIGSTOP)
+				continue;
+
+			/* Update the siginfo structure. Is this good? */
+			if (signr != info.si_signo) {
+				info.si_signo = signr;
+				info.si_errno = 0;
+				info.si_code = SI_USER;
+				info.si_pid = current->p_pptr->pid;
+				info.si_uid = current->p_pptr->uid;
+			}
+
+			/* If the (new) signal is now blocked, requeue it. */
+			if (sigismember(&current->blocked, signr)) {
+				send_sig_info(signr, &info, current);
+				continue;
+			}
+		}
+
+		ka = &current->sig->action[signr-1];
+		if (ka->sa.sa_handler == SIG_IGN) {
+			if (signr != SIGCHLD)
+				continue;
+			/* Check for SIGCHLD: it's special. */
+			while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+				/* nothing */;
+			continue;
+		}
+
+		if (ka->sa.sa_handler == SIG_DFL) {
+			int exit_code = signr;
+
+			/* Init gets no signals it doesn't want. */
+			if (current->pid == 1)
+				continue;
+
+			switch (signr) {
+			case SIGCONT: case SIGCHLD: case SIGWINCH:
+				continue;
+
+			case SIGTSTP: case SIGTTIN: case SIGTTOU:
+				if (is_orphaned_pgrp(current->pgrp))
+					continue;
+				/* FALLTHRU */
+
+			case SIGSTOP: {
+				struct signal_struct *sig;
+				current->state = TASK_STOPPED;
+				current->exit_code = signr;
+				sig = current->p_pptr->sig;
+				if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
+					notify_parent(current, SIGCHLD);
+				schedule();
+				continue;
+			}
+			case SIGQUIT: case SIGILL: case SIGTRAP:
+			case SIGABRT: case SIGFPE: case SIGSEGV:
+			case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+				if (do_coredump(signr,
+						(struct pt_regs *)
+						&current->thread.process_regs))
+					exit_code |= 0x80;
+				/* FALLTHRU */
+
+			default:
+				sigaddset(&current->pending.signal, signr);
+				recalc_sigpending(current);
+				current->flags |= PF_SIGNALED;
+				do_exit(exit_code);
+				/* NOTREACHED */
+			}
+		}
+
+		/* Whee! Actually deliver the signal. */
+		err = handle_signal(signr, ka, &info, oldset, error);
+		if(!err) return(1);	/* a handler frame was set up */
+	}
+	return(0);	/* no deliverable signal was left */
+}
+
+int do_signal(int error)
+{
+	return(kern_do_signal(NULL, error));	/* NULL selects current->blocked as the old mask */
+}
+
+/* Atomically swap in the new signal mask (never blocking SIGKILL/SIGSTOP)
+ * and sleep until kern_do_signal() has set up a handler; the previous mask
+ * is passed on as the one to restore.  The only return value is -EINTR. */
+int sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
+	sigset_t saveset;
+
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sigmask_lock);
+	saveset = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if(kern_do_signal(&saveset, -EINTR))
+			return(-EINTR);
+	}
+}
+
+int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
+{
+	sigset_t saveset, newset;	/* old mask to restore / mask copied from user space */
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&newset, unewset, sizeof(newset)))
+		return -EFAULT;
+	sigdelsetmask(&newset, ~_BLOCKABLE);	/* SIGKILL and SIGSTOP stay unblocked */
+
+	spin_lock_irq(&current->sigmask_lock);
+	saveset = current->blocked;
+	current->blocked = newset;
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (kern_do_signal(&saveset, -EINTR))
+			return(-EINTR);
+	}
+}
+
+int sys_sigreturn(struct sys_pt_regs regs)
+{
+	struct signal_context *context = current->thread.signal_context;	/* innermost saved context */
+
+	sigdelsetmask(&context->sigs, ~_BLOCKABLE);	/* SIGKILL and SIGSTOP stay unblocked */
+	spin_lock_irq(&current->sigmask_lock);
+	current->blocked = context->sigs;
+	recalc_sigpending(current);
+	spin_unlock_irq(&current->sigmask_lock);
+	current->thread.process_regs = context->regs;
+	current->thread.signal_context = context->prev;	/* pop back to the outer context */
+	if(context->sc != NULL)
+		copy_sc_from_user(current->thread.sc, context->sc);
+	return(UM_SYSCALL_RET(&current->thread.process_regs));
+}
+
+
+
+
+// ################################################################
+
+/* Determine which stack to use: the alternate stack for SA_ONSTACK when
+ * sas_ss_flags(esp) is 0, the legacy sa_restorer value when %ss is not
+ * __USER_DS, else regs->esp.  Returns (esp - frame_size) aligned down to 8. */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long esp;
+
+	/* Default to using normal stack */
+	esp = regs->esp;
+
+	/* This is the X/Open sanctioned signal stack switching. */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(esp) == 0)
+			esp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	/* This is the legacy signal stack switching. */
+	else if ((regs->xss & 0xffff) != __USER_DS &&
+		 !(ka->sa.sa_flags & SA_RESTORER) &&
+		 ka->sa.sa_restorer) {
+		esp = (unsigned long) ka->sa.sa_restorer;
+	}
+
+	return (void *)((esp - frame_size) & -8ul);
+}
+
+
+/*
+ * Build a struct rt_sigframe (siginfo plus ucontext) for signal 'sig' at
+ * the address chosen by get_sigframe() and point 'regs' at the handler.
+ * 'set' is the signal mask recorded in the frame.  On any failure SIGSEGV
+ * is forced, with its handler reset to default if 'sig' is SIGSEGV.
+ */
+#ifdef CONFIG_MOSIX
+void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+#else
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+#endif /* CONFIG_MOSIX */
+			   sigset_t *set, struct pt_regs * regs)
+{
+	struct rt_sigframe *frame;
+	int err = 0;
+
+#ifdef CONFIG_MOSIX
+	mosix_obtain_registers(ALL_REGISTERS);
+#endif /* CONFIG_MOSIX */
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	err |= __put_user((current->exec_domain
+			   && current->exec_domain->signal_invmap
+			   && sig < 32
+			   ? current->exec_domain->signal_invmap[sig]
+			   : sig),
+			  &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+	if (err)
+		goto give_sigsegv;
+
+	/* Create the ucontext. */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->esp),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace. If provided, use a stub
+	   already in userspace. */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		err |= __put_user(frame->retcode, &frame->pretcode);
+		/* This is movl $,%eax ; int $0x80 */
+		err |= __put_user(0xb8, (char *)(frame->retcode+0));
+		err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1));
+		err |= __put_user(0x80cd, (short *)(frame->retcode+5));
+	}
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+	regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
+}
+
+
+/*
+ * Non-rt counterpart of setup_rt_frame(): build a struct sigframe holding
+ * the sigcontext, with set->sig[0] as oldmask and the remaining mask words
+ * in extramask, then point 'regs' at the handler.  On any failure SIGSEGV
+ * is forced, with its handler reset to default if 'sig' is SIGSEGV.
+ */
+#ifdef CONFIG_MOSIX
+void setup_frame(int sig, struct k_sigaction *ka,
+#else
+static void setup_frame(int sig, struct k_sigaction *ka,
+#endif /* CONFIG_MOSIX */
+			sigset_t *set, struct pt_regs * regs)
+{
+	struct sigframe *frame;
+	int err = 0;
+
+#ifdef CONFIG_MOSIX
+	mosix_obtain_registers(ALL_REGISTERS);
+#endif /* CONFIG_MOSIX */
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	err |= __put_user((current->exec_domain
+			   && current->exec_domain->signal_invmap
+			   && sig < 32
+			   ? current->exec_domain->signal_invmap[sig]
+			   : sig),
+			  &frame->sig);
+	if (err)
+		goto give_sigsegv;
+
+	err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+	if (err)
+		goto give_sigsegv;
+
+	if (_NSIG_WORDS > 1) {
+		err |= __copy_to_user(frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask));
+	}
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace. If provided, use a stub
+	   already in userspace. */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+	} else {
+		err |= __put_user(frame->retcode, &frame->pretcode);
+		/* This is popl %eax ; movl $,%eax ; int $0x80 */
+		err |= __put_user(0xb858, (short *)(frame->retcode+0));
+		err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2));
+		err |= __put_user(0x80cd, (short *)(frame->retcode+6));
+	}
+
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up registers for signal handler */
+	regs->esp = (unsigned long) frame;
+	regs->eip = (unsigned long) ka->sa.sa_handler;
+
+	set_fs(USER_DS);
+	regs->xds = __USER_DS;
+	regs->xes = __USER_DS;
+	regs->xss = __USER_DS;
+	regs->xcs = __USER_CS;
+	regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+		current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+	return;
+
+give_sigsegv:
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
+}
+
+
+
+
+
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/signal_user.c linux_umopenmosix/arch/um/kernel/signal_user.c
--- linux-2.4.17/arch/um/kernel/signal_user.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/signal_user.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include /* NOTE(review): the header names of the bare #include lines were lost in extraction; restore them from the original patch */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "signal_user.h"
+#include "signal_kern.h"
+#include "sysdep/sigcontext.h"
+#include "sigcontext.h"
+
+extern int timer_on;
+
+/*
+ * Register sig_stack as the alternate signal stack; the size handed to
+ * sigaltstack() is 'size' less one pointer.  Panics if the call fails.
+ */
+void set_sigstack(void *sig_stack, int size)
+{
+	stack_t stack;
+
+	stack.ss_sp = (__ptr_t) sig_stack;
+	stack.ss_flags = 0;
+	stack.ss_size = size - sizeof(void *);
+	if(sigaltstack(&stack, NULL) != 0)
+		panic("sigaltstack failed");
+}
+
+/*
+ * Install 'handler' for 'sig' with sa_flags 'flags'.  The variadic tail
+ * lists the signals to add to sa_mask and must be terminated by -1.
+ * Panics if sigaction() fails.
+ */
+void set_handler(int sig, void (*handler)(int), int flags, ...)
+{
+	struct sigaction action;
+	va_list ap;
+	int mask;
+
+	va_start(ap, flags);
+	action.sa_handler = handler;
+	sigemptyset(&action.sa_mask);
+	while((mask = va_arg(ap, int)) != -1){
+		sigaddset(&action.sa_mask, mask);
+	}
+	va_end(ap);	/* every va_start() needs a matching va_end() */
+	action.sa_flags = flags;
+	action.sa_restorer = NULL;
+	if(sigaction(sig, &action, NULL) < 0)
+		panic("sigaction failed");
+}
+
+/*
+ * Unblock 'signal' when 'on' is non-zero, block it otherwise.  Returns
+ * sigismember() of 'signal' in the mask that was in effect before.
+ */
+int change_sig(int signal, int on)
+{
+	sigset_t sigset, old;
+
+	sigemptyset(&sigset);
+	sigaddset(&sigset, signal);
+	sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
+	return(sigismember(&old, signal));
+}
+
+/*
+ * Apply 'type' (SIG_BLOCK or SIG_UNBLOCK) to SIGIO, SIGWINCH and SIGPROF.
+ * SIG_BLOCK only clears timer_on and leaves the timer signals' mask alone;
+ * any other type sets timer_on and includes SIGVTALRM and SIGALRM.
+ */
+static void change_signals(int type)
+{
+	sigset_t mask;
+
+	sigemptyset(&mask);
+	if(type == SIG_BLOCK) timer_on = 0;
+	else {
+		timer_on = 1;
+		sigaddset(&mask, SIGVTALRM);
+		sigaddset(&mask, SIGALRM);
+	}
+	sigaddset(&mask, SIGIO);
+	sigaddset(&mask, SIGWINCH);
+	sigaddset(&mask, SIGPROF);
+	if(sigprocmask(type, &mask, NULL) < 0)
+		panic("Failed to change signal mask - errno = %d", errno);
+}
+
+/* Block the signals managed by change_signals() and clear timer_on. */
+void block_signals(void)
+{
+	change_signals(SIG_BLOCK);
+}
+
+/* Unblock the signals managed by change_signals() and set timer_on. */
+void unblock_signals(void)
+{
+	change_signals(SIG_UNBLOCK);
+}
+
+/* Bit positions of the enable word built by enable_mask(). */
+#define SIGIO_BIT 0
+#define SIGVTALRM_BIT 1
+
+/*
+ * Convert a blocked-signal set into an enable word: a bit is set when the
+ * corresponding signal is NOT a member of 'mask'.  SIGVTALRM and SIGALRM
+ * share SIGVTALRM_BIT, which is also set whenever timer_on is non-zero.
+ */
+static int enable_mask(sigset_t *mask)
+{
+	int sigs;
+
+	sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT;
+	sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT;
+	sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT;
+	if(timer_on) sigs |= 1 << SIGVTALRM_BIT;
+	return(sigs);
+}
+
+/*
+ * Unblock the signals whose bits are set in 'enable' and block SIGIO when
+ * its bit is clear.  A clear timer bit only clears timer_on; SIGVTALRM and
+ * SIGALRM are not blocked.  Returns enable_mask() of the old signal mask.
+ */
+int set_signals(int enable)
+{
+	sigset_t mask;
+	int ret;
+
+	sigemptyset(&mask);
+	if(enable & (1 << SIGIO_BIT)) sigaddset(&mask, SIGIO);
+	if(enable & (1 << SIGVTALRM_BIT)){
+		timer_on = 1;
+		sigaddset(&mask, SIGVTALRM);
+		sigaddset(&mask, SIGALRM);
+	}
+	if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
+		panic("Failed to enable signals");
+	ret = enable_mask(&mask);
+	sigemptyset(&mask);
+	if((enable & (1 << SIGIO_BIT)) == 0) sigaddset(&mask, SIGIO);
+	if((enable & (1 << SIGVTALRM_BIT)) == 0){
+		timer_on = 0;
+	}
+	if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
+		panic("Failed to block signals");
+	return(ret);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/smp.c linux_umopenmosix/arch/um/kernel/smp.c
--- linux-2.4.17/arch/um/kernel/smp.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/smp.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/config.h"
+
+
+#ifdef CONFIG_SMP
+
+#include "linux/sched.h"
+#include "linux/threads.h"
+#include "linux/interrupt.h"
+#include "asm/smp.h"
+#include "asm/processor.h"
+#include "asm/spinlock.h"
+#include "asm/softirq.h"
+#include "asm/hardirq.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "kern.h"
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* The 'big kernel lock' */
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_um cpu_data[NR_CPUS];
+
+/* CPU online map */
+unsigned long cpu_online_map;
+
+spinlock_t um_bh_lock = SPIN_LOCK_UNLOCKED;
+
+atomic_t global_bh_count;	/* polled by wait_on_bh() until it reads zero */
+
+unsigned char global_irq_holder = NO_PROC_ID;
+unsigned volatile long global_irq_lock;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready = 0;
+int num_reschedules_sent = 0;	/* incremented by smp_send_reschedule() */
+
+void smp_send_reschedule(int cpu)
+{
+	write(cpu_data[cpu].ipi_pipe[1], "R", 1);	/* one 'R' byte down the CPU's IPI pipe; result not checked */
+	num_reschedules_sent++;
+}
+
+static void show(char * str)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_INFO "\n%s, CPU %d:\n", str, cpu);	/* only a header line naming the caller and CPU */
+}
+
+#define MAXCOUNT 100000000	/* wait_on_bh() iterations before it reports */
+
+static inline void wait_on_bh(void)
+{
+	int count = MAXCOUNT;
+	do {
+		if (!--count) {
+			show("wait_on_bh");
+			count = ~0;	/* NOTE(review): restarts from -1, presumably so the report is not repeated */
+		}
+		/* nothing .. wait for the other bh's to go away */
+	} while (atomic_read(&global_bh_count) != 0);
+}
+
+/*
+ * This is called when we want to synchronize with
+ * bottom half handlers. We need to wait until
+ * no other CPU is executing any bottom half handler.
+ *
+ * Don't wait if we're already running in an interrupt
+ * context or are inside a bh handler.
+ */
+void synchronize_bh(void)
+{
+	if (atomic_read(&global_bh_count) && !in_interrupt())
+		wait_on_bh();
+}
+
+void smp_send_stop(void)
+{
+	printk(KERN_INFO "Stopping all CPUs\n");	/* only logs; no CPU is stopped here */
+}
+
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);	/* set to 1 by smp_commence() */
+static volatile unsigned long smp_callin_map = 0;	/* one bit per CPU that has called in */
+
+void smp_commence(void)
+{
+	printk("All CPUs are go!\n");
+
+	wmb();
+	atomic_set(&smp_commenced, 1);	/* releases the idlers spinning in idle_proc() */
+}
+
+static int idle_proc(void *unused)
+{
+	int cpu;
+
+	set_current(current);
+	del_from_runqueue(current);
+	unhash_process(current);
+
+	cpu = current->processor;
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, cpu_data[cpu].ipi_pipe) < 0)
+		panic("CPU#%d failed to create IPI pipe", cpu);
+
+	activate_ipi(cpu_data[cpu].ipi_pipe[0], current->thread.extern_pid);
+
+	wmb();
+	if (test_and_set_bit(current->processor, &smp_callin_map)) {
+		printk("huh, CPU#%d already present??\n", current->processor);
+		BUG();
+	}
+
+	while (!atomic_read(&smp_commenced))	/* spin until smp_commence() runs */
+		cpu_relax();
+
+	init_idle();
+	cpu_idle();
+	return(0);
+}
+
+void smp_boot_cpus(void)
+{
+	set_bit(0, &cpu_online_map);	/* CPU 0 is marked online and called-in up front */
+	set_bit(0, &smp_callin_map);
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, cpu_data[0].ipi_pipe) < 0)
+		panic("CPU#0 failed to create IPI pipe");
+	activate_ipi(cpu_data[0].ipi_pipe[0], current->thread.extern_pid);
+
+	if(ncpus < 1){	/* clamp ncpus into 1..NR_CPUS */
+		printk(KERN_INFO "ncpus set to 1\n");
+		ncpus = 1;
+	}
+	else if(ncpus > NR_CPUS){
+		printk(KERN_INFO
+		       "ncpus can't be greater than NR_CPUS, set to %d\n",
+		       NR_CPUS);
+		ncpus = NR_CPUS;
+	}
+
+	if(ncpus > 1){
+		int i, pid;
+
+		printk(KERN_INFO "Starting up other processors:\n");
+		for(i=1;ineed_resched = 1;	/* NOTE(review): text was lost in extraction between "i" and "need_resched"; the rest of smp_boot_cpus() and the start of the IPI handler appear to be missing -- restore from the original patch */
+			break;
+
+		default:
+			printk("CPU#%d received unknown IPI [%c]!\n", cpu, c);
+			break;
+		}
+	}
+}
+
+int inited_cpus = 1;
+
+int hard_smp_processor_id(void)
+{
+	return(pid_to_processor_id(getpid()));
+}
+
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;	/* held across smp_call_function() */
+static atomic_t scf_started;	/* incremented by each slave before it runs func */
+static atomic_t scf_finished;	/* incremented by each slave after func returns */
+static void (*func)(void *info);
+static void *info;
+
+void smp_call_function_slave(int cpu)
+{
+	atomic_inc(&scf_started);
+	(*func)(info);
+	atomic_inc(&scf_finished);
+}
+
+int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
+		      int wait)
+{
+	int cpus = smp_num_cpus - 1;	/* number of CPUs other than the caller */
+	int i;
+
+	if (!cpus)
+		return 0;
+
+	spin_lock_bh(&call_lock);
+	atomic_set(&scf_started, 0);
+	atomic_set(&scf_finished, 0);
+	func = _func;
+	info = _info;
+
+	for (i=0;iprocessor && test_bit(i, &cpu_online_map))	/* NOTE(review): loop bound and start of the condition were lost in extraction; restore from the original patch */
+			write(cpu_data[i].ipi_pipe[1], "C", 1);	/* one 'C' byte down that CPU's IPI pipe */
+
+	while (atomic_read(&scf_started) != cpus)	/* wait until every other CPU has started func */
+		barrier();
+
+	if (wait)
+		while (atomic_read(&scf_finished) != cpus)	/* optionally wait for completion too */
+			barrier();
+
+	spin_unlock_bh(&call_lock);
+	return 0;
+}
+
+
+/* Flush the local TLB, then hand the remaining CPUs in mm->cpu_vm_mask
+ * to flush_tlb_others() with FLUSH_ALL. */
+void flush_tlb_current_task(void)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+	local_flush_tlb();
+	if (cpu_mask)
+		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/sys_call_table.c linux_umopenmosix/arch/um/kernel/sys_call_table.c --- linux-2.4.17/arch/um/kernel/sys_call_table.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/sys_call_table.c Thu Jun 27 22:35:16 2002 @@ -0,0 +1,458 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/unistd.h" +#include "linux/version.h" +#include "linux/sys.h" +#include "asm/signal.h" +#include "sysdep/syscalls.h" +#include "kern_util.h" + +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_exit; +extern syscall_handler_t sys_fork; +extern syscall_handler_t sys_read; +extern syscall_handler_t sys_write; +extern syscall_handler_t sys_creat; +extern syscall_handler_t sys_link; +extern syscall_handler_t sys_unlink; +extern syscall_handler_t sys_chdir; +extern syscall_handler_t sys_mknod; +extern syscall_handler_t sys_chmod; +extern syscall_handler_t sys_lchown16; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_stat; +extern syscall_handler_t sys_lseek; +extern syscall_handler_t sys_getpid; +extern syscall_handler_t sys_oldumount; +extern syscall_handler_t sys_setuid16; +extern syscall_handler_t sys_getuid16; +extern syscall_handler_t sys_ptrace; +extern syscall_handler_t sys_alarm; +extern syscall_handler_t sys_fstat; +extern syscall_handler_t sys_pause; +extern syscall_handler_t sys_utime; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_access; +extern syscall_handler_t sys_nice; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_sync; +extern syscall_handler_t sys_kill; +extern syscall_handler_t sys_rename; +extern syscall_handler_t sys_mkdir; +extern syscall_handler_t sys_rmdir; +extern syscall_handler_t sys_pipe; 
+extern syscall_handler_t sys_times; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_brk; +extern syscall_handler_t sys_setgid16; +extern syscall_handler_t sys_getgid16; +extern syscall_handler_t sys_signal; +extern syscall_handler_t sys_geteuid16; +extern syscall_handler_t sys_getegid16; +extern syscall_handler_t sys_acct; +extern syscall_handler_t sys_umount; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_ioctl; +extern syscall_handler_t sys_fcntl; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_setpgid; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_olduname; +extern syscall_handler_t sys_umask; +extern syscall_handler_t sys_chroot; +extern syscall_handler_t sys_ustat; +extern syscall_handler_t sys_dup2; +extern syscall_handler_t sys_getppid; +extern syscall_handler_t sys_getpgrp; +extern syscall_handler_t sys_sigaction; +extern syscall_handler_t sys_sgetmask; +extern syscall_handler_t sys_ssetmask; +extern syscall_handler_t sys_setreuid16; +extern syscall_handler_t sys_setregid16; +extern syscall_handler_t sys_sigsuspend; +extern syscall_handler_t sys_sigpending; +extern syscall_handler_t sys_sethostname; +extern syscall_handler_t sys_setrlimit; +extern syscall_handler_t sys_old_getrlimit; +extern syscall_handler_t sys_getrusage; +extern syscall_handler_t sys_gettimeofday; +extern syscall_handler_t sys_settimeofday; +extern syscall_handler_t sys_getgroups16; +extern syscall_handler_t sys_setgroups16; +extern syscall_handler_t sys_symlink; +extern syscall_handler_t sys_lstat; +extern syscall_handler_t sys_readlink; +extern syscall_handler_t sys_uselib; +extern syscall_handler_t sys_swapon; +extern syscall_handler_t sys_reboot; +extern syscall_handler_t old_readdir; +extern syscall_handler_t sys_munmap; +extern syscall_handler_t sys_truncate; +extern syscall_handler_t sys_ftruncate; +extern syscall_handler_t sys_fchmod; +extern syscall_handler_t sys_fchown16; +extern 
syscall_handler_t sys_getpriority; +extern syscall_handler_t sys_setpriority; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_statfs; +extern syscall_handler_t sys_fstatfs; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_socketcall; +extern syscall_handler_t sys_syslog; +extern syscall_handler_t sys_setitimer; +extern syscall_handler_t sys_getitimer; +extern syscall_handler_t sys_newstat; +extern syscall_handler_t sys_newlstat; +extern syscall_handler_t sys_newfstat; +extern syscall_handler_t sys_uname; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_vhangup; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_swapoff; +extern syscall_handler_t sys_sysinfo; +extern syscall_handler_t sys_ipc; +extern syscall_handler_t sys_fsync; +extern syscall_handler_t sys_sigreturn; +extern syscall_handler_t sys_clone; +extern syscall_handler_t sys_setdomainname; +extern syscall_handler_t sys_newuname; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_adjtimex; +extern syscall_handler_t sys_mprotect; +extern syscall_handler_t sys_sigprocmask; +extern syscall_handler_t sys_create_module; +extern syscall_handler_t sys_init_module; +extern syscall_handler_t sys_delete_module; +extern syscall_handler_t sys_get_kernel_syms; +extern syscall_handler_t sys_quotactl; +extern syscall_handler_t sys_getpgid; +extern syscall_handler_t sys_fchdir; +extern syscall_handler_t sys_bdflush; +extern syscall_handler_t sys_sysfs; +extern syscall_handler_t sys_personality; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_setfsuid16; +extern syscall_handler_t sys_setfsgid16; +extern syscall_handler_t sys_llseek; +extern syscall_handler_t sys_getdents; +extern syscall_handler_t sys_flock; +extern syscall_handler_t sys_msync; +extern syscall_handler_t sys_readv; +extern syscall_handler_t sys_writev; +extern syscall_handler_t 
sys_getsid; +extern syscall_handler_t sys_fdatasync; +extern syscall_handler_t sys_sysctl; +extern syscall_handler_t sys_mlock; +extern syscall_handler_t sys_munlock; +extern syscall_handler_t sys_mlockall; +extern syscall_handler_t sys_munlockall; +extern syscall_handler_t sys_sched_setparam; +extern syscall_handler_t sys_sched_getparam; +extern syscall_handler_t sys_sched_setscheduler; +extern syscall_handler_t sys_sched_getscheduler; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) +extern syscall_handler_t sys_sched_yield; +#endif +extern syscall_handler_t sys_sched_get_priority_max; +extern syscall_handler_t sys_sched_get_priority_min; +extern syscall_handler_t sys_sched_rr_get_interval; +extern syscall_handler_t sys_nanosleep; +extern syscall_handler_t sys_mremap; +extern syscall_handler_t sys_setresuid16; +extern syscall_handler_t sys_getresuid16; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_query_module; +extern syscall_handler_t sys_poll; +extern syscall_handler_t sys_nfsservctl; +extern syscall_handler_t sys_setresgid16; +extern syscall_handler_t sys_getresgid16; +extern syscall_handler_t sys_prctl; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_rt_sigaction; +extern syscall_handler_t sys_rt_sigprocmask; +extern syscall_handler_t sys_rt_sigpending; +extern syscall_handler_t sys_rt_sigtimedwait; +extern syscall_handler_t sys_rt_sigqueueinfo; +extern syscall_handler_t sys_rt_sigsuspend; +extern syscall_handler_t sys_pread; +extern syscall_handler_t sys_pwrite; +extern syscall_handler_t sys_chown16; +extern syscall_handler_t sys_getcwd; +extern syscall_handler_t sys_capget; +extern syscall_handler_t sys_capset; +extern syscall_handler_t sys_sigaltstack; +extern syscall_handler_t sys_sendfile; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_ni_syscall; +extern syscall_handler_t sys_vfork; +extern syscall_handler_t sys_getrlimit; +extern syscall_handler_t sys_mmap2; +extern 
syscall_handler_t sys_truncate64; +extern syscall_handler_t sys_ftruncate64; +extern syscall_handler_t sys_stat64; +extern syscall_handler_t sys_lstat64; +extern syscall_handler_t sys_fstat64; +extern syscall_handler_t sys_lchown; +extern syscall_handler_t sys_getuid; +extern syscall_handler_t sys_getgid; +extern syscall_handler_t sys_geteuid; +extern syscall_handler_t sys_getegid; +extern syscall_handler_t sys_setreuid; +extern syscall_handler_t sys_setregid; +extern syscall_handler_t sys_getgroups; +extern syscall_handler_t sys_setgroups; +extern syscall_handler_t sys_fchown; +extern syscall_handler_t sys_setresuid; +extern syscall_handler_t sys_getresuid; +extern syscall_handler_t sys_setresgid; +extern syscall_handler_t sys_getresgid; +extern syscall_handler_t sys_chown; +extern syscall_handler_t sys_setuid; +extern syscall_handler_t sys_setgid; +extern syscall_handler_t sys_setfsuid; +extern syscall_handler_t sys_setfsgid; +extern syscall_handler_t sys_pivot_root; +extern syscall_handler_t sys_mincore; +extern syscall_handler_t sys_madvise; +extern syscall_handler_t sys_fcntl64; +extern syscall_handler_t sys_getdents64; +extern syscall_handler_t sys_gettid; +extern syscall_handler_t sys_readahead; +extern syscall_handler_t um_mount; +extern syscall_handler_t um_time; +extern syscall_handler_t um_stime; + +#define LAST_GENERIC_SYSCALL __NR_readahead + +#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL +#define LAST_SYSCALL LAST_GENERIC_SYSCALL +#else +#define LAST_SYSCALL LAST_ARCH_SYSCALL +#endif + +syscall_handler_t *sys_call_table[] = { + [ 0 ] = sys_ni_syscall, + [ __NR_exit ] = sys_exit, + [ __NR_fork ] = sys_fork, + [ __NR_read ] = sys_read, + [ __NR_write ] = sys_write, + + /* These three are declared differently in asm/unistd.h */ + [ __NR_open ] = (syscall_handler_t *) sys_open, + [ __NR_close ] = (syscall_handler_t *) sys_close, + [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, + [ __NR_creat ] = sys_creat, + [ __NR_link ] = sys_link, + [ __NR_unlink 
] = sys_unlink, + + /* declared differently in kern_util.h */ + [ __NR_execve ] = (syscall_handler_t *) sys_execve, + [ __NR_chdir ] = sys_chdir, + [ __NR_time ] = um_time, + [ __NR_mknod ] = sys_mknod, + [ __NR_chmod ] = sys_chmod, + [ __NR_lchown ] = sys_lchown16, + [ __NR_break ] = sys_ni_syscall, + [ __NR_oldstat ] = sys_stat, + [ __NR_lseek ] = sys_lseek, + [ __NR_getpid ] = sys_getpid, + [ __NR_mount ] = um_mount, + [ __NR_umount ] = sys_oldumount, + [ __NR_setuid ] = sys_setuid16, + [ __NR_getuid ] = sys_getuid16, + [ __NR_stime ] = um_stime, + [ __NR_ptrace ] = sys_ptrace, + [ __NR_alarm ] = sys_alarm, + [ __NR_oldfstat ] = sys_fstat, + [ __NR_pause ] = sys_pause, + [ __NR_utime ] = sys_utime, + [ __NR_stty ] = sys_ni_syscall, + [ __NR_gtty ] = sys_ni_syscall, + [ __NR_access ] = sys_access, + [ __NR_nice ] = sys_nice, + [ __NR_ftime ] = sys_ni_syscall, + [ __NR_sync ] = sys_sync, + [ __NR_kill ] = sys_kill, + [ __NR_rename ] = sys_rename, + [ __NR_mkdir ] = sys_mkdir, + [ __NR_rmdir ] = sys_rmdir, + + /* Declared differently in asm/unistd.h */ + [ __NR_dup ] = (syscall_handler_t *) sys_dup, + [ __NR_pipe ] = sys_pipe, + [ __NR_times ] = sys_times, + [ __NR_prof ] = sys_ni_syscall, + [ __NR_brk ] = sys_brk, + [ __NR_setgid ] = sys_setgid16, + [ __NR_getgid ] = sys_getgid16, + [ __NR_signal ] = sys_signal, + [ __NR_geteuid ] = sys_geteuid16, + [ __NR_getegid ] = sys_getegid16, + [ __NR_acct ] = sys_acct, + [ __NR_umount2 ] = sys_umount, + [ __NR_lock ] = sys_ni_syscall, + [ __NR_ioctl ] = sys_ioctl, + [ __NR_fcntl ] = sys_fcntl, + [ __NR_mpx ] = sys_ni_syscall, + [ __NR_setpgid ] = sys_setpgid, + [ __NR_ulimit ] = sys_ni_syscall, + [ __NR_oldolduname ] = sys_olduname, + [ __NR_umask ] = sys_umask, + [ __NR_chroot ] = sys_chroot, + [ __NR_ustat ] = sys_ustat, + [ __NR_dup2 ] = sys_dup2, + [ __NR_getppid ] = sys_getppid, + [ __NR_getpgrp ] = sys_getpgrp, + [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, + [ __NR_sigaction ] = sys_sigaction, + [ 
__NR_sgetmask ] = sys_sgetmask, + [ __NR_ssetmask ] = sys_ssetmask, + [ __NR_setreuid ] = sys_setreuid16, + [ __NR_setregid ] = sys_setregid16, + [ __NR_sigsuspend ] = sys_sigsuspend, + [ __NR_sigpending ] = sys_sigpending, + [ __NR_sethostname ] = sys_sethostname, + [ __NR_setrlimit ] = sys_setrlimit, + [ __NR_getrlimit ] = sys_old_getrlimit, + [ __NR_getrusage ] = sys_getrusage, + [ __NR_gettimeofday ] = sys_gettimeofday, + [ __NR_settimeofday ] = sys_settimeofday, + [ __NR_getgroups ] = sys_getgroups16, + [ __NR_setgroups ] = sys_setgroups16, + [ __NR_symlink ] = sys_symlink, + [ __NR_oldlstat ] = sys_lstat, + [ __NR_readlink ] = sys_readlink, + [ __NR_uselib ] = sys_uselib, + [ __NR_swapon ] = sys_swapon, + [ __NR_reboot ] = sys_reboot, + [ __NR_readdir ] = old_readdir, + [ __NR_munmap ] = sys_munmap, + [ __NR_truncate ] = sys_truncate, + [ __NR_ftruncate ] = sys_ftruncate, + [ __NR_fchmod ] = sys_fchmod, + [ __NR_fchown ] = sys_fchown16, + [ __NR_getpriority ] = sys_getpriority, + [ __NR_setpriority ] = sys_setpriority, + [ __NR_profil ] = sys_ni_syscall, + [ __NR_statfs ] = sys_statfs, + [ __NR_fstatfs ] = sys_fstatfs, + [ __NR_ioperm ] = sys_ni_syscall, + [ __NR_socketcall ] = sys_socketcall, + [ __NR_syslog ] = sys_syslog, + [ __NR_setitimer ] = sys_setitimer, + [ __NR_getitimer ] = sys_getitimer, + [ __NR_stat ] = sys_newstat, + [ __NR_lstat ] = sys_newlstat, + [ __NR_fstat ] = sys_newfstat, + [ __NR_olduname ] = sys_uname, + [ __NR_iopl ] = sys_ni_syscall, + [ __NR_vhangup ] = sys_vhangup, + [ __NR_idle ] = sys_ni_syscall, + [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, + [ __NR_swapoff ] = sys_swapoff, + [ __NR_sysinfo ] = sys_sysinfo, + [ __NR_ipc ] = sys_ipc, + [ __NR_fsync ] = sys_fsync, + [ __NR_sigreturn ] = sys_sigreturn, + [ __NR_clone ] = sys_clone, + [ __NR_setdomainname ] = sys_setdomainname, + [ __NR_uname ] = sys_newuname, + [ __NR_adjtimex ] = sys_adjtimex, + [ __NR_mprotect ] = sys_mprotect, + [ __NR_sigprocmask ] = sys_sigprocmask, + [ 
__NR_create_module ] = sys_create_module, + [ __NR_init_module ] = sys_init_module, + [ __NR_delete_module ] = sys_delete_module, + [ __NR_get_kernel_syms ] = sys_get_kernel_syms, + [ __NR_quotactl ] = sys_quotactl, + [ __NR_getpgid ] = sys_getpgid, + [ __NR_fchdir ] = sys_fchdir, + [ __NR_bdflush ] = sys_bdflush, + [ __NR_sysfs ] = sys_sysfs, + [ __NR_personality ] = sys_personality, + [ __NR_afs_syscall ] = sys_ni_syscall, + [ __NR_setfsuid ] = sys_setfsuid16, + [ __NR_setfsgid ] = sys_setfsgid16, + [ __NR__llseek ] = sys_llseek, + [ __NR_getdents ] = sys_getdents, + [ __NR__newselect ] = (syscall_handler_t *) sys_select, + [ __NR_flock ] = sys_flock, + [ __NR_msync ] = sys_msync, + [ __NR_readv ] = sys_readv, + [ __NR_writev ] = sys_writev, + [ __NR_getsid ] = sys_getsid, + [ __NR_fdatasync ] = sys_fdatasync, + [ __NR__sysctl ] = sys_sysctl, + [ __NR_mlock ] = sys_mlock, + [ __NR_munlock ] = sys_munlock, + [ __NR_mlockall ] = sys_mlockall, + [ __NR_munlockall ] = sys_munlockall, + [ __NR_sched_setparam ] = sys_sched_setparam, + [ __NR_sched_getparam ] = sys_sched_getparam, + [ __NR_sched_setscheduler ] = sys_sched_setscheduler, + [ __NR_sched_getscheduler ] = sys_sched_getscheduler, + [ __NR_sched_yield ] = sys_sched_yield, + [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, + [ __NR_sched_get_priority_min ] = sys_sched_get_priority_min, + [ __NR_sched_rr_get_interval ] = sys_sched_rr_get_interval, + [ __NR_nanosleep ] = sys_nanosleep, + [ __NR_mremap ] = sys_mremap, + [ __NR_setresuid ] = sys_setresuid16, + [ __NR_getresuid ] = sys_getresuid16, + [ __NR_vm86 ] = sys_ni_syscall, + [ __NR_query_module ] = sys_query_module, + [ __NR_poll ] = sys_poll, + [ __NR_nfsservctl ] = sys_nfsservctl, + [ __NR_setresgid ] = sys_setresgid16, + [ __NR_getresgid ] = sys_getresgid16, + [ __NR_prctl ] = sys_prctl, + [ __NR_rt_sigreturn ] = sys_ni_syscall, + [ __NR_rt_sigaction ] = sys_rt_sigaction, + [ __NR_rt_sigprocmask ] = sys_rt_sigprocmask, + [ __NR_rt_sigpending 
] = sys_rt_sigpending, + [ __NR_rt_sigtimedwait ] = sys_rt_sigtimedwait, + [ __NR_rt_sigqueueinfo ] = sys_rt_sigqueueinfo, + [ __NR_rt_sigsuspend ] = sys_rt_sigsuspend, + [ __NR_pread ] = sys_pread, + [ __NR_pwrite ] = sys_pwrite, + [ __NR_chown ] = sys_chown16, + [ __NR_getcwd ] = sys_getcwd, + [ __NR_capget ] = sys_capget, + [ __NR_capset ] = sys_capset, + [ __NR_sigaltstack ] = sys_sigaltstack, + [ __NR_sendfile ] = sys_sendfile, + [ __NR_getpmsg ] = sys_ni_syscall, + [ __NR_putpmsg ] = sys_ni_syscall, + [ __NR_vfork ] = sys_vfork, + [ __NR_ugetrlimit ] = sys_getrlimit, + [ __NR_mmap2 ] = sys_mmap2, + [ __NR_truncate64 ] = sys_truncate64, + [ __NR_ftruncate64 ] = sys_ftruncate64, + [ __NR_stat64 ] = sys_stat64, + [ __NR_lstat64 ] = sys_lstat64, + [ __NR_fstat64 ] = sys_fstat64, + [ __NR_fcntl64 ] = sys_fcntl64, + [ __NR_getdents64 ] = sys_getdents64, + [ __NR_security ] = sys_ni_syscall, + [ __NR_gettid ] = sys_gettid, + [ __NR_readahead ] = sys_readahead, + ARCH_SYSCALLS + [ LAST_SYSCALL + 1 ... NR_syscalls ] = + (syscall_handler_t *) sys_ni_syscall +}; + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/syscall_kern.c linux_umopenmosix/arch/um/kernel/syscall_kern.c
--- linux-2.4.17/arch/um/kernel/syscall_kern.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/syscall_kern.c Thu Jun 27 00:16:10 2002
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/file.h"
+#include "linux/smp_lock.h"
+#include "linux/mm.h"
+#include "linux/utsname.h"
+#include "linux/msg.h"
+#include "linux/shm.h"
+#include "linux/sys.h"
+#include "linux/unistd.h"
+#include "linux/slab.h"
+#include "linux/utime.h"
+#include "asm/mman.h"
+#include "asm/uaccess.h"
+#include "asm/ipc.h"
+#include "kern_util.h"
+#include "user_util.h"
+#include "sysdep/syscalls.h"
+
+/*
+ * mount() entry point: replaces a NULL filesystem type with an empty
+ * string and passes everything else through to sys_mount().
+ */
+long um_mount(char * dev_name, char * dir_name, char * type,
+	      unsigned long new_flags, void * data)
+{
+	if(type == NULL) type = "";
+	return(sys_mount(dev_name, dir_name, type, new_flags, data));
+}
+
+/*
+ * fork(): do_fork() with SIGCHLD as the only flag.  thread.forking is
+ * set to 1 for the duration of the do_fork() call and cleared afterwards.
+ */
+long sys_fork(void)
+{
+	long ret;
+
+	current->thread.forking = 1;
+	ret = do_fork(SIGCHLD, 0, NULL, 0);
+	current->thread.forking = 0;
+	return(ret);
+}
+
+/*
+ * clone(): hands the caller's flags and new stack pointer to do_fork(),
+ * with thread.forking set around the call as in sys_fork().
+ */
+long sys_clone(unsigned long clone_flags, unsigned long newsp)
+{
+	long ret;
+
+	current->thread.forking = 1;
+	ret = do_fork(clone_flags, newsp, NULL, 0);
+	current->thread.forking = 0;
+	return(ret);
+}
+
+/*
+ * vfork(): do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD, with
+ * thread.forking set around the call as in sys_fork().
+ */
+long sys_vfork(void)
+{
+	long ret;
+
+	current->thread.forking = 1;
+	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0);
+	current->thread.forking = 0;
+	return(ret);
+}
+
+/*
+ * common code for old and new mmaps
+ *
+ * MAP_EXECUTABLE and MAP_DENYWRITE are cleared from flags.  Unless
+ * MAP_ANONYMOUS is set, fd is resolved with fget() and -EBADF is returned
+ * if that fails.  The mapping itself is made by do_mmap_pgoff() with the
+ * current mm's mmap_sem held for writing; pgoff is in pages.  The file
+ * reference taken by fget() is dropped again before returning.
+ */
+static inline long do_mmap2(
+	unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	int error = -EBADF;
+	struct file * file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+ out:
+	return error;
+}
+
+/* mmap2(): the offset argument is already expressed in pages. */
+long sys_mmap2(unsigned long addr, unsigned long len,
+	       unsigned long prot, unsigned long flags,
+	       unsigned long fd, unsigned long pgoff)
+{
+	return do_mmap2(addr, len, prot, flags, fd, pgoff);
+}
+
+/*
+ * Perform the select(nd, in, out, ex, tv) and mmap() system
+ * calls. Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+/*
+ * Old-style mmap(): offset is in bytes.  An offset that is not a multiple
+ * of the page size is rejected with -EINVAL; otherwise it is converted to
+ * a page offset and passed to do_mmap2().
+ */
+int old_mmap(unsigned long addr, unsigned long len,
+	     unsigned long prot, unsigned long flags,
+	     unsigned long fd, unsigned long offset)
+{
+	int err = -EINVAL;
+	if (offset & ~PAGE_MASK)
+		goto out;
+
+	err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+ out:
+	return err;
+}
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ *
+ * Returns 0 on success, the error from do_pipe(), or -EFAULT when the
+ * two descriptors cannot be copied out to fildes.
+ */
+int sys_pipe(unsigned long * fildes)
+{
+	int pipe_fds[2];
+	int err = do_pipe(pipe_fds);
+
+	if (err)
+		return err;
+	if (copy_to_user(fildes, pipe_fds, sizeof(pipe_fds)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * pause(): mark the task interruptible, give up the CPU and report
+ * -ERESTARTNOHAND once schedule() returns.
+ */
+int sys_pause(void)
+{
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	return -ERESTARTNOHAND;
+}
+
+/*
+ * Old-style sigaction(): translates between the user-visible
+ * struct old_sigaction and the k_sigaction used by do_sigaction().
+ *
+ * act  - new action to install, or NULL to leave the action unchanged
+ * oact - where to store the previous action, or NULL if not wanted
+ *
+ * Returns the result of do_sigaction(), or -EFAULT when act cannot be
+ * read or oact cannot be written.
+ */
+int sys_sigaction(int sig, const struct old_sigaction *act,
+		  struct old_sigaction *oact)
+{
+	struct k_sigaction new_action, old_action;
+	struct k_sigaction *newp = NULL;
+	struct k_sigaction *oldp = NULL;
+	int err;
+
+	if (oact)
+		oldp = &old_action;
+
+	if (act) {
+		old_sigset_t mask;
+
+		if (verify_area(VERIFY_READ, act, sizeof(*act)))
+			return -EFAULT;
+		if (__get_user(new_action.sa.sa_handler, &act->sa_handler))
+			return -EFAULT;
+		if (__get_user(new_action.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		/* results of the remaining two reads are not checked */
+		__get_user(new_action.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_action.sa.sa_mask, mask);
+		newp = &new_action;
+	}
+
+	err = do_sigaction(sig, newp, oldp);
+	if (err || !oact)
+		return err;
+
+	if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)))
+		return -EFAULT;
+	if (__put_user(old_action.sa.sa_handler, &oact->sa_handler))
+		return -EFAULT;
+	if (__put_user(old_action.sa.sa_restorer, &oact->sa_restorer))
+		return -EFAULT;
+	/* only the first word of the old mask fits the old layout */
+	__put_user(old_action.sa.sa_flags, &oact->sa_flags);
+	__put_user(old_action.sa.sa_mask.sig[0], &oact->sa_mask);
+
+	return err;
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ *
+ * The upper 16 bits of 'call' carry an interface version, the lower
+ * 16 bits select the operation.  Unknown operations return -EINVAL.
+ */
+int sys_ipc (uint call, int first, int second,
+	     int third, void *ptr, long fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	case SEMOP:
+		return sys_semop (first, (struct sembuf *)ptr, second);
+	case SEMGET:
+		return sys_semget (first, second, third);
+	case SEMCTL: {
+		union semun fourth;
+		if (!ptr)
+			return -EINVAL;
+		if (get_user(fourth.__pad, (void **) ptr))
+			return -EFAULT;
+		return sys_semctl (first, second, third, fourth);
+	}
+
+	case MSGSND:
+		return sys_msgsnd (first, (struct msgbuf *) ptr,
+				   second, third);
+	case MSGRCV:
+		switch (version) {
+		case 0: {
+			struct ipc_kludge tmp;
+			if (!ptr)
+				return -EINVAL;
+
+			if (copy_from_user(&tmp,
+					   (struct ipc_kludge *) ptr,
+					   sizeof (tmp)))
+				return -EFAULT;
+			return sys_msgrcv (first, tmp.msgp, second,
+					   tmp.msgtyp, third);
+		}
+		default:
+			/*
+			 * The version comes straight from the caller, so it
+			 * must never be able to panic the kernel; newer
+			 * versions pass the buffer and type directly.
+			 */
+			return sys_msgrcv (first,
+					   (struct msgbuf *) ptr,
+					   second, fifth, third);
+		}
+	case MSGGET:
+		return sys_msgget ((key_t) first, second);
+	case MSGCTL:
+		return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+	case SHMAT:
+		switch (version) {
+		default: {
+			ulong raddr;
+			ret = sys_shmat (first, (char *) ptr, second, &raddr);
+			if (ret)
+				return ret;
+			return put_user (raddr, (ulong *) third);
+		}
+		case 1:	/* iBCS2 emulator entry point */
+			if (!segment_eq(get_fs(), get_ds()))
+				return -EINVAL;
+			return sys_shmat (first, (char *) ptr, second, (ulong *) third);
+		}
+	case SHMDT:
+		return sys_shmdt ((char *)ptr);
+	case SHMGET:
+		return sys_shmget (first, second, third);
+	case SHMCTL:
+		return sys_shmctl (first, second,
+				   (struct shmid_ds *) ptr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Copies system_utsname to the caller's struct old_utsname while holding
+ * uts_sem for reading.  Returns 0, or -EFAULT for a NULL or unwritable
+ * destination.
+ */
+int sys_uname(struct old_utsname * name)
+{
+	int err;
+	if (!name)
+		return -EFAULT;
+	down_read(&uts_sem);
+	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	up_read(&uts_sem);
+	return err?-EFAULT:0;
+}
+
+/*
+ * Fills a struct oldold_utsname: each field receives the first
+ * __OLD_UTS_LEN bytes of the matching system_utsname field followed by a
+ * terminating 0.  Returns 0, or -EFAULT if name is NULL, fails
+ * access_ok(), or any of the copies fails.
+ */
+int sys_olduname(struct oldold_utsname * name)
+{
+	int error;
+
+	if (!name)
+		return -EFAULT;
+	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+		return -EFAULT;
+
+	down_read(&uts_sem);
+
+	error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+			       __OLD_UTS_LEN);
+	error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+				__OLD_UTS_LEN);
+	error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release,&system_utsname.release,
+				__OLD_UTS_LEN);
+	error |= __put_user(0,name->release+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version,&system_utsname.version,
+				__OLD_UTS_LEN);
+	error |= __put_user(0,name->version+__OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine,&system_utsname.machine,
+				__OLD_UTS_LEN);
+	error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+
+	up_read(&uts_sem);
+
+	error = error ? -EFAULT : 0;
+
+	return error;
+}
+
+/*
+ * sigaltstack(): forwards to do_sigaltstack() with the stack pointer
+ * taken from the current task's saved process registers.
+ */
+int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+{
+	return(do_sigaltstack(uss, uoss,
+			      UM_SP(&current->thread.process_regs)));
+}
+
+/*
+ * Parameter checks used by this_is_bogus[] below.  Each one runs
+ * verify_area(VERIFY_WRITE, ...) on the buffer passed as the second
+ * system call argument and returns its result.
+ */
+
+/* readlink: the buffer length is the third system call argument */
+static int check_readlink(struct sys_pt_regs *regs)
+{
+	return(verify_area(VERIFY_WRITE, (void *) UM_SYSCALL_ARG2(regs),
+			   UM_SYSCALL_ARG3(regs)));
+}
+
+/* utime: buffer of sizeof(struct utimbuf) */
+static int check_utime(struct sys_pt_regs *regs)
+{
+	return(verify_area(VERIFY_WRITE, (void *) UM_SYSCALL_ARG2(regs),
+			   sizeof(struct utimbuf)));
+}
+
+/* oldstat, oldlstat: buffer of sizeof(struct __old_kernel_stat) */
+static int check_oldstat(struct sys_pt_regs *regs)
+{
+	return(verify_area(VERIFY_WRITE, (void *) UM_SYSCALL_ARG2(regs),
+			   sizeof(struct __old_kernel_stat)));
+}
+
+/* stat, lstat: buffer of sizeof(struct stat) */
+static int check_stat(struct sys_pt_regs *regs)
+{
+	return(verify_area(VERIFY_WRITE, (void *) UM_SYSCALL_ARG2(regs),
+			   sizeof(struct stat)));
+}
+
+/* stat64, lstat64: buffer of sizeof(struct stat64) */
+static int check_stat64(struct sys_pt_regs *regs)
+{
+	return(verify_area(VERIFY_WRITE, (void *) UM_SYSCALL_ARG2(regs),
+			   sizeof(struct stat64)));
+}
+
+/*
+ * Per-syscall entry consulted by check_bogosity():
+ *   kernel_ds    - non-zero if the call is to run under set_fs(KERNEL_DS)
+ *   check_params - optional check run before switching; a non-zero
+ *                  result makes the system call fail with -EFAULT
+ */
+struct bogus {
+	int kernel_ds;
+	int (*check_params)(struct sys_pt_regs *);
+};
+
+/*
+ * Indexed by system call number.  The array is only as long as the
+ * largest index initialised here; entries not listed are zero.
+ */
+struct bogus this_is_bogus[] = {
+	[ __NR_mknod ] = { 1, NULL },
+	[ __NR_mkdir ] = { 1, NULL },
+	[ __NR_rmdir ] = { 1, NULL },
+	[ __NR_unlink ] = { 1, NULL },
+	[ __NR_symlink ] = { 1, NULL },
+	[ __NR_link ] = { 1, NULL },
+	[ __NR_rename ] = { 1, NULL },
+	[ __NR_umount ] = { 1, NULL },
+	[ __NR_mount ] = { 1, NULL },
+	[ __NR_pivot_root ] = { 1, NULL },
+	[ __NR_chdir ] = { 1, NULL },
+	[ __NR_chroot ] = { 1, NULL },
+	[ __NR_open ] = { 1, NULL },
+	[ __NR_quotactl ] = { 1, NULL },
+	[ __NR_sysfs ] = { 1, NULL },
+	[ __NR_readlink ] = { 1, check_readlink },
+	[ __NR_acct ] = { 1, NULL },
+	[ __NR_execve ] = { 1, NULL },
+	[ __NR_uselib ] = { 1, NULL },
+	[ __NR_statfs ] = { 1, NULL },
+	[ __NR_truncate ] = { 1, NULL },
+	[ __NR_access ] = { 1, NULL },
+	[ __NR_chmod ] = { 1, NULL },
+	[ __NR_chown ] = { 1, NULL },
+	[ __NR_lchown ] = { 1, NULL },
+	[ __NR_utime ] = { 1, check_utime },
+	[ __NR_oldlstat ] = { 1, check_oldstat },
+	[ __NR_oldstat ] = { 1, check_oldstat },
+	[ __NR_stat ] = { 1, check_stat },
+	[ __NR_lstat ] = { 1, check_stat },
+	[ __NR_stat64 ] = { 1, check_stat64 },
+	[ __NR_lstat64 ] = { 1, check_stat64 },
+};
+
+/* sys_utimes */
+
+/*
+ * Looks up the system call in this_is_bogus[].  If the entry asks for
+ * it, runs the parameter check and then switches to KERNEL_DS.
+ * Returns 0 to let the call proceed, -EFAULT if the check failed.
+ */
+static int check_bogosity(struct sys_pt_regs *regs)
+{
+	long nr = UM_SYSCALL_NR(regs);
+	struct bogus *bogon;
+
+	/*
+	 * The caller only guarantees nr < NR_syscalls, which can be past
+	 * the end of this_is_bogus[]; such calls have no entry.
+	 */
+	if((nr < 0) || ((unsigned long) nr >=
+			sizeof(this_is_bogus) / sizeof(this_is_bogus[0])))
+		return(0);
+	bogon = &this_is_bogus[nr];
+
+	if(!bogon->kernel_ds) return(0);
+	if(bogon->check_params && (*bogon->check_params)(regs))
+		return(-EFAULT);
+	set_fs(KERNEL_DS);
+	return(0);
+}
+
+int nsyscalls = 0;
+
+extern syscall_handler_t *sys_call_table[];
+
+/*
+ * Runs the system call described by regs (passed by value).
+ * Numbers outside 0 .. NR_syscalls - 1 yield -ENOSYS; with honeypot
+ * set, a failing check_bogosity() yields -EFAULT.  The result is stored
+ * in the current task's process registers and also returned, and the
+ * address limit is reset to USER_DS before returning.
+ */
+long execute_syscall(struct sys_pt_regs regs)
+{
+	long res;
+	int syscall;
+
+	current->thread.nsyscalls++;
+	nsyscalls++;
+	syscall = UM_SYSCALL_NR(&regs);
+
+	if((syscall >= NR_syscalls) || (syscall < 0))
+		res = -ENOSYS;
+	else if(honeypot && check_bogosity(&regs))
+		res = -EFAULT;
+	else res = EXECUTE_SYSCALL(syscall, regs);
+	UM_SET_SYSCALL_RETURN(&current->thread.process_regs, res);
+
+	set_fs(USER_DS);
+
+	return(res);
+}
+
+spinlock_t syscall_lock = SPIN_LOCK_UNLOCKED;
+
+/* Takes syscall_lock. */
+void lock_syscall(void)
+{
+	spin_lock(&syscall_lock);
+}
+
+/* Releases syscall_lock. */
+void unlock_syscall(void)
+{
+	spin_unlock(&syscall_lock);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/syscall_user.c linux_umopenmosix/arch/um/kernel/syscall_user.c
--- linux-2.4.17/arch/um/kernel/syscall_user.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/syscall_user.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+/* XXX FIXME : Ensure that SIGIO and SIGVTALRM can't happen immediately
+ * after setting up syscall stack
+ * block SIGVTALRM in any code that's under wait_for_stop
+ */
+
+/* NOTE(review): the header names of the bare #include lines below are
+ * missing from this copy of the patch - restore them from the original.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "signal_kern.h"
+#include "signal_user.h"
+#include "frame.h"
+#include "sigcontext.h"
+#include "sysdep/ptrace.h"
+
+/* XXX Bogus */
+#define ERESTARTSYS 512
+#define ERESTARTNOINTR 513
+#define ERESTARTNOHAND 514
+
+/* Ring of the most recent system calls: number, pid, result and the
+ * times taken before and after execution.  Slots are handed out by
+ * syscall_handler() through syscall_index.
+ */
+struct {
+	int syscall;
+	int pid;
+	int result;
+	struct timeval start;
+	struct timeval end;
+} syscall_record[1024];
+
+/* Next slot of syscall_record[] to use; wrapped in syscall_handler() */
+int syscall_index = 0;
+
+extern int timer_ready, timer_on;
+
+/* Signal handler that carries out one system call: claims a
+ * syscall_record[] slot, loads the register state from sc, runs
+ * execute_syscall() and records the result.  do_signal() is called when
+ * the result is one of the ERESTART* codes defined above.
+ */
+void syscall_handler(int sig, struct sigcontext sc)
+{
+	struct sys_pt_regs *regs;
+	long result;
+	int index, syscall;
+
+	unprotect_kernel_mem(1);
+	timer_ready = 1;
+	/* the slot is claimed, and the index wrapped, under syscall_lock */
+	lock_syscall();
+	if(syscall_index == 1024) syscall_index = 0;
+	index = syscall_index;
+	syscall_index++;
+	unlock_syscall();
+	regs = process_state(NULL);
+	/* the number is read before fill_in_regs() overwrites regs from sc */
+	syscall = UM_SYSCALL_NR(regs);
+	set_thread_sc(&sc);
+	fill_in_regs(regs, &sc);
+	UM_ORIG_SYSCALL(regs) = -ENOSYS;
+	syscall_record[index].syscall = syscall;
+	syscall_record[index].pid = current_pid();
+	/* placeholder until the real result is stored below */
+	syscall_record[index].result = 0xdeadbeef;
+	gettimeofday(&syscall_record[index].start, NULL);
+	syscall_trace();
+	result = execute_syscall(*regs);
+	if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) ||
+	   (result == -ERESTARTNOINTR))
+		do_signal(result);
+	syscall_trace();
+	syscall_record[index].result = result;
+	gettimeofday(&syscall_record[index].end, NULL);
+	ret_from_sys_call();
+
+	block_signals();
+	change_sig(SIGUSR1, 0);
+
+	timer_ready = 0;
+	set_user_mode(NULL, 1, 1);
+	timer_on = 1;
+}
+
+/* If get_restore_state(task) is non-zero, writes the task's saved
+ * registers into process pid with ptrace_setregs(); failure there is a
+ * tracer_panic().  Returns 1 in every case.
+ */
+int exit_kernel(int pid, void *task)
+{
+	struct sys_pt_regs *regs;
+
+	if(!get_restore_state(task)) return(1);
+	regs = process_state(task);
+	if(ptrace_setregs(pid, regs) < 0)
+		tracer_panic("Couldn't restore registers");
+	return(1);
+}
+
+/* Reads the registers of process pid with ptrace.  Returns 0 if the
+ * system call number found there is below 1.  Otherwise the number is
+ * saved in the task's register state, the number in the traced process
+ * is replaced by __NR_getpid, and 1 is returned.
+ */
+int do_syscall(void *task, int pid)
+{
+	struct sys_pt_regs *regs, proc_regs;
+	int syscall;
+
+	if(ptrace_getregs(pid, &proc_regs) < 0)
+		tracer_panic("Couldn't read registers");
+
+	syscall = UM_SYSCALL_NR(&proc_regs);
+	if(syscall < 1) return(0);
+
+	regs = process_state(task);
+	UM_SYSCALL_NR(regs) = syscall;
+
+	/* a call other than sigreturn issued from between _stext and
+	 * _etext is treated as fatal
+	 */
+	if((syscall != __NR_sigreturn) &&
+	   ((unsigned long *) UM_IP(&proc_regs) >= &_stext) &&
+	   ((unsigned long *) UM_IP(&proc_regs) <= &_etext))
+		tracer_panic("I'm tracing myself and I can't get out");
+
+	if(ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_NR_OFFSET,
+		  __NR_getpid) < 0)
+		tracer_panic("do_syscall : Nullifying syscall failed, "
+			     "errno = %d", errno);
+	return(1);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/sysrq.c linux_umopenmosix/arch/um/kernel/sysrq.c
--- linux-2.4.17/arch/um/kernel/sysrq.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/sysrq.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "linux/kernel.h"
+#include "linux/module.h"
+#include "asm/page.h"
+#include "asm/processor.h"
+#include "sysrq.h"
+#include "user_util.h"
+
+ /*
+ * If the address is either in the .text section of the
+ * kernel, or in the vmalloc'ed module regions, it *may*
+ * be the address of a calling routine
+ */
+
+#ifdef CONFIG_MODULES
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+/*
+ * Returns 1 if addr lies between _stext and _etext (inclusive) or
+ * inside any module on module_list, 0 otherwise.
+ */
+static inline int kernel_text_address(unsigned long addr)
+{
+	struct module *m;
+
+	if (addr >= (unsigned long) &_stext &&
+	    addr <= (unsigned long) &_etext)
+		return 1;
+
+	m = module_list;
+	while (m != &kernel_module) {
+		/* mod_bound covers the whole vmalloc'ed module area;
+		 * testing only the .text subset would be more precise */
+		if (mod_bound(addr, 0, m))
+			return 1;
+		m = m->next;
+	}
+
+	return 0;
+}
+
+#else
+
+/* Returns 1 if addr lies between _stext and _etext (inclusive), else 0. */
+static inline int kernel_text_address(unsigned long addr)
+{
+	if (addr < (unsigned long) &_stext)
+		return 0;
+	return addr <= (unsigned long) &_etext;
+}
+
+#endif
+
+/*
+ * Walks the stack upwards from 'stack' (or from this function's own
+ * frame when stack is NULL) until a THREAD_SIZE boundary and prints
+ * every word that kernel_text_address() accepts.
+ */
+void show_trace(unsigned long * stack)
+{
+	unsigned long word;
+	int shown = 1;
+
+	if (stack == NULL)
+		stack = (unsigned long*) &stack;
+
+	printk("Call Trace: ");
+	for (; ((long) stack & (THREAD_SIZE-1)) != 0; stack++) {
+		word = *stack;
+		if (!kernel_text_address(word))
+			continue;
+		/* line break whenever the counter hits a multiple of six */
+		if (shown && ((shown % 6) == 0))
+			printk("\n ");
+		printk("[<%08lx>] ", word);
+		shown++;
+	}
+	printk("\n");
+}
+
+/*
+ * Prints a trace for tsk starting at its saved stack pointer; does
+ * nothing if that pointer fails the mask test against tsk below.
+ */
+void show_trace_task(struct task_struct *tsk)
+{
+	unsigned long sp = UM_SP(&tsk->thread.process_regs);
+
+	/* User space on another CPU? Then there is nothing to show. */
+	if (((sp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) == 0)
+		show_trace((unsigned long *)sp);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/time.c linux_umopenmosix/arch/um/kernel/time.c
--- linux-2.4.17/arch/um/kernel/time.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/time.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#define _GNU_SOURCE /* to get timeradd and timersub */
+
+/* NOTE(review): the header names of the bare #include lines below are
+ * missing from this copy of the patch - restore them from the original.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "process.h"
+#include "signal_user.h"
+
+extern struct timeval xtime;
+
+/* Timer signal entry point: forwards usermode to timer_irq(); sig and sc
+ * are not used.
+ */
+void timer_handler(int sig, void *sc, int usermode)
+{
+	timer_irq(usermode);
+}
+
+/* Refreshes xtime with gettimeofday(). */
+void timer(void)
+{
+	gettimeofday(&xtime, NULL);
+}
+
+/* ITIMER_PROF setting saved by get_profile_timer(), re-armed by set_timers() */
+static struct itimerval profile_interval;
+
+/* Saves the current ITIMER_PROF setting in profile_interval, with its
+ * it_value replaced by its it_interval.
+ */
+void get_profile_timer(void)
+{
+	getitimer(ITIMER_PROF, &profile_interval);
+	profile_interval.it_value = profile_interval.it_interval;
+}
+
+/* Sets ITIMER_PROF to an all-zero interval and value. */
+void disable_profile_timer(void)
+{
+	struct itimerval interval = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
+	setitimer(ITIMER_PROF, &interval, NULL);
+}
+
+/* Arms the given interval timer with both its period and its first
+ * expiry set to 1000000/hz() microseconds; panics if setitimer() fails.
+ */
+static void set_interval(int timer_type)
+{
+	struct itimerval interval;
+
+	interval.it_interval.tv_sec = 0;
+	interval.it_interval.tv_usec = 1000000/hz();
+	interval.it_value.tv_sec = 0;
+	interval.it_value.tv_usec = 1000000/hz();
+	if(setitimer(timer_type, &interval, NULL) == -1)
+		panic("setitimer failed - errno = %d\n", errno);
+}
+
+/* Ignores SIGVTALRM, installs alarm_handler for SIGALRM through
+ * set_handler() and starts ITIMER_REAL.
+ */
+void idle_timer(void)
+{
+	if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR)
+		panic("Couldn't unset SIGVTALRM handler");
+	set_handler(SIGALRM, (__sighandler_t) alarm_handler,
+		    SA_NODEFER | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1);
+	set_interval(ITIMER_REAL);
+}
+
+/* Installs boot_timer_handler for SIGVTALRM and starts ITIMER_VIRTUAL. */
+void time_init(void)
+{
+	if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR)
+		panic("Couldn't set SIGVTALRM handler");
+	set_interval(ITIMER_VIRTUAL);
+}
+
+/* If set_signal is non-zero, installs alarm_handler for SIGVTALRM and
+ * starts ITIMER_VIRTUAL.  In every case ITIMER_PROF is set from
+ * profile_interval; failures panic.
+ */
+void set_timers(int set_signal)
+{
+	if(set_signal){
+		if(signal(SIGVTALRM,
+			  (__sighandler_t) alarm_handler) == SIG_ERR)
+			panic("Couldn't set SIGVTALRM handler");
+		set_interval(ITIMER_VIRTUAL);
+	}
+	if(setitimer(ITIMER_PROF, &profile_interval, NULL) == -1)
+		panic("setitimer ITIMER_PROF failed - errno = %d\n", errno);
+}
+
+/* Added to gettimeofday() by do_gettimeofday(); set by do_settimeofday() */
+struct timeval local_offset = { 0, 0 };
+
+/* Stores gettimeofday() plus local_offset in *tv. */
+void do_gettimeofday(struct timeval *tv)
+{
+	gettimeofday(tv, NULL);
+	timeradd(tv, &local_offset, tv);
+}
+
+/* Sets local_offset to *tv minus the current gettimeofday() value. */
+void do_settimeofday(struct timeval *tv)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+	timersub(tv, &now, &local_offset);
+}
+
+/* Calls nanosleep() for secs seconds; its return value is not checked. */
+void idle_sleep(int secs)
+{
+	struct timespec ts;
+
+	ts.tv_sec = secs;
+	ts.tv_nsec = 0;
+	nanosleep(&ts, &ts);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/time_kern.c linux_umopenmosix/arch/um/kernel/time_kern.c
--- linux-2.4.17/arch/um/kernel/time_kern.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/time_kern.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/kernel.h"
+#include "linux/unistd.h"
+#include "linux/stddef.h"
+#include "linux/spinlock.h"
+#include "linux/sched.h"
+#include "linux/interrupt.h"
+#include "linux/init.h"
+#include "linux/delay.h"
+#include "asm/param.h"
+#include "asm/current.h"
+#include "kern_util.h"
+#include "user_util.h"
+
+extern rwlock_t xtime_lock;
+
+/* Returns HZ. */
+int hz(void)
+{
+	return(HZ);
+}
+
+int timer_irq_inited = 0;
+
+/* timer_on and missed_ticks are modified after kernel memory has been
+ * write-protected, so this puts it in a section which
will be left + * write-enabled. + */ +int __attribute__ ((__section__ (".unprotected"))) timer_on = 0; +int __attribute__ ((__section__ (".unprotected"))) missed_ticks = 0; + +int timer_ready = 0; + +void timer_irq(int user_mode) +{ + int ticks = missed_ticks; + + if(!timer_irq_inited) return; + missed_ticks = 0; + while(ticks--) do_IRQ(TIMER_IRQ, user_mode); +} + +void boot_timer_handler(int sig) +{ + struct pt_regs regs; + + regs.user_mode = 0; + do_timer(®s); +} + +void um_timer(int irq, void *dev, struct pt_regs *regs) +{ + do_timer(regs); + write_lock(&xtime_lock); + timer(); + write_unlock(&xtime_lock); +} + +long um_time(int * tloc) +{ + struct timeval now; + + do_gettimeofday(&now); + if (tloc) { + if (put_user(now.tv_sec,tloc)) + now.tv_sec = -EFAULT; + } + return now.tv_sec; +} + +long um_stime(int * tptr) +{ + int value; + struct timeval new; + + if (get_user(value, tptr)) + return -EFAULT; + new.tv_sec = value; + new.tv_usec = 0; + do_settimeofday(&new); + return 0; +} + +void __delay(um_udelay_t time) +{ + /* Stolen from the i386 __loop_delay */ + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (time)); +} + +void __udelay(um_udelay_t usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / 1000000; + for(i=0;ithread.extern_pid != -1) && + (current->thread.extern_pid != getpid())) + panic("fix_range fixing wrong address space, current = 0x%p", + current); + if(mm == NULL) return; + for(addr=start_addr;addr TASK_SIZE, which is + * only true in the honeypot case. 
+ */ + addr = STACK_TOP - ABOVE_KMEM; + continue; + } + npgd = pgd_offset(mm, addr); + npmd = pmd_offset(npgd, addr); + if(pmd_present(*npmd)){ + npte = pte_offset(npmd, addr); + r = pte_read(*npte); + w = pte_write(*npte); + x = pte_exec(*npte); + if(!pte_dirty(*npte)) w = 0; + if(!pte_young(*npte)){ + r = 0; + w = 0; + } + if(force || pte_newpage(*npte)){ + if(munmap((void *) addr, PAGE_SIZE) < 0) + panic("munmap failed, errno = %d\n", + errno); + if(pte_present(*npte)) + map(addr, pte_address(*npte), + PAGE_SIZE, r, w, x); + } + else if(pte_newprot(*npte)) + protect(addr, PAGE_SIZE, r, w, x, 1); + *npte = pte_mkuptodate(*npte); + addr += PAGE_SIZE; + } + else { + if(force || pmd_newpage(*npmd)){ + if(munmap((void *) addr, PMD_SIZE) < 0) + panic("munmap failed, errno = %d\n", + errno); + } + addr += PMD_SIZE; + } + } +} + +atomic_t vmchange_seq = ATOMIC_INIT(1); + +static void flush_kernel_vm_range(unsigned long start, unsigned long end, + int update_seq) +{ + struct mm_struct *mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long addr; + int updated = 0; + + mm = &init_mm; + for(addr = start_vm; addr < end_vm;){ + pgd = pgd_offset(mm, addr); + pmd = pmd_offset(pgd, addr); + if(pmd_present(*pmd)){ + pte = pte_offset(pmd, addr); + if(!pte_present(*pte) || pte_newpage(*pte)){ + updated = 1; + if(munmap((void *) addr, PAGE_SIZE) < 0) + panic("munmap failed, errno = %d\n", + errno); + if(pte_present(*pte)) + map(addr, pte_address(*pte), + PAGE_SIZE, 1, 1, 1); + } + else if(pte_newprot(*pte)){ + updated = 1; + protect(addr, PAGE_SIZE, 1, 1, 1, 1); + } + addr += PAGE_SIZE; + + } + else { + if(pmd_newpage(*pmd)){ + updated = 1; + if(munmap((void *) addr, PMD_SIZE) < 0) + panic("munmap failed, errno = %d\n", + errno); + } + addr += PMD_SIZE; + } + } + if(updated && update_seq) atomic_inc(&vmchange_seq); +} + +static void protect_vm_page(unsigned long addr, int w, int must_succeed) +{ + int err; + + err = protect(addr, PAGE_SIZE, 1, w, 1, must_succeed); + if(err 
== 0) return; + else if(err == -EFAULT){ + flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); + protect_vm_page(addr, w, 1); + } + else panic("protect_vm_page : protect failed, errno = %d\n", err); +} + +void mprotect_kernel_vm(int w) +{ + struct mm_struct *mm; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long addr; + + mm = &init_mm; + for(addr = start_vm; addr < end_vm;){ + pgd = pgd_offset(mm, addr); + pmd = pmd_offset(pgd, addr); + if(pmd_present(*pmd)){ + pte = pte_offset(pmd, addr); + if(pte_present(*pte)) protect_vm_page(addr, w, 0); + addr += PAGE_SIZE; + } + else addr += PMD_SIZE; + } +} + +void flush_tlb_kernel_vm(void) +{ + flush_kernel_vm_range(start_vm, end_vm, 1); +} + +void flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + if(mm != current->mm) return; + + /* Assumes that the range start ... end is entirely within + * either process memory or kernel vm + */ + if((start >= start_vm) && (start < end_vm)) + flush_kernel_vm_range(start, end, 1); + else fix_range(mm, start, end, 0); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + unsigned long seq; + + if(mm != current->mm) return; + + fix_range(mm, 0, STACK_TOP, 0); + + seq = atomic_read(&vmchange_seq); + if(current->thread.vm_seq == seq) return; + current->thread.vm_seq = seq; + flush_kernel_vm_range(start_vm, end_vm, 0); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + address &= PAGE_MASK; + flush_tlb_range(vma->vm_mm, address, address + PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + +void force_flush_all(void) +{ + fix_range(current->mm, 0, STACK_TOP, 1); + flush_kernel_vm_range(start_vm, end_vm, 0); +} + +pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) +{ + return(pgd_offset(mm, address)); +} + +pmd_t *pmd_offset_proc(pgd_t *pgd, unsigned long address) +{ + return(pmd_offset(pgd, address)); +} + +pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) +{ + 
return(pte_offset(pmd, address)); +} + +pte_t *addr_pte(struct task_struct *task, unsigned long addr) +{ + return(pte_offset(pmd_offset(pgd_offset(task->mm, addr), addr), addr)); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/trap_kern.c linux_umopenmosix/arch/um/kernel/trap_kern.c --- linux-2.4.17/arch/um/kernel/trap_kern.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/trap_kern.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,376 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/sched.h" +#include "linux/mm.h" +#include "linux/spinlock.h" +#include "linux/config.h" +#include "linux/init.h" +#include "asm/semaphore.h" +#include "asm/pgtable.h" +#include "asm/pgalloc.h" +#include "asm/a.out.h" +#include "asm/current.h" +#include "user_util.h" +#include "kern_util.h" +#include "kern.h" +#include "chan_kern.h" +#include "debug.h" +#include "mconsole_kern.h" +#include "2_5compat.h" + +extern int nsyscalls; + +unsigned long segv(unsigned long address, unsigned long ip, int is_write, + int is_user) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct siginfo si; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long page; + + if((address >= start_vm) && (address < end_vm)){ + flush_tlb_kernel_vm(); + return(0); + } + if(mm == NULL) panic("Segfault with no mm"); + si.si_code = SEGV_MAPERR; + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if(!vma) goto bad; + else if(vma->vm_start <= address) goto good_area; + else if(!(vma->vm_flags & VM_GROWSDOWN)) goto bad; + else 
if(expand_stack(vma, address)) goto bad; + + good_area: + si.si_code = SEGV_ACCERR; + if(is_write && !(vma->vm_flags & VM_WRITE)) goto bad; + page = address & PAGE_MASK; + if(page == (unsigned long) current + PAGE_SIZE) + panic("Kernel stack overflow"); + pgd = pgd_offset(mm, page); + pmd = pmd_offset(pgd, page); + do { + survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + case 1: + current->min_flt++; + break; + case 2: + current->maj_flt++; + break; + default: + if (current->pid == 1) { + up_read(&mm->mmap_sem); + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + /* Fall through to bad area case */ + case 0: + goto bad; + } + pte = pte_offset(pmd, page); + } while(!pte_present(*pte)); + *pte = pte_mkyoung(*pte); + if(pte_write(*pte)) *pte = pte_mkdirty(*pte); + flush_tlb_page(vma, page); + up_read(&mm->mmap_sem); + return(0); + bad: + if (current->thread.fault_catcher != NULL) { + current->thread.fault_addr = (void *) address; + up_read(&mm->mmap_sem); + do_longjmp(current->thread.fault_catcher); + } + else if(current->thread.fault_addr != NULL){ + panic("fault_addr set but no fault catcher"); + } + if(!is_user) + panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", + address, ip); + si.si_signo = SIGSEGV; + si.si_addr = (void *) address; + current->thread.cr2 = address; + current->thread.err = is_write; + force_sig_info(SIGSEGV, &si, current); + up_read(&mm->mmap_sem); + return(0); +} + +void bad_segv(unsigned long address, unsigned long ip, int is_write) +{ + struct siginfo si; + + printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " + "(ip 0x%lx)\n", current->comm, current->pid, address, ip); + si.si_signo = SIGSEGV; + si.si_code = SEGV_ACCERR; + si.si_addr = (void *) address; + current->thread.cr2 = address; + current->thread.err = is_write; + force_sig_info(SIGSEGV, &si, current); +} + +void relay_signal(int sig, void *sc, int usermode) +{ + force_sig(sig, current); +} + +void trap_init(void) +{ +} + +spinlock_t trap_lock = 
SPIN_LOCK_UNLOCKED; + +void lock_trap(void) +{ + spin_lock(&trap_lock); +} + +void unlock_trap(void) +{ + spin_unlock(&trap_lock); +} + +extern int debugger_pid; +extern int debugger_fd; + +#ifdef CONFIG_PT_PROXY + +int debugger_signal(int status, pid_t pid) +{ + return(debugger_proxy(status, pid)); +} + +void child_signal(pid_t pid, int status) +{ + child_proxy(pid, status); +} + +static void gdb_announce(char *dev_name, int dev) +{ + printf("gdb assigned device '%s'\n", dev_name); +} + +static struct chan_opts opts = { + announce : gdb_announce, + xterm_title : "UML kernel debugger", + raw : 0 +}; + +static void *xterm_data; +static int xterm_fd; + +extern void *xterm_init(char *, int, struct chan_opts *); +extern int xterm_open(int, int, void *); +extern void xterm_close(int, void *); + +int open_gdb_chan(void) +{ + xterm_data = xterm_init("", 0, &opts); + xterm_fd = xterm_open(1, 1, xterm_data); + return(xterm_fd); +} + +static void exit_debugger_cb(void *unused) +{ + if(debugger_pid != -1){ + if(gdb_pid != -1){ + fake_child_exit(); + gdb_pid = -1; + } + else kill_child_dead(debugger_pid); + debugger_pid = -1; + } + if(xterm_data != NULL) xterm_close(xterm_fd, xterm_data); +} + +static void exit_debugger(void) +{ + tracing_cb(exit_debugger_cb, NULL); +} + +__uml_exitcall(exit_debugger); + +struct gdb_data { + char *str; + int err; +}; + +static void config_gdb_cb(void *arg) +{ + struct gdb_data *data = arg; + struct task_struct *task; + int pid; + + data->err = -1; + if(debugger_pid != -1) exit_debugger_cb(NULL); + if(!strncmp(data->str, "pid,", strlen("pid,"))){ + data->str += strlen("pid,"); + pid = simple_strtoul(data->str, NULL, 0); + task = cpu_tasks[0].task; + debugger_pid = attach_debugger(task->thread.extern_pid, + pid, 0); + if(debugger_pid != -1){ + data->err = 0; + gdb_pid = pid; + } + return; + } + data->err = 0; + debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); + init_proxy(debugger_pid, 0, 0); +} + +int gdb_config(char *str) +{ + 
struct gdb_data data; + + if(*str++ != '=') return(-1); + data.str = str; + tracing_cb(config_gdb_cb, &data); + return(data.err); +} + +void remove_gdb_cb(void *unused) +{ + exit_debugger_cb(NULL); +} + +int gdb_remove(char *unused) +{ + tracing_cb(remove_gdb_cb, NULL); + return(0); +} + +#ifdef CONFIG_MCONSOLE + +static struct mc_device gdb_mc = { + name: "gdb", + config: gdb_config, + remove: gdb_remove, +}; + +int gdb_mc_init(void) +{ + mconsole_register_dev(&gdb_mc); + return(0); +} + +__initcall(gdb_mc_init); + +#endif + +void signal_usr1(int sig) +{ + if(debugger_pid != -1){ + printk(KERN_ERR "The debugger is already running\n"); + return; + } + debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); + init_proxy(debugger_pid, 0, 0); +} + +int init_ptrace_proxy(int idle_pid, int startup, int stop) +{ + int pid, status; + + pid = start_debugger(linux_prog, startup, stop, &debugger_fd); + status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT); + if(pid < 0){ + cont(idle_pid); + return(-1); + } + init_proxy(pid, 1, status); + return(pid); +} + +int attach_debugger(int idle_pid, int pid, int stop) +{ + int status = 0; + + if(attach(pid) < 0){ + printf("Failed to attach pid %d, errno = %d\n", pid, errno); + return(-1); + } + if(stop) status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT); + init_proxy(pid, 1, status); + return(pid); +} + +#ifdef notdef /* Put this back in when it does something useful */ +static int __init uml_gdb_init_setup(char *line, int *add) +{ + gdb_init = uml_strdup(line); + return 0; +} + +__uml_setup("gdb=", uml_gdb_init_setup, +"gdb=\n\n" +); +#endif + +static int __init uml_gdb_pid_setup(char *line, int *add) +{ + gdb_pid = simple_strtoul(line, NULL, 0); + return 0; +} + +__uml_setup("gdb-pid=", uml_gdb_pid_setup, +"gdb-pid=\n" +" gdb-pid is used to attach an external debugger to UML. 
This may be\n" +" an already-running gdb or a debugger-like process like strace.\n\n" +); + +#else + +int debugger_signal(int status, pid_t pid){ return(0); } +void child_signal(pid_t pid, int status){ } +int init_ptrace_proxy(int idle_pid, int startup, int stop) +{ + printk(KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); + wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT); + cont(idle_pid); + return(-1); +} + +void signal_usr1(int sig) +{ + printk(KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); +} + +int attach_debugger(int idle_pid, int pid, int stop) +{ + printk(KERN_ERR "attach_debugger called when CONFIG_PT_PROXY " + "is off\n"); + return(-1); +} + +int config_gdb(char *str) +{ + return(-1); +} + +int remove_gdb(void) +{ + return(-1); +} + +#endif +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/trap_user.c linux_umopenmosix/arch/um/kernel/trap_user.c
--- linux-2.4.17/arch/um/kernel/trap_user.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/trap_user.c Fri Jun 28 00:30:19 2002
@@ -0,0 +1,576 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user_util.h"
+#include "kern_util.h"
+#include "signal_user.h"
+#include "mem_user.h"
+#include "user.h"
+#include "process.h"
+#include "sigcontext.h"
+#include "sysdep/sigcontext.h"
+#include "init.h"
+#include "chan_user.h"
+#include "irq_user.h"
+#include "frame.h"
+#include "syscall_user.h"
+
+/* Last-resort SIGSEGV handler for the tracing thread: report and spin. */
+static void signal_segv(int sig)
+{
+	write(2, "Seg fault in signals\n", strlen("Seg fault in signals\n"));
+	for(;;) ;
+}
+
+/* Thin wrappers around ptrace(); each returns ptrace()'s result. */
+int detach(int pid, int sig)
+{
+	return(ptrace(PTRACE_DETACH, pid, 0, sig));
+}
+
+int attach(int pid)
+{
+	return(ptrace(PTRACE_ATTACH, pid, 0, 0));
+}
+
+int cont(int pid)
+{
+	return(ptrace(PTRACE_CONT, pid, 0, 0));
+}
+
+/* SIGKILL pid, then keep reaping it while waitpid() still returns a pid. */
+void kill_child_dead(int pid)
+{
+	kill(pid, SIGKILL);
+	kill(pid, SIGCONT);
+	while(waitpid(pid, NULL, 0) > 0) kill(pid, SIGCONT);
+}
+
+int debug = 0;
+int debug_stop = 1;
+
+int honeypot = 0;
+
+/* Entry point of the cloned idle thread: puts itself under ptrace, stops
+ * so that signals() can pick it up with waitpid(), installs its own signal
+ * setup and then runs the function passed in arg.
+ */
+static int signal_tramp(void *arg)
+{
+	int (*proc)(void *);
+
+	if(honeypot && munmap((void *) (host_task_size - 0x10000000),
+			      0x10000000))
+		panic("Unmapping stack failed");
+	if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
+		panic("ptrace PTRACE_TRACEME failed");
+	kill(getpid(), SIGSTOP);
+	signal(SIGUSR1, SIG_IGN);
+	signal(SIGSEGV, (__sighandler_t) sig_handler);
+	set_timers(0);
+	set_cmdline("(idle thread)");
+	set_init_pid(getpid());
+	proc = arg;
+	return((*proc)(NULL));
+}
+
+/* Handler for fatal signals: reset them to SIG_DFL, clean up and exit(1). */
+static void last_ditch_exit(int sig)
+{
+	signal(SIGINT, SIG_DFL);
+	signal(SIGTERM, SIG_DFL);
+	signal(SIGHUP, SIG_DFL);
+	uml_cleanup();
+	exit(1);
+}
+
+/* Handle a ptrace stop reported for a host process that is not running on
+ * any virtual processor (pid_to_processor_id() returned -1 in signals()).
+ * Only SIGCONT, SIGTSTP, SIGWINCH and SIGTRAP are expected; anything else,
+ * or a failing ptrace(), is fatal.  errno is reported on failure.
+ */
+static void sleeping_process_signal(int pid, int sig)
+{
+	switch(sig){
+	/* These two result from UML being ^Z-ed and bg-ed. PTRACE_CONT is
+	 * right because the process must be in the kernel already.
+	 */
+	case SIGCONT:
+	case SIGTSTP:
+		if(ptrace(PTRACE_CONT, pid, 0, sig) < 0)
+			tracer_panic("sleeping_process_signal : Failed to "
+				     "continue pid %d, errno = %d\n", pid,
+				     errno);
+		break;
+
+	/* If an xterm changes size and the process handling SIGWINCH is
+	 * asleep, its signal needs to be annulled and the SIGWINCH needs
+	 * to be passed to the current running process.
+	 * XXX if the current process is switching out, then the SIGWINCH
+	 * might hit the wrong process.
+	 */
+	case SIGWINCH:
+		if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+			tracer_panic("sleeping_process_signal : Failed to "
+				     "annull SIGWINCH pid %d, errno = %d\n",
+				     pid, errno);
+		kill(external_pid(cpu_tasks[0].task), SIGWINCH);
+		break;
+
+	/* This happens when the debugger (e.g. strace) is doing system call
+	 * tracing on the kernel. During a context switch, the current task
+	 * will be set to the incoming process and the outgoing process will
+	 * hop into write and then read. Since it's not the current process
+	 * any more, the trace of those will land here. So, we need to just
+	 * PTRACE_SYSCALL it.
+	 */
+	case SIGTRAP:
+		if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
+			tracer_panic("sleeping_process_signal : Failed to "
+				     "PTRACE_SYSCALL pid %d, errno = %d\n",
+				     pid, errno);
+		break;
+	default:
+		tracer_panic("sleeping process %d got unexpected "
+			     "signal : %d\n", pid, sig);
+		break;
+	}
+}
+
+
+#ifdef CONFIG_SMP
+#error need to make these arrays
+#endif
+
+int debugger_pid = -1;
+int debugger_fd = -1;
+int gdb_pid = -1;
+
+struct {
+	unsigned long address;
+	int is_write;
+	int pid;
+	unsigned long sp;
+	int is_user;
+} segfault_record[1024];
+
+int segfault_index = 0;
+
+struct {
+	int pid;
+	int signal;
+	unsigned long addr;
+	struct timeval time;
+} signal_record[1024];
+
+int signal_index = 0;
+int nsignals = 0;
+int debug_trace = 0;
+extern int io_nsignals, io_count, intr_count;
+
+extern void signal_usr1(int sig);
+
+int tracing_pid = -1;
+
+/* Main loop of the tracing thread.  Clones the idle thread running
+ * init_proc on stack sp, installs the tracing thread's own signal
+ * handlers, then waits for children and decides for each stop how to
+ * continue the stopped process.  Returns (op == OP_REBOOT) when a process
+ * requests OP_REBOOT or OP_HALT.
+ */
+int signals(int (*init_proc)(void *), void *sp)
+{
+	void *task = NULL;
+	unsigned long eip = 0;
+	int status, pid = 0, sig, cont_type, tracing = 0, op = 0;
+	int last_index, proc_id, n, strace = 0;
+
+	capture_signal_stack();
+	signal(SIGPIPE, SIG_IGN);
+	tracing_pid = getpid();
+	printk("tracing thread pid = %d\n", tracing_pid);
+
+	pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc);
+	n = waitpid(pid, &status, WUNTRACED);
+	if(n < 0){
+		printf("waitpid on idle thread failed, errno = %d\n", errno);
+		exit(1);
+	}
+	if((ptrace_getregs(pid, process_state(get_init_task())) < 0) ||
+	   (ptrace(PTRACE_CONT, pid, 0, 0) < 0)){
+		printf("Failed to get idle thread registers or continue it, "
+		       "errno = %d\n", errno);
+		exit(1);
+	}
+
+	/* The idle thread has been continued above; continuing it a second
+	 * time here would race with its next stop.  Install the handlers
+	 * unconditionally.
+	 */
+	signal(SIGSEGV, signal_segv);
+	signal(SIGUSR1, signal_usr1);
+	set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
+	set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
+	set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1);
+	if(debug){
+		if(gdb_pid != -1)
+			debugger_pid = attach_debugger(pid, gdb_pid, 1);
+		else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop);
+	}
+	set_cmdline("(tracing thread)");
+	if(debug_trace){
+		printk("Tracing thread pausing to be attached\n");
+		stop();
+	}
+	while(1){
+		if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){
+			if(errno != ECHILD){
+				printk("wait failed - errno = %d\n", errno);
+			}
+			continue;
+		}
+		if(pid == debugger_pid){
+			int cont = 0;
+
+			if(WIFEXITED(status) || WIFSIGNALED(status))
+				debugger_pid = -1;
+			/* XXX Figure out how to deal with gdb and SMP */
+			else cont = debugger_signal(status, cpu_tasks[0].pid);
+			if(cont == PTRACE_SYSCALL) strace = 1;
+			continue;
+		}
+		nsignals++;
+		if(WIFEXITED(status)) ;
+#ifdef notdef
+		{
+			printk("Child %d exited with status %d\n", pid,
+			       WEXITSTATUS(status));
+		}
+#endif
+		else if(WIFSIGNALED(status)){
+			sig = WTERMSIG(status);
+			if(sig != 9){
+				printk("Child %d exited with signal %d\n", pid,
+				       sig);
+			}
+		}
+		else if(WIFSTOPPED(status)){
+			sig = WSTOPSIG(status);
+			if(signal_index == 1024){
+				signal_index = 0;
+				last_index = 1023;
+			}
+			else last_index = signal_index - 1;
+			/* Collapse repeated timer signals from the same pid
+			 * into one record; last_index is -1 on first use.
+			 */
+			if(((sig == SIGPROF) || (sig == SIGVTALRM) ||
+			    (sig == SIGALRM)) && (last_index >= 0) &&
+			   (signal_record[last_index].signal == sig) &&
+			   (signal_record[last_index].pid == pid))
+				signal_index = last_index;
+			signal_record[signal_index].pid = pid;
+			gettimeofday(&signal_record[signal_index].time, NULL);
+			eip = ptrace(PTRACE_PEEKUSER, pid, UM_IP_OFFSET, 0);
+			signal_record[signal_index].addr = eip;
+			signal_record[signal_index++].signal = sig;
+
+			proc_id = pid_to_processor_id(pid);
+			if(proc_id == -1){
+				sleeping_process_signal(pid, sig);
+				continue;
+			}
+
+			task = cpu_tasks[proc_id].task;
+			tracing = is_tracing(task);
+
+			switch(sig){
+			case SIGUSR1:
+				sig = 0;
+				op = do_proc_op(task, proc_id);
+				switch(op){
+				case OP_TRACE_ON:
+					tracing = exit_kernel(pid, task);
+					break;
+				case OP_REBOOT:
+				case OP_HALT:
+					kmalloc_ok = 0;
+					ptrace(PTRACE_KILL, pid, 0, 0);
+					return(op == OP_REBOOT);
+				case OP_NONE:
+					printk("Detaching pid %d\n", pid);
+					detach(pid, SIGSTOP);
+					continue;
+				default:
+					break;
+				}
+				/* OP_EXEC switches host processes on us,
+				 * we want to continue the new one.
+				 */
+				pid = cpu_tasks[proc_id].pid;
+				break;
+			case SIGTRAP:
+				if(!tracing && (debugger_pid != -1)){
+					child_signal(pid, status);
+					continue;
+				}
+				tracing = 0;
+				if(do_syscall(task, pid)) sig = SIGUSR2;
+				break;
+			case SIGPROF:
+				if(tracing) sig = 0;
+				break;
+			case SIGCHLD:
+			case SIGHUP:
+				sig = 0;
+				break;
+			case SIGSEGV:
+			case SIGIO:
+			case SIGALRM:
+			case SIGVTALRM:
+			case SIGFPE:
+			case SIGBUS:
+			case SIGILL:
+			case SIGWINCH:
+			default:
+				tracing = 0;
+				break;
+			}
+			set_tracing(task, tracing);
+
+			if(!tracing && (debugger_pid != -1) && (sig != 0) &&
+			   (sig != SIGALRM) && (sig != SIGVTALRM) &&
+			   (sig != SIGSEGV) && (sig != SIGTRAP) &&
+			   (sig != SIGUSR2)){
+				child_signal(pid, status);
+				continue;
+			}
+
+			if(tracing){
+				if(singlestepping(task))
+					cont_type = PTRACE_SINGLESTEP;
+				else cont_type = PTRACE_SYSCALL;
+			}
+			else cont_type = PTRACE_CONT;
+
+			if((cont_type == PTRACE_CONT) &&
+			   (debugger_pid != -1) && strace)
+				cont_type = PTRACE_SYSCALL;
+
+			if(ptrace(cont_type, pid, 0, sig) != 0){
+				tracer_panic("ptrace failed to continue "
+					     "process - errno = %d\n",
+					     errno);
+			}
+		}
+	}
+	return(0);
+}
+
+static int __init uml_debugtrace_setup(char *line, int *add)
+{
+	debug_trace = 1;
+	return 0;
+}
+__uml_setup("debugtrace=", uml_debugtrace_setup,
+"debugtrace\n"
+" Causes the tracing thread to pause until it is attached by a\n"
+" debugger and continued. This is mostly for debugging crashes\n"
+" early during boot, and should be pretty much obsoleted by\n"
+" the debug switch.\n\n"
+);
+
+static int __init uml_honeypot_setup(char *line, int *add)
+{
+	jail_setup("", add);
+	honeypot = 1;
+	return 0;
+}
+__uml_setup("honeypot", uml_honeypot_setup,
+"honeypot\n"
+" This makes UML put process stacks in the same location as they are\n"
+" on the host, allowing expoits such as stack smashes to work against\n"
+" UML. This implies 'jail'.\n\n"
+);
+
+int nsegfaults = 0;
+
+/* SIGSEGV handler: unfixable user-mode faults go to bad_segv(); everything
+ * else is logged in segfault_record[] and passed to segv().
+ */
+void segv_handler(int sig, void *sc, int usermode)
+{
+	struct sigcontext_struct *context = sc;
+	int index;
+
+	if(usermode && !SEGV_IS_FIXABLE(context)){
+		bad_segv(SC_FAULT_ADDR(context), SC_IP(context),
+			 SC_FAULT_WRITE(context));
+		return;
+	}
+	lock_trap();
+	index = segfault_index++;
+	if(segfault_index == 1024) segfault_index = 0;
+	unlock_trap();
+	nsegfaults++;
+	segfault_record[index].address = SC_FAULT_ADDR(context);
+	segfault_record[index].pid = getpid();
+	segfault_record[index].is_write = SC_FAULT_WRITE(context);
+	segfault_record[index].sp = SC_SP(context);
+	segfault_record[index].is_user = usermode;
+	segv(SC_FAULT_ADDR(context), SC_IP(context), SC_FAULT_WRITE(context),
+	     usermode);
+}
+
+static void winch_handler(int sig, void *sc, int usermode)
+{
+	run_winch_handlers();
+}
+
+extern int timer_ready, timer_on;
+
+/* Dispatch table indexed by signal number; see irq_handler_common(). */
+static void (*handlers[])(int, void *, int) = {
+	[ SIGTRAP ] relay_signal,
+	[ SIGFPE ] relay_signal,
+	[ SIGILL ] relay_signal,
+	[ SIGBUS ] relay_signal,
+	[ SIGSEGV] segv_handler,
+	[ SIGIO ] sigio_handler,
+	[ SIGVTALRM ] timer_handler,
+	[ SIGALRM ] timer_handler,
+	[ SIGWINCH ] winch_handler,
+};
+
+/* Common body of the signal handlers: saves errno and timer_on, dispatches
+ * through handlers[] and restores state.  user is user_context(SC_SP(sc)).
+ */
+void irq_handler_common(int sig, struct sigcontext *sc)
+{
+	int user, save_errno = errno, save_timer = timer_on;
+
+	user = user_context(SC_SP(sc));
+	unprotect_kernel_mem(0);
+	timer_on = 0;
+	if(user){
+		set_thread_sc(sc);
+		fill_in_regs(process_state(NULL), sc);
+		timer_ready = 1;
+	}
+	change_sig(SIGUSR1, 1);
+	(*handlers[sig])(sig, sc, user);
+	if(user) interrupt_end();
+	block_signals();
+	change_sig(SIGUSR1, 0);
+	if(user) set_user_mode(NULL, 1, 0);
+	errno = save_errno;
+	if(user) timer_ready = 0;
+	timer_on = save_timer;
+	if(user) protect_kernel_mem(0);
+}
+
+void irq_handler(int sig, struct sigcontext sc)
+{
+	irq_handler_common(sig, &sc);
+}
+
+/* Same sequence as irq_handler_common(), except that sc is taken by value
+ * and signals are unblocked while the handler from handlers[] runs.
+ */
+void sig_handler(int sig, struct sigcontext sc)
+{
+	int user, save_errno = errno, save_timer = timer_on;
+
+	user = user_context(SC_SP(&sc));
+	unprotect_kernel_mem(0);
+	timer_on = 0;
+	if(user){
+		set_thread_sc(&sc);
+		fill_in_regs(process_state(NULL), &sc);
+		timer_ready = 1;
+	}
+	change_sig(SIGUSR1, 1);
+	unblock_signals();
+	(*handlers[sig])(sig, &sc, user);
+	if(user) interrupt_end();
+	block_signals();
+	change_sig(SIGUSR1, 0);
+	if(user) set_user_mode(NULL, 1, 0);
+	errno = save_errno;
+	if(user) timer_ready = 0;
+	timer_on = save_timer;
+	if(user) protect_kernel_mem(0);
+}
+
+extern int timer_irq_inited, missed_ticks;
+
+/* Timer signal handler: counts the tick in missed_ticks, then forwards to
+ * irq_handler_common() unless timer_on is clear or a non-user context is
+ * not timer_ready.  Does nothing before timer_irq_inited is set.
+ */
+void alarm_handler(int sig, struct sigcontext sc)
+{
+	int user;
+
+	if(!timer_irq_inited) return;
+	missed_ticks++;
+	user = user_context(SC_SP(&sc));
+	if(!user && !timer_ready) return;
+	if(!timer_on) return;
+	irq_handler_common(sig, &sc);
+	timer_ready = 1;
+}
+
+/* longjmp() to the jmp_buf that p points to, with value 1. */
+void do_longjmp(void *p)
+{
+	jmp_buf *jbuf = (jmp_buf *) p;
+
+	longjmp(*jbuf, 1);
+}
+
+static int __init uml_debug_setup(char *line, int *add)
+{
+	debug = 1;
+	if(!strcmp(line, "=go")){
+		debug_stop = 0;
+		*add = 0;
+	}
+	return 0;
+}
+
+__uml_setup("debug", uml_debug_setup,
+"debug\n"
+" Starts up the kernel under the control of gdb. See the \n"
+" kernel debugging tutorial and the debugging session pages\n"
+" at http://user-mode-linux.sourceforge.net/ for more information.\n\n"
+);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/uaccess_user.c linux_umopenmosix/arch/um/kernel/uaccess_user.c
--- linux-2.4.17/arch/um/kernel/uaccess_user.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/uaccess_user.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
+ * Licensed under the GPL
+ */
+
+#include <setjmp.h>
+#include <string.h>
+#include "user_util.h"
+
+/* Run (*op)(to, from, n) with *fault_catcher pointing at a local jmp_buf.
+ * If control comes back through that jmp_buf, *faulted_out is set to 1 and
+ * the value found in *fault_addr is returned; otherwise *faulted_out is 0
+ * and 0 is returned.  Both *fault_addr and *fault_catcher are reset to
+ * NULL before returning.
+ */
+static unsigned long __do_user_copy(void *to, const void *from, int n,
+				    void **fault_addr, void **fault_catcher,
+				    void (*op)(void *to, const void *from,
+					       int n), int *faulted_out)
+{
+	unsigned long *faddrp = (unsigned long *) fault_addr, ret;
+
+	jmp_buf jbuf;
+	*fault_catcher = &jbuf;
+	if(setjmp(jbuf) == 0){
+		(*op)(to, from, n);
+		ret = 0;
+		*faulted_out = 0;
+	}
+	else {
+		ret = *faddrp;
+		*faulted_out = 1;
+	}
+	*fault_addr = NULL;
+	*fault_catcher = NULL;
+	return ret;
+}
+
+static void __do_copy(void *to, const void *from, int n)
+{
+	memcpy(to, from, n);
+}
+
+/* Returns 0 on success, otherwise n minus the offset of the faulting
+ * address from the start of the source buffer.
+ */
+int __do_copy_from_user(void *to, const void *from, int n,
+			void **fault_addr, void **fault_catcher)
+{
+	unsigned long fault;
+	int faulted;
+
+	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
+			       __do_copy, &faulted);
+	if(!faulted) return(0);
+	else return(n - (fault - (unsigned long) from));
+}
+
+
+/* Returns 0 on success, otherwise n minus the offset of the faulting
+ * address from the start of the destination buffer.
+ */
+int __do_copy_to_user(void *to, const void *from, int n,
+		      void **fault_addr, void **fault_catcher)
+{
+	unsigned long fault;
+	int faulted;
+
+	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
+			       __do_copy, &faulted);
+	if(!faulted) return(0);
+	else return(n - (fault - (unsigned long) to));
+}
+
+static void __do_strncpy(void *dst, const void *src, int count)
+{
+	strncpy(dst, src, count);
+}
+
+/* Copy at most count bytes of the string at src into dst.  Returns the
+ * length of the copied string, count if no terminator was copied, or -1
+ * if the copy faulted.
+ */
+int __do_strncpy_from_user(char *dst, const char *src, unsigned long count,
+			   void **fault_addr, void **fault_catcher)
+{
+	unsigned long len;
+	int faulted;
+
+	__do_user_copy(dst, src, count, fault_addr, fault_catcher,
+		       __do_strncpy, &faulted);
+	if(faulted) return(-1);
+
+	/* strncpy() does not terminate dst when src has count or more
+	 * characters, so never look beyond dst[count - 1].
+	 */
+	for(len = 0; (len < count) && (dst[len] != '\0'); len++) ;
+	return(len);
+}
+
+static void __do_clear(void *to, const void *from, int n)
+{
+	memset(to, 0, n);
+}
+
+/* Zero len bytes at mem.  Returns 0 on success, otherwise len minus the
+ * offset of the faulting address from mem.
+ */
+int __do_clear_user(void *mem, unsigned long len,
+		    void **fault_addr, void **fault_catcher)
+{
+	unsigned long fault;
+	int faulted;
+
+	fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher,
+			       __do_clear, &faulted);
+	if(!faulted) return(0);
+	else return(len - (fault - (unsigned long) mem));
+}
+
+/* Length of the string at str including its terminator, examining at most
+ * n bytes; a result larger than n means no terminator was found in range.
+ * If the scan faults, the offset of the faulting address from str is
+ * returned instead.
+ */
+int __do_strnlen_user(const char *str, unsigned long n,
+		      void **fault_addr, void **fault_catcher)
+{
+	int ret;
+	unsigned long len;
+	unsigned long *faddrp = (unsigned long *)fault_addr;
+	jmp_buf jbuf;
+
+	*fault_catcher = &jbuf;
+	if(setjmp(jbuf) == 0){
+		/* Stop after n bytes instead of running to the terminator */
+		for(len = 0; (len < n) && (str[len] != '\0'); len++) ;
+		ret = len + 1;
+	}
+	else {
+		ret = *faddrp - (unsigned long) str;
+	}
+	*fault_addr = NULL;
+	*fault_catcher = NULL;
+	return ret;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/um_arch.c linux_umopenmosix/arch/um/kernel/um_arch.c
--- linux-2.4.17/arch/um/kernel/um_arch.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/um_arch.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/config.h"
+#include "linux/sched.h"
+#include "linux/mm.h"
+#include "linux/types.h"
+#include "linux/tty.h"
+#include "linux/init.h"
+#include "linux/bootmem.h"
+#include "linux/spinlock.h"
+#include "linux/utsname.h"
+#include
+#include "asm/page.h"
+#include "asm/pgtable.h"
+#include "asm/ptrace.h"
+#include "asm/elf.h"
+#include "asm/user.h"
+#include "asm/delay.h"
+#include "ubd_user.h"
+#include "asm/current.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "kern.h"
+#include "mprot.h"
+#include "mem_user.h"
+#include "umid.h"
+#include "initrd.h"
+#include "init.h"
+
+#define DEFAULT_COMMAND_LINE "root=/dev/ubd0"
+
+/* Stub: no FPU state is dumped; always returns 0. */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	return(0);
+}
+
+unsigned long thread_saved_pc(struct thread_struct *thread)
+{
+	panic("Someone should implement thread_saved_pc");
+	return(0);
+}
+
+/*
+ * show_cpuinfo - seq_file "show" callback behind cpuinfo_op.
+ *
+ * v is an element of cpu_data as handed out by c_start()/c_next().  Prints
+ * the processor, bogomips and host lines into m and returns 0.  With
+ * CONFIG_SMP, CPUs missing from cpu_online_map print nothing.
+ */
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int index;
+
+	index = (struct cpuinfo_um *)v - cpu_data;
+#ifdef CONFIG_SMP
+	if (!(cpu_online_map & (1 << index)))
+		return 0;
+#endif
+
+	seq_printf(m, "processor\t: user-mode\n");
+	seq_printf(m, "bogomips\t: %lu.%02lu\n",
+		   loops_per_jiffy/(500000/HZ),
+		   (loops_per_jiffy/(5000/HZ)) % 100);
+	seq_printf(m, "host\t\t: %s\n", host_info);
+
+	return(0);
+}
+
+/* seq_file iterator over cpu_data[]: c_start() yields entry *pos or NULL
+ * once *pos reaches NR_CPUS, c_next() advances *pos, c_stop() is a no-op.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+	start:	c_start,
+	next:	c_next,
+	stop:	c_stop,
+	show:	show_cpuinfo,
+};
+
+pte_t * __bad_pagetable(void)
+{
+	panic("Someone should implement __bad_pagetable");
+	return(NULL);
+}
+
+extern void start_kernel(void);
+
+extern int debug;
+extern int debug_stop;
+
+/* Body of the idle thread (passed to signals() by linux_main()): records
+ * itself as the task on CPU 0 and enters start_kernel().
+ */
+static int start_kernel_proc(void *unused)
+{
+	int pid;
+
+	block_signals();
+	pid = getpid();
+
+	cpu_tasks[0].pid = pid;
+	cpu_tasks[0].task = current;
+#ifdef CONFIG_SMP
+	cpu_online_map = 1;
+#endif
+	if(debug) stop_pid(pid);
+	start_kernel();
+	return(0);
+}
+
+extern unsigned long high_physmem;
+
+#ifdef CONFIG_HOST_2G_2G
+#define START 0x60000000
+#else
+#define START 0xa0000000
+#endif
+
+unsigned long host_task_size;
+unsigned long task_size;
+
+/* Derive host_task_size from the address of a stack variable. */
+void set_task_sizes(int arg)
+{
+	/* Round up to the nearest 4M */
+	host_task_size = ROUND_4M((unsigned long) &arg);
+	task_size = START;
+}
+
+unsigned long uml_physmem;
+
+unsigned long start_vm;
+unsigned long end_vm;
+
+int ncpus = 1;
+
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
+
+static char *argv1_begin = NULL;
+static char *argv1_end = NULL;
+
+static int have_root __initdata = 0;
+long physmem_size = 32 * 1024 * 1024;
+
+/* Overwrite the argv[1] area with "[cmd]" (cmd truncated to fit) and zero
+ * the rest of it.  Does nothing in honeypot mode or when the area cannot
+ * hold even "[]".
+ */
+void set_cmdline(char *cmd)
+{
+	if(honeypot) return;
+	if(argv1_end - argv1_begin < (long) strlen("[]")) return;
+	strcpy(argv1_begin, "[");
+	strncat(argv1_begin, cmd, argv1_end - argv1_begin - strlen("[]"));
+	strcat(argv1_begin, "]");
+	memset(argv1_begin + strlen(argv1_begin), '\0',
+	       argv1_end - argv1_begin - strlen(argv1_begin));
+}
+
+static char *usage_string =
+"User Mode Linux v%s\n"
+" available at http://user-mode-linux.sourceforge.net/\n\n";
+
+static int __init uml_version_setup(char *line, int *add)
+{
+	printf("%s\n", system_utsname.release);
+	exit(0);
+}
+
+__uml_setup("--version", uml_version_setup,
+"--version\n"
+" Prints the version number of the kernel\n\n"
+);
+
+static int __init uml_root_setup(char *line, int *add)
+{
+	have_root = 1;
+	return 0;
+}
+
+__uml_setup("root=", uml_root_setup,
+"root=\n"
+" This is actually used by the generic kernel in exactly the same\n"
+" way as in any other kernel. If you configure a number of block\n"
+" devices and want to boot off something other than ubd0, you \n"
+" would use something like:\n"
+" root=/dev/ubd5\n\n"
+);
+
+#ifdef CONFIG_SMP
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+	if (sscanf(line, "%d", &ncpus) != 1) {
+		printk("Couldn't parse [%s]\n", line);
+		return -1;
+	}
+
+	return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+" This tells an SMP kernel how many virtual processors to start.\n"
+" Currently, this has no effect because SMP isn't enabled.\n\n"
+);
+#endif
+
+/* "--help" handler: print the banner and every registered help string. */
+static int __init Usage(char *line, int *add)
+{
+	const char **p;
+
+	printf(usage_string, system_utsname.release);
+	p = &__uml_help_start;
+	while (p < &__uml_help_end) {
+		printf("%s", *p);
+		p++;
+	}
+	exit(0);
+}
+
+__uml_setup("--help", Usage,
+"--help\n"
+" Prints this message\n\n"
+);
+
+/* Offer line to every registered uml_param whose prefix matches it.
+ * Returns 1 as soon as a setup function returns non-zero, 0 otherwise.
+ * The setup functions may clear *add to keep line off the command line.
+ */
+static int __init uml_checksetup(char *line, int *add)
+{
+	struct uml_param *p;
+
+	p = &__uml_setup_start;
+	while(p < &__uml_setup_end) {
+		int n;
+
+		n = strlen(p->str);
+		if(!strncmp(line, p->str, n)){
+			if (p->setup_func(line + n, add)) return 1;
+		}
+		p++;
+	}
+	return 0;
+}
+
+/* Call each initcall from __uml_postsetup_start to __uml_postsetup_end. */
+static void __init uml_postsetup(void)
+{
+	initcall_t *p;
+
+	p = &__uml_postsetup_start;
+	while(p < &__uml_postsetup_end){
+		(*p)();
+		p++;
+	}
+	return;
+}
+
+extern int debug_trace;
+extern int jail;
+void *brk_start;
+
+/* Parses the command line, lays out physical and kernel virtual memory,
+ * initializes the boot memory allocator and hands over to signals(),
+ * whose return value is returned.  argv[1] is kept as the buffer that
+ * set_cmdline() later overwrites.
+ */
+int linux_main(int argc, char **argv)
+{
+	unsigned long start_pfn, end_pfn, bootmap_size;
+	unsigned long virtmem_size;
+	int i, add;
+	void *sp;
+
+	for (i = 1; i < argc; i++){
+		if((i == 1) && (argv[i][0] == ' ')) continue;
+		add = 1;
+		uml_checksetup(argv[i], &add);
+		if(add) add_arg(saved_command_line, argv[i]);
+	}
+	if(have_root == 0) add_arg(saved_command_line, DEFAULT_COMMAND_LINE);
+
+	if(!jail)
+		remap_data(ROUND_DOWN(&_stext), ROUND_UP(&_etext), 1);
+	remap_data(ROUND_DOWN(&_sdata), ROUND_UP(&_edata), 1);
+	brk_start = sbrk(0);
+	remap_data(ROUND_DOWN(&__bss_start), ROUND_UP(brk_start), 1);
+
+	/* Start physical memory at least 4M after the current brk */
+	uml_physmem = ROUND_4M(brk_start) + (1 << 22);
+
+	setup_machinename(system_utsname.machine);
+
+	argv1_begin = argv[1];
+	argv1_end = &argv[1][strlen(argv[1])];
+
+	/* Kernel vm starts after physical memory and is either the size
+	 * of physical memory or the remaining space left in the kernel
+	 * area of the address space, whichever is smaller.
+	 */
+	start_vm = uml_physmem + physmem_size + VMALLOC_OFFSET;
+	if(start_vm >= get_kmem_end())
+		panic("Physical memory too large to allow any kernel "
+		      "virtual memory");
+
+	virtmem_size = physmem_size;
+	if(physmem_size > get_kmem_end() - start_vm)
+		virtmem_size = get_kmem_end() - start_vm;
+	end_vm = start_vm + virtmem_size;
+
+	if(virtmem_size < physmem_size)
+		printk(KERN_INFO "Kernel virtual memory size shrunk to %lu "
+		       "bytes\n", virtmem_size);
+
+	setup_range(-1, NULL, uml_physmem, physmem_size,
+		    physmem_size + VMALLOC_OFFSET + virtmem_size);
+	setup_memory();
+	high_physmem = uml_physmem + physmem_size;
+
+	start_pfn = PFN_UP(__pa(uml_physmem));
+	end_pfn = PFN_DOWN(__pa(high_physmem));
+	bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
+	free_bootmem(__pa(uml_physmem) + bootmap_size,
+		     high_physmem - uml_physmem - bootmap_size);
+	uml_postsetup();
+
+	init_task.thread.kernel_stack = (unsigned long) &init_task +
+		2 * PAGE_SIZE;
+
+	task_protections((unsigned long) &init_task);
+	sp = (void *) init_task.thread.kernel_stack + 2 * PAGE_SIZE -
+		sizeof(unsigned long);
+	return(signals(start_kernel_proc, sp));
+}
+
+/* Initialize paging and hand the saved command line back via cmdline_p. */
+void setup_arch(char **cmdline_p)
+{
+	paging_init();
+	strcpy(command_line, saved_command_line);
+	*cmdline_p = command_line;
+	setup_hostinfo();
+}
+
+void check_bugs(void)
+{
+	check_ptrace();
+}
+
+spinlock_t pid_lock = SPIN_LOCK_UNLOCKED;
+
+void lock_pid(void)
+{
+	spin_lock(&pid_lock);
+}
+
+void unlock_pid(void)
+{
+	spin_unlock(&pid_lock);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/umid.c linux_umopenmosix/arch/um/kernel/umid.c
--- linux-2.4.17/arch/um/kernel/umid.c	Thu Jan  1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/umid.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,329 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "user.h"
+#include "umid.h"
+#include "init.h"
+
+#define UMID_LEN 64
+#define UML_DIR "~/.uml/"
+
+static char umid[UMID_LEN] = { 0 };
+static char *uml_dir = UML_DIR;
+
+static int umid_inited = 0;
+
+static int make_umid(void);
+
+/* "umid=" command line handler - records the requested machine name,
+ * truncating it to fit in umid[].
+ */
+static int __init set_umid(char *name, int *add)
+{
+	if(umid_inited){
+		printk("Unique machine name can't be set twice\n");
+		return(-1);
+	}
+
+	if(strlen(name) > UMID_LEN - 1)
+		printk("Unique machine name is being truncated to %d "
+		       "characters\n", UMID_LEN - 1);
+	strncpy(umid, name, UMID_LEN - 1);
+	umid[UMID_LEN - 1] = '\0';
+
+	umid_inited = 1;
+	return 0;
+}
+
+__uml_setup("umid=", set_umid,
+"umid=\n"
+" This is used to assign a unique identity to this UML machine\n"
+" This is used for naming the pid file and management console socket\n\n"
+);
+
+/* Builds "<uml_dir><umid>/<name>" in buf, which is len bytes long.
+ * Returns 0 on success, -1 if the umid can't be set up or buf is too small.
+ */
+int __init umid_file_name(char *name, char *buf, int len)
+{
+	int n;
+
+	if(!umid_inited && make_umid()) return(-1);
+
+	/* uml_dir + umid + '/' + name + terminating NUL */
+	n = strlen(uml_dir) + strlen(umid) + 1 + strlen(name) + 1;
+	if(n > len){
+		printk("umid_file_name : buffer too short\n");
+		return(-1);
+	}
+
+	sprintf(buf, "%s%s/%s", uml_dir, umid, name);
+	return(0);
+}
+
+extern int tracing_pid;
+
+/* Postsetup hook - writes the pid of the tracing thread (or of this process
+ * if there is none) to <uml_dir><umid>/pid.  Failures are only reported.
+ */
+static int __init create_pid_file(void)
+{
+	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
+	char pid[sizeof("-2147483648\n")];
+	int fd;
+
+	if(umid_file_name("pid", file, sizeof(file))) return 0;
+
+	if((fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644)) < 0){
+		printk("Open of machine pid file \"%s\" failed - "
+		       "errno = %d\n", file, errno);
+		return 0;
+	}
+
+	sprintf(pid, "%d\n", (tracing_pid == -1) ?  getpid() : tracing_pid);
+	if(write(fd, pid, strlen(pid)) != strlen(pid))
+		printk("Write of pid file failed - errno = %d\n", errno);
+	close(fd);
+	return 0;
+}
+
+/* Unlinks every entry in dir and then removes dir itself.  Returns 0 on
+ * success and 1 on the first failure.
+ */
+static int actually_do_remove(char *dir)
+{
+	DIR *directory;
+	struct dirent *ent;
+	int len;
+	char file[256];
+
+	if((directory = opendir(dir)) == NULL){
+		printk("actually_do_remove : couldn't open directory '%s', "
+		       "errno = %d\n", dir, errno);
+		return(1);
+	}
+	while((ent = readdir(directory)) != NULL){
+		if(!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
+			continue;
+		len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1;
+		if(len > sizeof(file)){
+			printk("Not deleting '%s' from '%s' - name too long\n",
+			       ent->d_name, dir);
+			continue;
+		}
+		sprintf(file, "%s/%s", dir, ent->d_name);
+		if(unlink(file) < 0){
+			printk("actually_do_remove : couldn't remove '%s' "
+			       "from '%s', errno = %d\n", ent->d_name, dir, 
+			       errno);
+			closedir(directory);
+			return(1);
+		}
+	}
+	closedir(directory);
+	if(rmdir(dir) < 0){
+		printk("actually_do_remove : couldn't rmdir '%s', "
+		       "errno = %d\n", dir, errno);
+		return(1);
+	}
+	return(0);
+}
+
+void remove_umid_dir(void)
+{
+	char dir[strlen(uml_dir) + UMID_LEN + 1];
+	if(!umid_inited) return;
+
+	sprintf(dir, "%s%s", uml_dir, umid);
+	actually_do_remove(dir);
+}
+
+char *get_umid(void)
+{
+	return(umid);
+}
+
+/* Decides whether the umid directory dir belongs to a live UML.  Returns
+ * non-zero if it is in use (or can't be examined); otherwise removes the
+ * stale directory and returns the result of that.
+ */
+int not_dead_yet(char *dir)
+{
+	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
+	char pid[sizeof("-2147483648\n")], *end;
+	int dead, fd, n, p;
+
+	sprintf(file, "%s/pid", dir);
+	dead = 0;
+	if((fd = open(file, O_RDONLY)) < 0){
+		if(errno != ENOENT){
+			printk("not_dead_yet : couldn't open pid file '%s', "
+			       "errno = %d\n", file, errno);
+			return(1);
+		}
+		dead = 1;
+	}
+	if(fd >= 0){
+		n = read(fd, pid, sizeof(pid) - 1);
+		if(n < 0){
+			printk("not_dead_yet : couldn't read pid file '%s', "
+			       "errno = %d\n", file, errno);
+			close(fd);
+			return(1);
+		}
+		close(fd);
+		/* read doesn't terminate the buffer, strtoul needs it */
+		pid[n] = '\0';
+		p = strtoul(pid, &end, 0);
+		if(end == pid){
+			printk("not_dead_yet : couldn't parse pid file '%s', "
+			       "errno = %d\n", file, errno);
+			dead = 1;
+		}
+		else if(((kill(p, 0) < 0) && (errno == ESRCH)) ||
+			(p == tracing_pid))
+			dead = 1;
+	}
+	if(!dead) return(1);
+	return(actually_do_remove(dir));
+}
+
+/* "uml_dir=" command line handler - makes uml_dir point at the given
+ * directory, appending a '/' if it doesn't already end in one.
+ */
+static int __init set_uml_dir(char *name, int *add)
+{
+	if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){
+		/* name + '/' + terminating NUL */
+		uml_dir = malloc(strlen(name) + 2);
+		if(uml_dir == NULL){
+			printk("Failed to malloc uml_dir - error = %d\n",
+			       errno);
+			uml_dir = name;
+			return(0);
+		}
+		sprintf(uml_dir, "%s/", name);
+	}
+	else uml_dir = name;
+	return 0;
+}
+
+/* Postsetup hook - expands a leading '~' in uml_dir to $HOME, makes sure it
+ * ends in '/', and creates the directory if it doesn't exist.
+ */
+static int __init make_uml_dir(void)
+{
+	char dir[MAXPATHLEN + 1] = { '\0' };
+	int len;
+
+	if(*uml_dir == '~'){
+		char *home = getenv("HOME");
+
+		if(home == NULL){
+			printk("make_uml_dir : no value in environment for "
+			       "$HOME\n");
+			exit(1);
+		}
+		/* dir starts zeroed - the "- 1"s keep its final NUL intact */
+		strncpy(dir, home, sizeof(dir) - 1);
+		uml_dir++;
+	}
+	len = strlen(dir);
+	strncat(dir, uml_dir, sizeof(dir) - len - 1);
+	len = strlen(dir);
+	if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){
+		dir[len] = '/';
+		dir[len + 1] = '\0';
+	}
+
+	if((uml_dir = malloc(strlen(dir) + 1)) == NULL){
+		printf("make_uml_dir : malloc failed, errno = %d\n", errno);
+		exit(1);
+	}
+	strcpy(uml_dir, dir);
+
+	if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){
+		printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
+		return(-1);
+	}
+	return 0;
+}
+
+/* Postsetup hook - picks a random umid with mkstemp if none was given, then
+ * creates <uml_dir><umid>, reclaiming it if a dead UML left it behind.
+ */
+static int __init make_umid(void)
+{
+	int fd, err;
+	char tmp[strlen(uml_dir) + UMID_LEN + 1];
+
+	strncpy(tmp, uml_dir, sizeof(tmp) - 1);
+	tmp[sizeof(tmp) - 1] = '\0';
+
+	if(*umid == 0){
+		strcat(tmp, "XXXXXX");
+		fd = mkstemp(tmp);
+		if(fd < 0){
+			printk("set_umid - mkstemp failed, errno = %d\n",
+			       errno);
+			return(1);
+		}
+
+		close(fd);
+		/* There's a nice tiny little race between this unlink and
+		 * the mkdir below.  It'd be nice if there were a mkstemp
+		 * for directories.
+		 */
+		unlink(tmp);
+		strcpy(umid, &tmp[strlen(uml_dir)]);
+	}
+
+	sprintf(tmp, "%s%s", uml_dir, umid);
+
+	if((err = mkdir(tmp, 0777)) < 0){
+		if(errno == EEXIST){
+			if(not_dead_yet(tmp)){
+				printk("umid '%s' is in use\n", umid);
+				return(-1);
+			}
+			err = mkdir(tmp, 0777);
+		}
+	}
+	if(err < 0){
+		printk("Failed to create %s - errno = %d\n", umid, errno);
+		return(-1);
+	}
+
+	/* The umid is final now - without this, every umid_file_name call
+	 * would re-run this and treat our own directory as a stale one.
+	 */
+	umid_inited = 1;
+	return(0);
+}
+
+__uml_setup("uml_dir=", set_uml_dir,
+"uml_dir=\n"
+" The location to place the pid and umid files.\n\n"
+);
+
+__uml_postsetup(make_uml_dir);
+__uml_postsetup(make_umid);
+__uml_postsetup(create_pid_file);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/kernel/unmap.c linux_umopenmosix/arch/um/kernel/unmap.c
--- linux-2.4.17/arch/um/kernel/unmap.c	Thu Jan  1 02:00:00 1970
+++ linux_umopenmosix/arch/um/kernel/unmap.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include
+#include
+#include
+#include "user.h"
+
+int switcheroo(int fd, int prot, void *from, void *to, int size)
+{
+	if(munmap(to, size) < 0){
+		return(-1);
+	}
+	if(mmap(to, size, prot,	MAP_SHARED | MAP_FIXED, fd, 0) != to){
+		return(-1);
+	}
+	if(munmap(from, size) < 0){
+		return(-1);
+	}
+	return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/kernel/user_syms.c linux_umopenmosix/arch/um/kernel/user_syms.c --- linux-2.4.17/arch/um/kernel/user_syms.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/user_syms.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "mem_user.h" + +/* XXX All the __CONFIG_* stuff is broken because this file can't include + * config.h + */ + +/* Had to steal this from linux/module.h because that file can't be included + * since this includes various user-level headers. + */ + +struct module_symbol +{ + unsigned long value; + const char *name; +}; + +/* Indirect stringification. */ + +#define __MODULE_STRING_1(x) #x +#define __MODULE_STRING(x) __MODULE_STRING_1(x) + +#if !defined(__AUTOCONF_INCLUDED__) + +#define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module +#define EXPORT_SYMBOL(var) error config_must_be_included_before_module +#define EXPORT_SYMBOL_NOVERS(var) error config_must_be_included_before_module + +#elif !defined(__CONFIG_MODULES__) + +#define __EXPORT_SYMBOL(sym,str) +#define EXPORT_SYMBOL(var) +#define EXPORT_SYMBOL_NOVERS(var) + +#else + +#define __EXPORT_SYMBOL(sym, str) \ +const char __kstrtab_##sym[] \ +__attribute__((section(".kstrtab"))) = str; \ +const struct module_symbol __ksymtab_##sym \ +__attribute__((section("__ksymtab"))) = \ +{ (unsigned long)&sym, __kstrtab_##sym } + +#if defined(__MODVERSIONS__) || !defined(__CONFIG_MODVERSIONS__) +#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) +#else +#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) +#endif + +#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) + +#endif + 
+EXPORT_SYMBOL(__errno_location); + +EXPORT_SYMBOL(access); +EXPORT_SYMBOL(open); +EXPORT_SYMBOL(open64); +EXPORT_SYMBOL(close); +EXPORT_SYMBOL(read); +EXPORT_SYMBOL(write); +EXPORT_SYMBOL(__xstat); +EXPORT_SYMBOL(__lxstat); +EXPORT_SYMBOL(__lxstat64); +EXPORT_SYMBOL(lseek); +EXPORT_SYMBOL(lseek64); +EXPORT_SYMBOL(chown); +EXPORT_SYMBOL(truncate); +EXPORT_SYMBOL(utime); +EXPORT_SYMBOL(chmod); +EXPORT_SYMBOL(rename); +EXPORT_SYMBOL(__xmknod); + +EXPORT_SYMBOL(symlink); +EXPORT_SYMBOL(link); +EXPORT_SYMBOL(unlink); +EXPORT_SYMBOL(readlink); + +EXPORT_SYMBOL(mkdir); +EXPORT_SYMBOL(rmdir); +EXPORT_SYMBOL(opendir); +EXPORT_SYMBOL(readdir); +EXPORT_SYMBOL(closedir); +EXPORT_SYMBOL(seekdir); +EXPORT_SYMBOL(telldir); + +EXPORT_SYMBOL(ioctl); + +extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes, + __off64_t __offset); +extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n, + __off64_t __offset); +EXPORT_SYMBOL(pread64); +EXPORT_SYMBOL(pwrite64); + +EXPORT_SYMBOL(statfs); +EXPORT_SYMBOL(statfs64); + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(getuid); + +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(strstr); + +EXPORT_SYMBOL(find_iomem); + + + + + diff -urN linux-2.4.17/arch/um/kernel/user_util.c linux_umopenmosix/arch/um/kernel/user_util.c --- linux-2.4.17/arch/um/kernel/user_util.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/kernel/user_util.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,346 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/types.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "mem_user.h" +#include "init.h" + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + + + +#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) + + +#define COMMAND_LINE_SIZE 
_POSIX_ARG_MAX + +char saved_command_line[COMMAND_LINE_SIZE] = { 0 }; +char command_line[COMMAND_LINE_SIZE] = { 0 }; + +void add_arg(char *cmd_line, char *arg) +{ + if (strlen(cmd_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { + printf("add_arg: Too much command line!\n"); + exit(1); + } + if(strlen(cmd_line) > 0) strcat(cmd_line, " "); + strcat(cmd_line, arg); +} + +void remap_data(void *segment_start, void *segment_end, int w) +{ + void *addr; + unsigned long size; + int data, prot; + + if(w) prot = PROT_WRITE; + else prot = 0; + prot |= PROT_READ | PROT_EXEC; + size = (unsigned long) segment_end - + (unsigned long) segment_start; + data = create_mem_file(size); + if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, + MAP_SHARED, data, 0)) < 0){ + perror("mapping new data segment"); + exit(1); + } + memcpy(addr, segment_start, size); + if(switcheroo(data, prot, addr, segment_start, + size) < 0){ + printf("switcheroo failed\n"); + exit(1); + } +} + +__u64 file_size(char *file) +{ + struct stat64 buf; + + if(stat64(file, &buf) == -1){ + printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); + return(-errno); + } + if(S_ISBLK(buf.st_mode)){ + long long size; + int fd; + + if((fd = open64(file, O_RDONLY)) < 0){ + printk("Couldn't open \"%s\", errno = %d\n", file, + errno); + return(-errno); + } + if(ioctl(fd, BLKGETSIZE, &size) < 0){ + printk("Couldn't get the block size of \"%s\", " + "errno = %d\n", file, errno); + close(fd); + return(-errno); + } + size *= 512; + close(fd); + return(size); + } + return(buf.st_size); +} + +void stop(void) +{ + while(1) sleep(1000000); +} + +void stack_protections(unsigned long address) +{ + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + + if(mprotect((void *) address, page_size(), prot) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +void task_protections(unsigned long address) +{ + unsigned long guard = address + page_size(); + unsigned long stack = guard + page_size(); + int prot = 0; + + if(mprotect((void 
*) stack, page_size(), prot) < 0) + panic("protecting guard page failed, errno = %d", errno); + prot = PROT_READ | PROT_WRITE | PROT_EXEC; + if(mprotect((void *) stack, 2 * page_size(), prot) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +int protect(unsigned long addr, unsigned long len, int r, int w, int x, + int must_succeed) +{ + int prot; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + if(mprotect((void *) addr, len, prot) == -1){ + if(must_succeed) + panic("protect failed, errno = %d", errno); + else return(-errno); + } + return(0); +} + +int wait_for_stop(int pid, int sig, int cont_type) +{ + int status, ret; + + while(1){ + if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || + !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ + if(ret < 0){ + if(errno == EINTR) continue; + printk("wait failed, errno = %d\n", + errno); + } + else if(WIFEXITED(status)) + printk("process exited with status %d\n", + WEXITSTATUS(status)); + else if(WIFSIGNALED(status)) + printk("process exited with signal %d\n", + WTERMSIG(status)); + else if((WSTOPSIG(status) == SIGVTALRM) || + (WSTOPSIG(status) == SIGALRM) || + (WSTOPSIG(status) == SIGIO) || + (WSTOPSIG(status) == SIGPROF) || + (WSTOPSIG(status) == SIGCHLD) || + (WSTOPSIG(status) == SIGWINCH) || + (WSTOPSIG(status) == SIGINT)){ + ptrace(cont_type, pid, 0, WSTOPSIG(status)); + continue; + } + else printk("process stopped with signal %d\n", + WSTOPSIG(status)); + panic("wait_for_stop failed to wait for %d to stop " + "with %d\n", pid, sig); + } + return(status); + } +} + +int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags) +{ + int pid; + + pid = clone(fn, sp, flags, arg); + if(pid < 0) return(-1); + wait_for_stop(pid, SIGSTOP, PTRACE_CONT); + ptrace(PTRACE_CONT, pid, 0, 0); + return(pid); +} + +struct grantpt_info { + int fd; + int res; + int err; +}; + +static void grantpt_cb(void *arg) +{ + struct grantpt_info *info = arg; + + info->res = 
grantpt(info->fd); + info->err = errno; +} + +int get_pty(void) +{ + struct grantpt_info info; + int fd; + + if((fd = open("/dev/ptmx", O_RDWR)) < 0){ + printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", + errno); + return(-1); + } + info.fd = fd; + tracing_cb(grantpt_cb, &info); + if(info.res < 0){ + printk("get_pty : Couldn't grant pty - errno = %d\n", + info.err); + return(-1); + } + if(unlockpt(fd) < 0){ + printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); + return(-1); + } + return(fd); +} + +int raw(int fd, int complain) +{ + struct termios tt; + int err; + + tcgetattr(fd, &tt); + cfmakeraw(&tt); + err = tcsetattr(fd, TCSANOW, &tt); + if((err < 0) && complain){ + printk("tcsetattr failed, errno = %d\n", errno); + return(-errno); + } + return(0); +} + +void setup_machinename(char *machine_out) +{ + struct utsname host; + + uname(&host); + strcpy(machine_out, host.machine); +} + +char host_info[(_UTSNAME_LENGTH + 1) * 4 + _UTSNAME_NODENAME_LENGTH + 1]; + +void setup_hostinfo(void) +{ + struct utsname host; + + uname(&host); + sprintf(host_info, "%s %s %s %s %s", host.sysname, host.nodename, + host.release, host.version, host.machine); +} + +void close_fd(int fd) +{ + close(fd); +} + +char *tempdir = NULL; + +static void __init find_tempdir(void) +{ + char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; + int i; + char *dir = NULL; + + if(tempdir != NULL) return; /* We've already been called */ + for(i = 0; dirs[i]; i++){ + dir = getenv(dirs[i]); + if(dir != NULL) break; + } + if(dir == NULL) dir = "/tmp"; + else if(*dir == '\0') dir = NULL; + if(dir != NULL) { + tempdir = malloc(strlen(dir) + 2); + if(tempdir == NULL){ + fprintf(stderr, "Failed to malloc tempdir, " + "errno = %d\n", errno); + return; + } + strcpy(tempdir, dir); + strcat(tempdir, "/"); + } +} + +int make_tempfile(const char *template, char **out_tempname, int do_unlink) +{ + char tempname[MAXPATHLEN]; + int fd; + + find_tempdir(); + if (*template != '/') + strcpy(tempname, 
tempdir); + else + *tempname = 0; + strcat(tempname, template); + if((fd = mkstemp(tempname)) < 0){ + fprintf(stderr, "open - cannot create %s: %s\n", tempname, + strerror(errno)); + return -1; + } + if(do_unlink && (unlink(tempname) < 0)){ + perror("unlink"); + return -1; + } + if(out_tempname){ + if((*out_tempname = strdup(tempname)) == NULL){ + perror("strdup"); + return -1; + } + } + return(fd); +} + + + + + + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/link.ld.in linux_umopenmosix/arch/um/link.ld.in --- linux-2.4.17/arch/um/link.ld.in Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/link.ld.in Wed Jun 26 23:45:15 2002 @@ -0,0 +1,133 @@ +OUTPUT_FORMAT("elf32-ELF_SUBARCH") +OUTPUT_ARCH(ELF_SUBARCH) +ENTRY(_start) + +SECTIONS +{ + . = START() + SIZEOF_HEADERS; + + . = ALIGN(4096); + .thread_private : { + __start_thread_private = .; + errno = .; + . += 4; + arch/um/kernel/unmap_fin.o (.data) + __end_thread_private = .; + } + . = ALIGN(4096); + .remap : { arch/um/kernel/unmap_fin.o (.text) } + + . = ALIGN(4096); /* Init code and data */ + _stext = .; + __init_begin = .; + .text.init : { *(.text.init) } + . = ALIGN(4096); + .text : + { + *(.text) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + *(.gnu.linkonce.t*) + } + .kstrtab : { *(.kstrtab) } + + . 
= ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + .fini : { *(.fini) } =0x9090 + .rodata : { *(.rodata) *(.gnu.linkonce.r*) } + .rodata1 : { *(.rodata1) } + _etext = .; + PROVIDE (etext = .); + + . = ALIGN(4096); + PROVIDE (_sdata = .); + + .unprotected : { *(.unprotected) } + . = ALIGN(4096); + PROVIDE (_unprotected_end = .); + + . = ALIGN(4096); + __uml_setup_start = .; + .uml.setup.init : { *(.uml.setup.init) } + __uml_setup_end = .; + __uml_help_start = .; + .uml.help.init : { *(.uml.help.init) } + __uml_help_end = .; + __uml_postsetup_start = .; + .uml.postsetup.init : { *(.uml.postsetup.init) } + __uml_postsetup_end = .; + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + __uml_initcall_start = .; + .uml.initcall.init : { *(.uml.initcall.init) } + __uml_initcall_end = .; + __init_end = .; + __exitcall_begin = .; + .exitcall : { *(.exitcall.exit) } + __exitcall_end = .; + __uml_exitcall_begin = .; + .uml.exitcall : { *(.uml.exitcall.exit) } + __uml_exitcall_end = .; + + .data.init : { *(.data.init) } + .data : + { + . = ALIGN(16384); /* init_task */ + *(.data.init_task) + *(.data) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + .data1 : { *(.data1) } + .ctors : + { + *(.ctors) + } + .dtors : + { + *(.dtors) + } + + .got : { *(.got.plt) *(.got) } + .dynamic : { *(.dynamic) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + . 
= ALIGN(0x1000); + .sbss : + { + __bss_start = .; + PROVIDE(_bss_start = .); + *(.sbss) + *(.scommon) + } + .bss : + { + *(.dynbss) + *(.bss) + *(COMMON) + } + _end = . ; + PROVIDE (end = .); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff -urN linux-2.4.17/arch/um/main.c linux_umopenmosix/arch/um/main.c --- linux-2.4.17/arch/um/main.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/main.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "user_util.h" +#include "kern_util.h" +#include "mem_user.h" +#include "user.h" +#include "init.h" + +unsigned long stacksizelim; + +char *linux_prog; + +#define PGD_BOUND (4 * 1024 * 1024) +#define STACKSIZE (8 * 1024 * 1024) +#define THREAD_NAME_LEN (256) + +char padding[THREAD_NAME_LEN] = { [ 0 ... 
THREAD_NAME_LEN - 2] = ' ', '\0' }; + +static void set_stklim(void) +{ + struct rlimit lim; + + if(getrlimit(RLIMIT_STACK, &lim) < 0){ + perror("getrlimit"); + exit(1); + } + if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ + lim.rlim_cur = STACKSIZE; + if(setrlimit(RLIMIT_STACK, &lim) < 0){ + perror("setrlimit"); + exit(1); + } + } + stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); +} + +static __init void do_uml_initcalls(void) +{ + initcall_t *call; + + call = &__uml_initcall_start; + while (call < &__uml_initcall_end){; + (*call)(); + call++; + } +} +int main(int argc, char **argv, char **envp) +{ + int ret, i; + char **new_argv; + + /* Allocate memory for thread command lines */ + if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ + new_argv = malloc((argc + 2) * sizeof(char*)); + if(!new_argv) { + perror("Allocating extended argv"); + exit(1); + } + + new_argv[0] = argv[0]; + new_argv[1] = padding; + + for(i = 2; i <= argc; i++) + new_argv[i] = argv[i - 1]; + new_argv[argc + 1] = NULL; + +#ifdef PROFILING + disable_profile_timer(); +#endif + execvp(new_argv[0], new_argv); + perror("execing with extended args"); + exit(1); + } + + linux_prog = argv[0]; + + set_stklim(); + set_task_sizes(0); + + if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ + perror("Mallocing argv"); + exit(1); + } + for(i=0;i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ptproxy.h" +#include "sysdep.h" +#include "wait.h" + +#include "user_util.h" +#include "user.h" + +/* + * Handle debugger trap, i.e. syscall. 
+ */
+
+int debugger_syscall (debugger_state *debugger, pid_t child)
+{
+	long arg1, arg2, arg3, arg4, arg5, result;
+	int syscall, ret = 0;
+
+	syscall = get_syscall(debugger->pid, &arg1, &arg2, &arg3, &arg4, 
+			      &arg5);
+
+	switch(syscall){
+	case __NR_execve:
+		/* execve never returns */
+		debugger->handle_trace = debugger_syscall; 
+		break;
+
+	case __NR_ptrace:
+		if(debugger->debugee->pid != 0) arg2 = debugger->debugee->pid;
+		result = proxy_ptrace(debugger, arg1, arg2, arg3, arg4, child, 
+				      &ret);
+		syscall_cancel(debugger->pid, result);
+		debugger->handle_trace = debugger_syscall;
+		return(ret);
+
+	case __NR_waitpid:
+	case __NR_wait4:
+		debugger->wait_status_ptr = (int *) arg2;
+		debugger->wait_options = arg3;
+		if(debugger->debugee->event){
+			syscall_continue(debugger->pid);
+			wait_for_stop(debugger->pid, SIGTRAP, PTRACE_SYSCALL);
+			proxy_wait_return(debugger, -1);
+			return(0);
+		}
+		else if(debugger->wait_options & WNOHANG){
+			syscall_cancel(debugger->pid, 0);
+			debugger->handle_trace = debugger_syscall;
+			return(0);
+		}
+		else {
+			syscall_pause(debugger->pid);
+			debugger->handle_trace = proxy_wait_return;
+			debugger->waiting = 1;
+		}
+		break;
+
+	case __NR_kill:
+		if(arg1 == debugger->debugee->pid){
+			result = kill(child, arg2);
+			/* Hand back -errno, like the results that
+			 * proxy_ptrace produces for syscall_cancel.
+			 */
+			if(result == -1) result = -errno;
+			syscall_cancel(debugger->pid, result);
+			debugger->handle_trace = debugger_syscall;
+			return(0);
+		}
+		else debugger->handle_trace = debugger_normal_return;
+		break;
+
+	default:
+		debugger->handle_trace = debugger_normal_return;
+	}
+
+	syscall_continue (debugger->pid);
+	return(ret);
+}
+
+/* Trace handler for the stop that follows a system call which was let
+ * through untouched - re-arms debugger_syscall and resumes the debugger.
+ */
+int debugger_normal_return(debugger_state *debugger, pid_t unused)
+{
+	debugger->handle_trace = debugger_syscall;
+	syscall_continue(debugger->pid);
+	return(0);
+}
+
+/* Stores result as the debugger's system call result, re-arms
+ * debugger_syscall and resumes the debugger.
+ */
+void debugger_cancelled_return(debugger_state *debugger, int result)
+{
+	debugger->handle_trace = debugger_syscall;
+	syscall_set_result(debugger->pid, result);
+	syscall_continue(debugger->pid);
+}
+
+#ifdef CONFIG_SMP
+#error need to make these arrays
+#endif
+
+static debugger_state debugger;
+static debugee_state debugee;
+
+/* Resets the proxy state for a debugger with pid debugger_pid.  stopped and
+ * status seed the debugee's stopped flag and wait status.
+ */
+void init_proxy (pid_t debugger_pid, int stopped, int status)
+{
+	debugger.pid = debugger_pid;
+	debugger.handle_trace = debugger_syscall;
+	debugger.debugee = &debugee;
+	debugger.waiting = 0;
+
+	debugee.pid = 0;
+	debugee.traced = 0;
+	debugee.stopped = stopped;
+	debugee.event = 0;
+	debugee.zombie = 0;
+	debugee.died = 0;
+	debugee.wait_status = status;
+}
+
+/* Dispatches a wait status seen on the debugger - SIGTRAP stops go to the
+ * current trace handler, other stop signals are passed on to the debugger,
+ * and anything else is fatal.
+ */
+int debugger_proxy(int status, int pid)
+{
+	int ret = 0;
+
+	if(WIFSTOPPED(status)){
+		if (WSTOPSIG (status) == SIGTRAP)
+			ret = (*debugger.handle_trace)(&debugger, pid);
+		else ptrace(PTRACE_SYSCALL, debugger.pid, 0, WSTOPSIG(status));
+	}
+	else if(WIFEXITED(status)){
+		tracer_panic("debugger (pid %d) exited with status %d", 
+			     debugger.pid, WEXITSTATUS(status));
+	}
+	else if(WIFSIGNALED(status)){
+		tracer_panic("debugger (pid %d) exited with signal %d", 
+			     debugger.pid, WTERMSIG(status));
+	}
+	else {
+		tracer_panic("proxy got unknown status (0x%x) on debugger "
+			     "(pid %d)", status, debugger.pid);
+	}
+	return(ret);
+}
+
+/* Records a wait status seen on the debugee and pokes the debugger with
+ * SIGCHLD so that it goes and collects it.
+ */
+void child_proxy(pid_t pid, int status)
+{
+	debugee.event = 1;
+	debugee.wait_status = status;
+
+	if(WIFSTOPPED(status)){
+		debugee.stopped = 1;
+		kill(debugger.pid, SIGCHLD);
+	}
+	else if(WIFEXITED(status) || WIFSIGNALED(status)){
+		debugee.zombie = 1;
+		kill(debugger.pid, SIGCHLD);
+	}
+	else panic("proxy got unknown status (0x%x) on child (pid %d)", 
+		   status, pid);
+}
+
+/* Reports a normal exit of the debugee to the debugger, services the
+ * debugger until it is no longer waiting, and then detaches from it.
+ */
+void fake_child_exit(void)
+{
+	int status, pid;
+
+	child_proxy(1, W_EXITCODE(0, 0));
+	while(debugger.waiting == 1){
+		pid = waitpid(debugger.pid, &status, WUNTRACED);
+		if(pid != debugger.pid){
+			printk("fake_child_exit - waitpid failed, "
+			       "errno = %d\n", errno);
+			return;
+		}
+		debugger_proxy(status, debugger.pid);
+	}
+	pid = waitpid(debugger.pid, &status, WUNTRACED);
+	if(pid != debugger.pid){
+		printk("fake_child_exit - waitpid failed, "
+		       "errno = %d\n", errno);
+		return;
+	}
+	if(ptrace(PTRACE_DETACH, debugger.pid, 0, SIGCONT) < 0)
+		printk("fake_child_exit - PTRACE_DETACH failed, errno = %d\n",
+		       errno);
+}
+
+/* Written out as adjacent literals - a string literal may not contain a
+ * raw newline.  The bytes are the same as before.
+ */
+char gdb_init_string[] = 
+"att 1\n"
+"b panic\n"
+"b stop\n"
+"handle SIGWINCH nostop noprint pass\n"
+"";
+
+/* Forks and execs gdb on prog with the gdb channel as its terminal and a
+ * generated command file.  Returns the child's pid and stores the channel
+ * descriptor in *fd_out, or returns -1 if the fork fails.
+ */
+int start_debugger(char *prog, int startup, int stop, int *fd_out)
+{
+	int slave, child;
+
+	slave = open_gdb_chan();
+	if((child = fork()) == 0){
+		char *tempname = NULL;
+		int fd;
+
+		if(setsid() < 0) perror("setsid");
+		if((dup2(slave, 0) < 0) || (dup2(slave, 1) < 0) || 
+		   (dup2(slave, 2) < 0)){
+			printk("start_debugger : dup2 failed, errno = %d\n",
+			       errno);
+			exit(1);
+		}
+		if(ioctl(0, TIOCSCTTY, 0) < 0){
+			printk("start_debugger : TIOCSCTTY failed, "
+			       "errno = %d\n", errno);
+			exit(1);
+		}
+		if(tcsetpgrp (1, getpid()) < 0){
+			printk("start_debugger : tcsetpgrp failed, "
+			       "errno = %d\n", errno);
+#ifdef notdef
+			exit(1);
+#endif
+		}
+		if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){
+			printk("start_debugger : make_tempfile failed, errno = %d\n",
+			       errno);
+			exit(1);
+		}
+		write(fd, gdb_init_string, sizeof(gdb_init_string) - 1);
+		if(startup){
+			if(stop){
+				write(fd, "b start_kernel\n",
+				      strlen("b start_kernel\n"));
+			}
+			write(fd, "c\n", strlen("c\n"));
+		}
+		if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
+			printk("start_debugger : PTRACE_TRACEME failed, "
+			       "errno = %d\n", errno);
+			exit(1);
+		}
+		execlp("gdb", "gdb", "--command", tempname, prog, NULL);
+		printk("start_debugger : exec of gdb failed, errno = %d\n",
+		       errno);
+		/* Still in the child - it must not return into the
+		 * caller's code as though it were the parent.
+		 */
+		exit(1);
+	}
+	if(child < 0){
+		printk("start_debugger : fork for gdb failed, errno = %d\n",
+		       errno);
+		return(-1);
+	}
+	*fd_out = slave;
+	return(child);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/ptproxy/ptproxy.h linux_umopenmosix/arch/um/ptproxy/ptproxy.h --- linux-2.4.17/arch/um/ptproxy/ptproxy.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/ptproxy/ptproxy.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,58 @@ +/********************************************************************** +ptproxy.h + +Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing +terms and conditions. +**********************************************************************/ + +#ifndef __PTPROXY_H +#define __PTPROXY_H + +#include + +typedef struct debugger debugger_state; +typedef struct debugee debugee_state; + +struct debugger +{ + pid_t pid; + int wait_options; + int *wait_status_ptr; + unsigned int waiting : 1; + int (*handle_trace) (debugger_state *, pid_t); + + debugee_state *debugee; +}; + +struct debugee +{ + pid_t pid; + int wait_status; + unsigned died : 1; + unsigned event : 1; + unsigned stopped : 1; + unsigned trace_singlestep : 1; + unsigned trace_syscall : 1; + unsigned traced : 1; + unsigned zombie : 1; +}; + +extern int debugger_syscall(debugger_state *debugger, pid_t pid); +extern int debugger_normal_return (debugger_state *debugger, pid_t unused); + +extern long proxy_ptrace (struct debugger *, int, pid_t, long, long, pid_t, + int *strace_out); +extern void debugger_cancelled_return(debugger_state *debugger, int result); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/ptproxy/ptrace.c linux_umopenmosix/arch/um/ptproxy/ptrace.c
--- linux-2.4.17/arch/um/ptproxy/ptrace.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/ptproxy/ptrace.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,218 @@
+/**********************************************************************
+ptrace.c
+
+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing
+terms and conditions.
+
+Jeff Dike (jdike@karaya.com) : Modified for integration into uml
+**********************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ptproxy.h"
+#include "debug.h"
+#include "user_util.h"
+
+#if defined(UM_HAVE_GETREGS) || defined(UM_HAVE_GETFPREGS)
+/* Poke n longs from buf into process pid at addr; returns 0 or -errno. */
+static long poke_words(pid_t pid, long addr, long *buf, int n)
+{
+	int i;
+
+	for(i = 0; i < n; i++){
+		if(ptrace(PTRACE_POKEDATA, pid,
+			  addr + i * (long) sizeof(long), buf[i]) == -1)
+			return(-errno);
+	}
+	return(0);
+}
+#endif
+
+#if defined(UM_HAVE_SETREGS) || defined(UM_HAVE_SETFPREGS)
+/* Peek n longs from process pid at addr into buf; returns 0 or -errno. */
+static long peek_words(pid_t pid, long addr, long *buf, int n)
+{
+	int i;
+
+	for(i = 0; i < n; i++){
+		/* -1 is a legal word, so errno is the only error signal */
+		errno = 0;
+		buf[i] = ptrace(PTRACE_PEEKDATA, pid,
+				addr + i * (long) sizeof(long), 0);
+		if((buf[i] == -1) && (errno != 0))
+			return(-errno);
+	}
+	return(0);
+}
+#endif
+
+/*
+ * Carry out, on behalf of the debugger, the ptrace request arg1 whose
+ * pid, addr and data arguments are arg2, arg3 and arg4, applying it to
+ * child.  Values the debugger expects in its own memory (peeked words,
+ * register sets) are poked into debugger->pid at arg4.
+ * *ret is 0, except that PTRACE_CONT sets it to PTRACE_CONT and a
+ * successful PTRACE_SYSCALL sets it to PTRACE_SYSCALL.
+ * Returns the request's result, or a negative errno value on failure
+ * (-ESRCH once the debugee has died, -EINVAL for unhandled requests).
+ */
+long proxy_ptrace(struct debugger *debugger, int arg1, pid_t arg2,
+		  long arg3, long arg4, pid_t child, int *ret)
+{
+	long result;
+	int status;
+
+	*ret = 0;
+	if(debugger->debugee->died) return(-ESRCH);
+
+	switch(arg1){
+	case PTRACE_ATTACH:
+		if(debugger->debugee->traced) return(-EPERM);
+
+		debugger->debugee->pid = arg2;
+		debugger->debugee->traced = 1;
+		if(debugger->debugee->stopped)
+			child_proxy(child, W_STOPCODE(SIGSTOP));
+		else kill(child, SIGSTOP);
+		return(0);
+
+	case PTRACE_CONT:
+		*ret = PTRACE_CONT;
+		result = ptrace(PTRACE_CONT, child, arg3, arg4);
+		if(result == -1) return(-errno);
+
+		return(result);
+
+	case PTRACE_DETACH:
+		if(!debugger->debugee->traced) return(-EPERM);
+
+		debugger->debugee->traced = 0;
+		kill(child, SIGCONT);
+		return(0);
+
+#ifdef UM_HAVE_GETFPREGS
+	case PTRACE_GETFPREGS:
+	{
+		long regs[UM_MAX_FP_REG];
+
+		result = ptrace(PTRACE_GETFPREGS, child, 0, regs);
+		if(result == -1) return(-errno);
+
+		return(poke_words(debugger->pid, arg4, regs,
+				  sizeof(regs)/sizeof(regs[0])));
+	}
+#endif
+
+#ifdef UM_HAVE_GETREGS
+	case PTRACE_GETREGS:
+	{
+		long regs[UM_MAX_REG];
+
+		result = ptrace(PTRACE_GETREGS, child, 0, regs);
+		if(result == -1) return(-errno);
+
+		return(poke_words(debugger->pid, arg4, regs,
+				  sizeof(regs)/sizeof(regs[0])));
+	}
+#endif
+
+	case PTRACE_KILL:
+		result = ptrace(PTRACE_KILL, child, arg3, arg4);
+		if(result == -1) return(-errno);
+
+		return(result);
+
+	case PTRACE_PEEKDATA:
+	case PTRACE_PEEKTEXT:
+	case PTRACE_PEEKUSER:
+		/* The value being read out could be -1, so we have to
+		 * check errno to see if there's an error, and zero it
+		 * beforehand so we're not faked out by an old error
+		 */
+
+		errno = 0;
+		result = ptrace(arg1, child, arg3, 0);
+		if((result == -1) && (errno != 0)) return(-errno);
+
+		result = ptrace(PTRACE_POKEDATA, debugger->pid, arg4, result);
+		if(result == -1) return(-errno);
+
+		return(result);
+
+	case PTRACE_POKEDATA:
+	case PTRACE_POKETEXT:
+	case PTRACE_POKEUSER:
+		result = ptrace(arg1, child, arg3, arg4);
+		if(result == -1) return(-errno);
+
+		return(result);
+
+#ifdef UM_HAVE_SETFPREGS
+	case PTRACE_SETFPREGS:
+	{
+		long regs[UM_MAX_FP_REG];
+
+		result = peek_words(debugger->pid, arg4, regs,
+				    sizeof(regs)/sizeof(regs[0]));
+		if(result < 0) return(result);
+
+		result = ptrace(PTRACE_SETFPREGS, child, 0, regs);
+		if(result == -1) return(-errno);
+
+		return(result);
+	}
+#endif
+
+#ifdef UM_HAVE_SETREGS
+	case PTRACE_SETREGS:
+	{
+		long regs[UM_MAX_REG];
+
+		result = peek_words(debugger->pid, arg4, regs,
+				    sizeof(regs)/sizeof(regs[0]));
+		if(result < 0) return(result);
+
+		result = ptrace(PTRACE_SETREGS, child, 0, regs);
+		if(result == -1) return(-errno);
+
+		return(result);
+	}
+#endif
+
+	case PTRACE_SINGLESTEP:
+		result = ptrace(PTRACE_SINGLESTEP, child, arg3, arg4);
+		if(result == -1) return(-errno);
+
+		status = wait_for_stop(child, SIGTRAP, PTRACE_SINGLESTEP);
+		child_proxy(child, status);
+		return(result);
+
+	case PTRACE_SYSCALL:
+		result = ptrace(PTRACE_SYSCALL, child, arg3, arg4);
+		if(result == -1) return(-errno);
+
+		*ret = PTRACE_SYSCALL;
+		return(result);
+
+	case PTRACE_TRACEME:
+	default:
+		return(-EINVAL);
+	}
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/ptproxy/sysdep.c linux_umopenmosix/arch/um/ptproxy/sysdep.c
--- linux-2.4.17/arch/um/ptproxy/sysdep.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/ptproxy/sysdep.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,83 @@
+/**********************************************************************
+sysdep.c
+
+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing
+terms and conditions.
+**********************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "sysdep/ptrace.h"
+#include "user_util.h"
+#include "user.h"
+
+/*
+ * Fetch the five system call arguments of pid into *arg1 .. *arg5 and
+ * return its system call number.  The PTRACE_PEEKUSER calls are unchecked.
+ */
+int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, long *arg4,
+		long *arg5)
+{
+	*arg1 = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_ARG1_OFFSET, 0);
+	*arg2 = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_ARG2_OFFSET, 0);
+	*arg3 = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_ARG3_OFFSET, 0);
+	*arg4 = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_ARG4_OFFSET, 0);
+	*arg5 = ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_ARG5_OFFSET, 0);
+	return(ptrace(PTRACE_PEEKUSER, pid, UM_SYSCALL_NR_OFFSET, 0));
+}
+
+/*
+ * Replace the system call number of pid with getpid, resume it with
+ * PTRACE_SYSCALL and wait for it to stop again, then store result as its
+ * return value and resume it.  result is a long, as declared in sysdep.h.
+ */
+void syscall_cancel(pid_t pid, long result)
+{
+	if((ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_NR_OFFSET,
+		   __NR_getpid) < 0) ||
+	   (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) ||
+	   (wait_for_stop(pid, SIGTRAP, PTRACE_SYSCALL) < 0) ||
+	   (ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_RET_OFFSET, result) < 0) ||
+	   (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0))
+		printk("ptproxy: couldn't cancel syscall: errno = %d\n",
+		       errno);
+}
+
+/* Store result as the system call return value of pid. */
+void syscall_set_result(pid_t pid, long result)
+{
+	ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_RET_OFFSET, result);
+}
+
+/* Resume pid with PTRACE_SYSCALL. */
+void syscall_continue(pid_t pid)
+{
+	ptrace(PTRACE_SYSCALL, pid, 0, 0);
+}
+
+/* Replace the system call number of pid with pause; returns 0 or -1. */
+int syscall_pause(pid_t pid)
+{
+	if(ptrace(PTRACE_POKEUSER, pid, UM_SYSCALL_NR_OFFSET, __NR_pause) < 0){
+		printk("syscall_pause - ptrace failed, errno = %d\n", errno);
+		return(-1);
+	}
+	return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/ptproxy/sysdep.h linux_umopenmosix/arch/um/ptproxy/sysdep.h
--- linux-2.4.17/arch/um/ptproxy/sysdep.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/ptproxy/sysdep.h Wed Jun 26 23:45:15 2002
@@ -0,0 +1,25 @@
+/**********************************************************************
+sysdep.h
+
+Copyright (C) 1999 Lars Brinkhoff.
+Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+See the file COPYING for licensing terms and conditions.
+**********************************************************************/
+
+extern int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3,
+		       long *arg4, long *arg5);
+extern void syscall_cancel (pid_t pid, long result);
+extern void syscall_set_result (pid_t pid, long result);
+extern void syscall_continue (pid_t pid);
+extern int syscall_pause(pid_t pid);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/ptproxy/wait.c linux_umopenmosix/arch/um/ptproxy/wait.c
--- linux-2.4.17/arch/um/ptproxy/wait.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/ptproxy/wait.c Wed Jun 26 23:45:15 2002
@@ -0,0 +1,69 @@
+/**********************************************************************
+wait.c
+
+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing
+terms and conditions.
+
+**********************************************************************/
+
+#include
+#include
+#include
+
+#include "ptproxy.h"
+#include "sysdep.h"
+#include "wait.h"
+
+#include
+#include "sysdep/ptrace.h"
+
+/*
+ * Complete a wait call made by the debugger.
+ * If the debugee has died or __WCLONE was requested, -ECHILD is handed
+ * to debugger_cancelled_return().  If the debugee has an event pending,
+ * its wait_status is poked into the debugger at wait_status_ptr and the
+ * debugee's pid is handed to debugger_cancelled_return(); an event
+ * pending on a zombie also marks the debugee as died.  Otherwise
+ * debugger_normal_return() is called.
+ * Always returns 0; the second argument is not used.
+ */
+int proxy_wait_return (struct debugger *debugger, pid_t unused)
+{
+	struct debugee *child = debugger->debugee;
+
+	debugger->waiting = 0;
+
+	if(child->died || (debugger->wait_options & __WCLONE)){
+		debugger_cancelled_return(debugger, -ECHILD);
+		return(0);
+	}
+
+	if(!child->event){
+		/* pause will return -EINTR, which happens to be right for wait */
+		debugger_normal_return(debugger, -1);
+		return(0);
+	}
+
+	/* once died is set, later calls take the -ECHILD path above */
+	if(child->zombie)
+		child->died = 1;
+
+	child->event = 0;
+	ptrace(PTRACE_POKEDATA, debugger->pid, debugger->wait_status_ptr,
+	       child->wait_status);
+	/* if (wait4)
+	   ptrace (PTRACE_POKEDATA, pid, rusage_ptr, ...); */
+	debugger_cancelled_return(debugger, child->pid);
+	return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/ptproxy/wait.h linux_umopenmosix/arch/um/ptproxy/wait.h --- linux-2.4.17/arch/um/ptproxy/wait.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/ptproxy/wait.h Wed Jun 26 23:45:15 2002 @@ -0,0 +1,8 @@ +/********************************************************************** +wait.h + +Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing +terms and conditions. +**********************************************************************/ + +extern int proxy_wait_return (struct debugger *debugger, pid_t unused); diff -urN linux-2.4.17/arch/um/sys-i386/Makefile linux_umopenmosix/arch/um/sys-i386/Makefile --- linux-2.4.17/arch/um/sys-i386/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/Makefile Fri Jun 28 00:26:36 2002 @@ -0,0 +1,51 @@ +OBJ = sys.o + +OBJS = checksum.o ldt.o old-checksum.o ptrace.o ptrace_user.o semaphore.o \ + sigcontext.o syscalls.o sysrq.o +export-objs = ksyms.o + +SYMLINKS = semaphore.c old-checksum.c checksum.S + +all: $(OBJ) + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ldt.o: ldt.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ptrace_user.o: ptrace_user.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +checksum.S old-checksum.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/lib/$@ $@ + +semaphore.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/kernel/$@ $@ + +clean: + rm -f $(OBJS) $(export-objs) + +fastdep: + +archmrproper: + rm -f $(SYMLINKS) + +archclean: + rm -f link.ld + @$(MAKEBOOT) clean + +archdep: + @$(MAKEBOOT) dep + +modules: + +include $(TOPDIR)/Rules.make + + diff -urN linux-2.4.17/arch/um/sys-i386/Makefile.entry.S linux_umopenmosix/arch/um/sys-i386/Makefile.entry.S --- 
linux-2.4.17/arch/um/sys-i386/Makefile.entry.S Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/Makefile.entry.S Fri Jun 28 00:13:02 2002 @@ -0,0 +1,74 @@ +OBJ = sys.o + +OBJS = checksum.o ldt.o old-checksum.o ptrace.o ptrace_user.o semaphore.o \ + sigcontext.o syscalls.o sysrq.o entry.o +export-objs = ksyms.o + +SYMLINKS = semaphore.c old-checksum.c checksum.S + +all: $(OBJ) + +ifdef CONFIG_PCI +$(OBJ) += pci-i386.o +ifdef CONFIG_VISWS +$(OBJ) += pci-visws.o +else +$(OBJ) += pci-pc.o pci-irq.o +endif +endif + + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +entry.o: entry.S + $(CC) $(AFLAGS) -traditional -c -o $@ $< + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ldt.o: ldt.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ptrace_user.o: ptrace_user.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +checksum.S old-checksum.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/lib/$@ $@ + +semaphore.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/kernel/$@ $@ + +clean: + rm -f $(OBJS) $(export-objs) + +fastdep: + +archmrproper: + rm -f $(SYMLINKS) + +archclean: + rm -f link.ld + @$(MAKEBOOT) clean + +archdep: + @$(MAKEBOOT) dep + +modules: + +include $(TOPDIR)/Rules.make + +ifdef CONFIG_MOSIX +entry.o: ./mosasm.H + +offset: offset.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h + $(HOSTCC) $(HOSTCFLAGS) -D__KERNEL__ -I$(TOPDIR)/include -o offset offset.c + +./mosasm.H: offset entry.S + ./offset < entry.S > mosasm.H + rm ./offset +endif + diff -urN linux-2.4.17/arch/um/sys-i386/Makefile.ok linux_umopenmosix/arch/um/sys-i386/Makefile.ok --- linux-2.4.17/arch/um/sys-i386/Makefile.ok Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/Makefile.ok Wed Jun 26 23:45:15 2002 @@ -0,0 +1,73 @@ +OBJ = sys.o + +OBJS = 
checksum.o ldt.o old-checksum.o ptrace.o ptrace_user.o semaphore.o \ + sigcontext.o syscalls.o sysrq.o offset.o +export-objs = ksyms.o + +SYMLINKS = semaphore.c old-checksum.c checksum.S + +all: $(OBJ) $(obj-y) + +obj-y := entry.o +$(obj-y): gcc $(AFLAGS) -traditional -c -o entry.o entry.S + +ifdef CONFIG_PCI +obj-y += pci-i386.o +ifdef CONFIG_VISWS +obj-y += pci-visws.o +else +obj-y += pci-pc.o pci-irq.o +endif +endif + + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ldt.o: ldt.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ptrace_user.o: ptrace_user.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +checksum.S old-checksum.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/lib/$@ $@ + +semaphore.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/kernel/$@ $@ + +clean: + rm -f $(OBJS) $(export-objs) + +fastdep: + +archmrproper: + rm -f $(SYMLINKS) + +archclean: + rm -f link.ld + @$(MAKEBOOT) clean + +archdep: + @$(MAKEBOOT) dep + +modules: + +include $(TOPDIR)/Rules.make + +ifdef CONFIG_MOSIX +entry.o: ./mosasm.H + +offset: offset.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h + $(HOSTCC) $(HOSTCFLAGS) -D__KERNEL__ -I$(TOPDIR)/include -o offset offset.c + +./mosasm.H: offset entry.S + ./offset < entry.S > mosasm.H +endif + diff -urN linux-2.4.17/arch/um/sys-i386/Makefile.org linux_umopenmosix/arch/um/sys-i386/Makefile.org --- linux-2.4.17/arch/um/sys-i386/Makefile.org Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/Makefile.org Wed Jun 26 23:45:15 2002 @@ -0,0 +1,60 @@ +OBJ = sys.o + +OBJS = checksum.o ldt.o old-checksum.o ptrace.o ptrace_user.o semaphore.o \ + sigcontext.o syscalls.o sysrq.o offset.o entry.o +export-objs = ksyms.o + +SYMLINKS = semaphore.c 
old-checksum.c checksum.S + +all: $(OBJ) + +$(OBJ): $(OBJS) $(export-objs) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ldt.o: ldt.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +ptrace_user.o: ptrace_user.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +checksum.S old-checksum.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/lib/$@ $@ + +semaphore.c: + -rm -f $@ + -ln -s $(TOPDIR)/arch/i386/kernel/$@ $@ + +clean: + rm -f $(OBJS) $(export-objs) + +fastdep: + +archmrproper: + rm -f $(SYMLINKS) + +archclean: + rm -f link.ld + @$(MAKEBOOT) clean + +archdep: + @$(MAKEBOOT) dep + +modules: + +include $(TOPDIR)/Rules.make + +ifdef CONFIG_MOSIX +entry.o: ./mosasm.H + +offset: offset.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h + $(HOSTCC) $(HOSTCFLAGS) -D__KERNEL__ -I$(TOPDIR)/include -o offset offset.c + +./mosasm.H: offset entry.S + ./offset < entry.S > mosasm.H +endif + diff -urN linux-2.4.17/arch/um/sys-i386/entry.S linux_umopenmosix/arch/um/sys-i386/entry.S --- linux-2.4.17/arch/um/sys-i386/entry.S Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/entry.S Fri Jun 28 00:04:54 2002 @@ -0,0 +1,539 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. 
+ * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. + * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX +#include "mosasm.H" +#endif /* CONFIG_MOSIX */ + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +IF_MASK = 0x00000200 +NT_MASK = 0x00004000 +VM_MASK = 0x00020000 + +/* + * these are offsets into the task-struct. 
+ */ +state = 0 +flags = 4 +sigpending = 8 +addr_limit = 12 +exec_domain = 16 +need_resched = 20 +tsk_ptrace = 24 +processor = 52 + +ENOSYS = 38 + + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + +ENTRY(lcall7) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + +ENTRY(lcall27) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. 
+ movl %eax,EFLAGS(%esp) # + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + + +ENTRY(ret_from_fork) + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp +#ifdef CONFIG_MOSIX +ENTRY(ret_from_kickstart) + GET_CURRENT(%ebx) + jmp ret_from_sys_call +#else + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call +#endif /* CONFIG_MOSIX */ + +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. + */ + +ENTRY(system_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys +/* conflict resolution - Qlusters */ +#ifdef CONFIG_MOSIX_UDB + pushl %eax + call SYMBOL_NAME(sys_call_trace) # display syscalls for debugging + popl %eax +#endif /* CONFIG_MOSIX_UDB */ + cmpl $(NR_syscalls),%eax + jae badsys +#ifdef CONFIG_MOSIX + testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) + jne adjust_trace_before_syscall +adjusted_trace: + testb $DREMOTE,DFLAGS(%ebx) + je local_syscall +on_remote: + pushl %eax + call *SYMBOL_NAME(remote_sys_call_table)(,%eax,4) + addl $4,%esp + movl %eax,EAX(%esp) + jmp ret_from_sys_call +local_syscall: +#endif /* CONFIG_MOSIX */ + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +#ifdef CONFIG_MOSIX + call SYMBOL_NAME(mosix_local_syscall) +#endif /* CONFIG_MOSIX */ +ENTRY(ret_from_sys_call) +#ifdef CONFIG_MOSIX + testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) + jne adjust_trace_before_syscall +ret_check_reschedule: +#endif /* 
CONFIG_MOSIX */ + cli # need_resched and signals atomic test + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + jne signal_return +#ifdef CONFIG_MOSIX +straight_to_mosix: + call SYMBOL_NAME(mosix_pre_usermode_actions) + testl %eax,%eax + jne ret_from_sys_call +#endif /* CONFIG_MOSIX */ +restore_all: + RESTORE_ALL + + ALIGN +signal_return: + sti # we can get here from an interrupt handler + testl $(VM_MASK),EFLAGS(%esp) + movl %esp,%eax + jne v86_signal_return + xorl %edx,%edx + call SYMBOL_NAME(do_signal) +#ifdef CONFIG_MOSIX + jmp straight_to_mosix +#else + jmp restore_all +#endif /* CONFIG_MOSIX */ + + ALIGN +v86_signal_return: +// call SYMBOL_NAME(save_v86_state) ####### disabled by Matt + movl %eax,%esp + xorl %edx,%edx + call SYMBOL_NAME(do_signal) +#ifdef CONFIG_MOSIX + jmp straight_to_mosix +#else + jmp restore_all +#endif /* CONFIG_MOSIX */ + + ALIGN +tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) +#ifdef CONFIG_MOSIX +adjust_trace_before_syscall: # only arrive here with DTRACESYS(1|2) + testl $DDEPUTY,DFLAGS(%ebx) + jne straight_to_mosix # no mess with signals/syscalls/tracesys + testl $DREMOTE,DFLAGS(%ebx) + je no_need_to_unsync + call wait_for_permission_to_continue +no_need_to_unsync: + testl $DTRACESYS2,DFLAGS(%ebx) + jne second_tracesys # skipping system-call + orl $DTRACESYS2,DFLAGS(%ebx) # next time we skip the system-call + movl $-ENOSYS,EAX(%esp) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae second_tracesys # prevent system-call out of range trick + jmp adjusted_trace # now do the system-call +second_tracesys: # note: "syscall_trace" clears the flags +#else + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracesys_exit: +#endif /* CONFIG_MOSIX */ + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_sys_call + + ALIGN 
+ENTRY(ret_from_intr) + GET_CURRENT(%ebx) +ret_from_exception: + movl EFLAGS(%esp),%eax # mix EFLAGS and CS + movb CS(%esp),%al + testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? +#ifdef CONFIG_MOSIX + jne ret_check_reschedule +#else + jne ret_from_sys_call +#endif /* CONFIG_MOSIX */ + jmp restore_all + + ALIGN +reschedule: + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + +#ifdef CONFIG_MOSIX +/* + * call_with_regs(caddr_t routine, pt_regs *before, pt_regs *after) + * pushes the "before" regs on the stack and calls routine, + * then places the possibly-modified registers in "after" + * (which may possibly equal "before"). + * Also, set "current->altregs" to the pushed registers, then restores it. + */ +ENTRY(call_with_regs) + pushl %ebx + GET_CURRENT(%ebx) + pushl ALTREGS(%ebx) + movl 16(%esp),%eax + pushl 56(%eax) + pushl 52(%eax) + pushl 48(%eax) + pushl 44(%eax) + pushl 40(%eax) + pushl 36(%eax) + pushl 32(%eax) + pushl 28(%eax) + pushl 24(%eax) + pushl 20(%eax) + pushl 16(%eax) + pushl 12(%eax) + pushl 8(%eax) + pushl 4(%eax) + pushl 0(%eax) + movl %esp,ALTREGS(%ebx) + movl 72(%esp),%eax + call *%eax + movl 80(%esp),%edx + popl %ecx + movl %ecx,0(%edx) + popl %ecx + movl %ecx,4(%edx) + popl %ecx + movl %ecx,8(%edx) + popl %ecx + movl %ecx,12(%edx) + popl %ecx + movl %ecx,16(%edx) + popl %ecx + movl %ecx,20(%edx) + popl %ecx + movl %ecx,24(%edx) + popl %ecx + movl %ecx,28(%edx) + popl %ecx + movl %ecx,32(%edx) + popl %ecx + movl %ecx,36(%edx) + popl %ecx + movl %ecx,40(%edx) + popl %ecx + movl %ecx,44(%edx) + popl %ecx + movl %ecx,48(%edx) + popl %ecx + movl %ecx,52(%edx) + popl %ecx + movl %ecx,56(%edx) + popl ALTREGS(%ebx) + popl %ebx + ret +#endif /* CONFIG_MOSIX */ + +ENTRY(divide_error) + pushl $0 # no error code +// pushl $ SYMBOL_NAME(do_divide_error) ###### disabled by Matt + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + 
pushl %ebx + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + call *%edi + addl $8,%esp + jmp ret_from_exception + +ENTRY(coprocessor_error) + pushl $0 +// pushl $ SYMBOL_NAME(do_coprocessor_error) ###### disabled by Matt + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 +// pushl $ SYMBOL_NAME(do_simd_coprocessor_error) ###### disabled by Matt + jmp error_code + +ENTRY(device_not_available) + pushl $-1 # mark this as an int + SAVE_ALL + GET_CURRENT(%ebx) + movl %cr0,%eax + testl $0x4,%eax # EM (math emulation bit) + jne device_not_available_emulate +// call SYMBOL_NAME(math_state_restore) ###### disabled by Matt + jmp ret_from_exception +device_not_available_emulate: + pushl $0 # temporary storage for ORIG_EIP +// call SYMBOL_NAME(math_emulate) ###### disabled by Matt + addl $4,%esp + jmp ret_from_exception + +ENTRY(debug) + pushl $0 +// pushl $ SYMBOL_NAME(do_debug) ###### disabled by Matt + jmp error_code + +ENTRY(nmi) + pushl %eax + SAVE_ALL + movl %esp,%edx + pushl $0 + pushl %edx +// call SYMBOL_NAME(do_nmi) ###### disabled by Matt + addl $8,%esp + RESTORE_ALL + +ENTRY(int3) + pushl $0 +// pushl $ SYMBOL_NAME(do_int3) ###### disabled by Matt + jmp error_code + +ENTRY(overflow) + pushl $0 +// pushl $ SYMBOL_NAME(do_overflow) ###### disabled by Matt + jmp error_code + +ENTRY(bounds) + pushl $0 +// pushl $ SYMBOL_NAME(do_bounds) ###### disabled by Matt + jmp error_code + +ENTRY(invalid_op) + pushl $0 +// pushl $ SYMBOL_NAME(do_invalid_op) ###### disabled by Matt + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 +// pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) ###### disabled by Matt + jmp error_code + +ENTRY(double_fault) +// pushl $ 
SYMBOL_NAME(do_double_fault) ###### disabled by Matt + jmp error_code + +ENTRY(invalid_TSS) +// pushl $ SYMBOL_NAME(do_invalid_TSS) ###### disabled by Matt + jmp error_code + +ENTRY(segment_not_present) +// pushl $ SYMBOL_NAME(do_segment_not_present) ###### disabled by Matt + jmp error_code + +ENTRY(stack_segment) +// pushl $ SYMBOL_NAME(do_stack_segment) ###### disabled by Matt + jmp error_code + +ENTRY(general_protection) +// pushl $ SYMBOL_NAME(do_general_protection) ###### disabled by Matt + jmp error_code + +ENTRY(alignment_check) +// pushl $ SYMBOL_NAME(do_alignment_check) ###### disabled by Matt + jmp error_code + +ENTRY(page_fault) +// pushl $ SYMBOL_NAME(do_page_fault) ###### disabled by Matt + jmp error_code + +ENTRY(machine_check) + pushl $0 +// pushl $ SYMBOL_NAME(do_machine_check) ###### disabled by Matt + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 +// pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) ###### disabled by Matt + jmp error_code + + diff -urN linux-2.4.17/arch/um/sys-i386/ksyms.c linux_umopenmosix/arch/um/sys-i386/ksyms.c --- linux-2.4.17/arch/um/sys-i386/ksyms.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/ksyms.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,35 @@ +#include "linux/module.h" +#include "linux/in6.h" +#include "linux/rwsem.h" +#include "asm/byteorder.h" +#include "asm/semaphore.h" +#include "asm/uaccess.h" +#include "asm/checksum.h" +#include "asm/errno.h" +#include "linux/mosix.h" + + + + + +EXPORT_SYMBOL(__down_failed); +EXPORT_SYMBOL(__down_failed_interruptible); +EXPORT_SYMBOL(__down_failed_trylock); +EXPORT_SYMBOL(__up_wakeup); + +/* Networking helper routines. 
*/ +EXPORT_SYMBOL(csum_partial_copy_generic); + + + + + + + + + + + + + + diff -urN linux-2.4.17/arch/um/sys-i386/ldt.c linux_umopenmosix/arch/um/sys-i386/ldt.c --- linux-2.4.17/arch/um/sys-i386/ldt.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/ldt.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +extern int modify_ldt(int func, void *ptr, unsigned long bytecount); + +int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + return modify_ldt(func, ptr, bytecount); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/arch/um/sys-i386/mosasm.H linux_umopenmosix/arch/um/sys-i386/mosasm.H --- linux-2.4.17/arch/um/sys-i386/mosasm.H Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/mosasm.H Fri Jun 28 00:25:51 2002 @@ -0,0 +1,13 @@ +/* Please do not edit -- this file is created automatically */ +/* Thu Jun 27 21:25:51 2002 */ + +ALTREGS = 0x700 +DFLAGS = 0x5C8 +DDEPUTY = 0x1 +DREMOTE = 0x2 +DTRACESYS1 = 0x2000 +DTRACESYS2 = 0x4000 +.data + +ENTRY(remote_sys_call_table) +.text Binary files linux-2.4.17/arch/um/sys-i386/offset and linux_umopenmosix/arch/um/sys-i386/offset differ diff -urN linux-2.4.17/arch/um/sys-i386/offset.c linux_umopenmosix/arch/um/sys-i386/offset.c --- linux-2.4.17/arch/um/sys-i386/offset.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-i386/offset.c Wed Jun 26 23:45:15 2002 @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the 
terms of the
+ * GNU General Public License, as published by the Free Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY
+ * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED.
+ */
+/*
+ * Author(s): Amnon Shiloh, Moshe Bar
+ */
+
+/*
+ * Produce an include-file for "entry.S", with:
+ * 1. constant-offsets of some required MOSIX-members of "task_struct"
+ * 2. some bits to test "current->dflags"
+ * 3. a duplicate system-call table (remote_sys_call_table), with all
+ *    system-calls names preceded by a "remote_".
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct file;
+#define FILE struct file
+
+extern int printf(char *, ...);
+extern int fgets(char *, int, FILE *);
+extern int perror(char *);
+extern FILE *stdin;
+
+char line[2048];
+
+/*
+ * Write the include-file to stdout: MOSIX offsets and dflags bits, then the
+ * sys_call_table read from stdin with its entries renamed to remote_<name>.
+ */
+int main(void)
+{
+	long t, time(long *);
+	char *ctime(long *);
+	char *c;
+	int i;
+
+	time(&t);
+	printf("/* Please do not edit -- this file is created automatically */\n");
+	printf("/* %.24s */\n\n", ctime(&t));
+	printf("ALTREGS\t\t= 0x%X\n",
+		(int)&(((struct task_struct *)0)->mosix.altregs));
+	printf("DFLAGS\t\t= 0x%X\n",
+		(int)&(((struct task_struct *)0)->mosix.dflags));
+	printf("DDEPUTY\t\t= 0x%X\n", DDEPUTY);
+	printf("DREMOTE\t\t= 0x%X\n", DREMOTE);
+	printf("DTRACESYS1\t\t= 0x%X\n", DTRACESYS1);
+	printf("DTRACESYS2\t\t= 0x%X\n", DTRACESYS2);
+	printf(".data\n");
+	printf("\nENTRY(remote_sys_call_table)\n");
+	while(fgets(line, sizeof(line), stdin) &&
+		strcmp(line, "ENTRY(sys_call_table)\n"))
+		;
+	for(i = 0 ; i < NR_syscalls && fgets(line, sizeof(line), stdin) ; i++) {
+		if(!strncmp(line, "\t.long SYMBOL_NAME(", 19)) {
+			for(c = &line[19] ; *c && *c != ')' ; c++);
+			*c = '\0';
+			if(!strcmp(&line[19], "sys_ni_syscall"))
+				printf("\t.long SYMBOL_NAME(sys_ni_syscall)\n");
+			else
+				printf("\t.long SYMBOL_NAME(remote_%s)\n", &line[19]);
+		} else if(!strcmp(line, "\t.rept NR_syscalls-(.-sys_call_table)/4\n")) {
+			printf("\t.rept NR_syscalls-(.-remote_sys_call_table)/4\n");
+		} else {
+			printf("%s", line);
+			/* "\t.endr" is 6 characters; a length of 5 also matches "\t.end" */
+			if(!strncmp(line, "\t.endr", 6))
+				break;
+		}
+	}
+	printf(".text\n");
+	return(0);
+}
diff -urN linux-2.4.17/arch/um/sys-i386/ptrace.c linux_umopenmosix/arch/um/sys-i386/ptrace.c
--- linux-2.4.17/arch/um/sys-i386/ptrace.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-i386/ptrace.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "asm/ptrace.h"
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/*
+ * Store "value" in the saved register of "child" at byte offset "regno"
+ * (array index regno >> 2).  Returns 0, or -EIO when a segment selector
+ * does not have its two low bits set to 3 (FS, GS, DS and ES may be 0).
+ */
+int putreg(struct task_struct *child, unsigned long regno,
+	   unsigned long value)
+{
+	switch (regno >> 2) {
+	case FS:
+	case GS:
+		/* FS and GS are stored without the 16-bit mask */
+		if (value && (value & 3) != 3)
+			return -EIO;
+		break;
+	case DS:
+	case ES:
+		if (value && (value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case SS:
+	case CS:
+		if ((value & 3) != 3)
+			return -EIO;
+		value &= 0xffff;
+		break;
+	case EFL:
+		/*
+		 * Only the FLAG_MASK bits come from the caller; every other
+		 * bit keeps its current value.  OR-ing in the whole old
+		 * register would make a set user flag impossible to clear.
+		 */
+		value &= FLAG_MASK;
+		value |= child->thread.process_regs.regs[EFL] & ~FLAG_MASK;
+		break;
+	}
+	child->thread.process_regs.regs[regno >> 2] = value;
+	return 0;
+}
+
+/*
+ * Return the saved register of "child" at byte offset "regno" (array index
+ * regno >> 2); segment registers are cut down to their low 16 bits.
+ */
+unsigned long getreg(struct task_struct *child, unsigned long regno)
+{
+	unsigned long mask = ~0UL;
+
+	switch (regno >> 2) {
+	case FS:
+	case GS:
+	case DS:
+	case ES:
+	case SS:
+	case CS:
+		mask = 0xffff;
+		break;
+	}
+	return child->thread.process_regs.regs[regno >> 2] & mask;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.
This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-i386/ptrace_user.c linux_umopenmosix/arch/um/sys-i386/ptrace_user.c
--- linux-2.4.17/arch/um/sys-i386/ptrace_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-i386/ptrace_user.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,27 @@
+#include
+#include
+#include
+#include "sysdep/ptrace.h"
+
+/* Fetch the registers of process "pid" into *regs_out; returns ptrace()'s result. */
+int ptrace_getregs(long pid, struct sys_pt_regs *regs_out)
+{
+	return(ptrace(PTRACE_GETREGS, pid, 0, regs_out));
+}
+
+/* Load the registers of process "pid" from *regs; returns ptrace()'s result. */
+int ptrace_setregs(long pid, struct sys_pt_regs *regs)
+{
+	return(ptrace(PTRACE_SETREGS, pid, 0, regs));
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-i386/sigcontext.c linux_umopenmosix/arch/um/sys-i386/sigcontext.c
--- linux-2.4.17/arch/um/sys-i386/sigcontext.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-i386/sigcontext.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include
+#include
+#include
+#include "sysdep/ptrace.h"
+#include "kern_util.h"
+#include "frame.h"
+
+/*
+ * Fill the struct sigcontext at "sc_ptr" from "regs", the fault address
+ * "cr2" and the error code "error", one field at a time through
+ * copy_to_user_proc().  Returns the OR of all the copy results.
+ */
+int copy_sigcontext_to_user(void *sc_ptr, struct sys_pt_regs *regs,
+			    unsigned long cr2, int error)
+{
+	struct sigcontext *sc;
+	int err;
+
+	sc = sc_ptr;
+	err = 0;
+	err |= copy_to_user_proc(&sc->ebx, &regs->regs[EBX], sizeof(sc->ebx));
+	err |= copy_to_user_proc(&sc->ecx, &regs->regs[ECX], sizeof(sc->ecx));
+	err |= copy_to_user_proc(&sc->edx, &regs->regs[EDX], sizeof(sc->edx));
+	err |= copy_to_user_proc(&sc->esi, &regs->regs[ESI], sizeof(sc->esi));
+	err |= copy_to_user_proc(&sc->edi, &regs->regs[EDI], sizeof(sc->edi));
+	err |= copy_to_user_proc(&sc->ebp, &regs->regs[EBP], sizeof(sc->ebp));
+	err |= copy_to_user_proc(&sc->eax, &regs->regs[EAX], sizeof(sc->eax));
+	err |= copy_to_user_proc(&sc->ds, &regs->regs[DS], sizeof(sc->ds));
+	err |= copy_to_user_proc(&sc->es, &regs->regs[ES], sizeof(sc->es));
+	err |= copy_to_user_proc(&sc->fs, &regs->regs[FS], sizeof(sc->fs));
+	err |= copy_to_user_proc(&sc->gs, &regs->regs[GS], sizeof(sc->gs));
+	err |= copy_to_user_proc(&sc->eip, &regs->regs[EIP], sizeof(sc->eip));
+	err |= copy_to_user_proc(&sc->cs, &regs->regs[CS], sizeof(sc->cs));
+	err |= copy_to_user_proc(&sc->eflags, &regs->regs[EFL],
+				 sizeof(sc->eflags));
+	err |= copy_to_user_proc(&sc->esp_at_signal, &regs->regs[UESP],
+				 sizeof(sc->esp_at_signal));
+	err |= copy_to_user_proc(&sc->ss, &regs->regs[SS], sizeof(sc->ss));
+	err |= copy_to_user_proc(&sc->err, &error, sizeof(sc->err));
+	err |= copy_to_user_proc(&sc->cr2, &cr2, sizeof(sc->cr2));
+	return(err);
+}
+
+/* Load "regs" from the struct sigcontext at "sc_ptr" by direct field access. */
+void fill_in_regs(struct sys_pt_regs *regs, void *sc_ptr)
+{
+	struct sigcontext *sc;
+
+	sc = sc_ptr;
+	regs->regs[EBX] = sc->ebx;
+	regs->regs[ECX] = sc->ecx;
+	regs->regs[EDX] = sc->edx;
+	regs->regs[ESI] = sc->esi;
+	regs->regs[EDI] = sc->edi;
+	regs->regs[EBP] = sc->ebp;
+	regs->regs[EAX] = sc->eax;
+	regs->regs[DS] = sc->ds;
+	regs->regs[ES] = sc->es;
+	regs->regs[FS] = sc->fs;
+	regs->regs[GS] = sc->gs;
+	regs->regs[EIP] = sc->eip;
+	regs->regs[CS] = sc->cs;
+	regs->regs[EFL] = sc->eflags;
+	regs->regs[UESP] = sc->esp_at_signal;
+	regs->regs[SS] = sc->ss;
+}
+
+/*
+ * Copy the sigcontext "from" to "to" through copy_to_user_proc().  When
+ * from->fpstate is set, its arch->fpstate_size bytes ("data" is a struct
+ * arch_frame_data) go directly behind *to and to->fpstate is pointed there.
+ * Returns the OR of the copy results.
+ */
+int copy_sc_to_user(void *to_ptr, void *from_ptr, void *data)
+{
+	struct arch_frame_data *arch = data;
+	struct sigcontext *to = to_ptr, *from = from_ptr;
+	struct _fpstate *to_fp, *from_fp;
+	int err;
+
+	to_fp = (struct _fpstate *)((unsigned long) to + sizeof(*to));
+	from_fp = from->fpstate;
+	err = copy_to_user_proc(to, from, sizeof(*to));
+	if(from_fp != NULL){
+		err |= copy_to_user_proc(&to->fpstate, &to_fp,
+					 sizeof(to->fpstate));
+		err |= copy_to_user_proc(to_fp, from_fp, arch->fpstate_size);
+	}
+	return(err);
+}
+
+/*
+ * Copy the sigcontext "from" into "to" through copy_from_user_proc(); if "to"
+ * had an fpstate buffer beforehand, fill it from from->fpstate as well.
+ * Returns the OR of the copy results.
+ * NOTE(review): from->fpstate is dereferenced directly before the copy, and
+ * to->fpstate is left holding the value copied from "from" - confirm intended.
+ */
+int copy_sc_from_user(void *to_ptr, void *from_ptr)
+{
+	struct sigcontext *to = to_ptr, *from = from_ptr;
+	struct _fpstate *to_fp, *from_fp;
+	int err;
+
+	to_fp = to->fpstate;
+	from_fp = from->fpstate;
+	err = copy_from_user_proc(to, from, sizeof(*to));
+	if(to_fp != NULL)
+		err |= copy_from_user_proc(to_fp, from_fp, sizeof(*to_fp));
+	return(err);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-i386/syscalls.c linux_umopenmosix/arch/um/sys-i386/syscalls.c
--- linux-2.4.17/arch/um/sys-i386/syscalls.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-i386/syscalls.c	Thu Jun 27 22:57:05 2002
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "asm/mman.h"
+#include "asm/uaccess.h"
+#include "asm/unistd.h"
+
+/*
+ * Perform the select(nd, in, out, ex, tv) and mmap() system
+ * calls. Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+extern int old_mmap(unsigned long addr, unsigned long len,
+		    unsigned long prot, unsigned long flags,
+		    unsigned long fd, unsigned long offset);
+
+/* Unpack the user-space argument block "arg" and hand it to old_mmap(). */
+int old_mmap_i386(struct mmap_arg_struct *arg)
+{
+	struct mmap_arg_struct args;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+
+	return old_mmap(args.addr, args.len, args.prot, args.flags, args.fd,
+			args.offset);
+}
+
+struct sel_arg_struct {
+	unsigned long n;
+	fd_set *inp, *outp, *exp;
+	struct timeval *tvp;
+};
+
+/* Same scheme for select(): copy the block in, then call sys_select(). */
+int old_select(struct sel_arg_struct *arg)
+{
+	struct sel_arg_struct args;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+	/* sys_select() does the appropriate kernel locking */
+	return sys_select(args.n, args.inp, args.outp, args.exp, args.tvp);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-i386/sysrq.c linux_umopenmosix/arch/um/sys-i386/sysrq.c
--- linux-2.4.17/arch/um/sys-i386/sysrq.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-i386/sysrq.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,30 @@
+#include "linux/kernel.h"
+#include "linux/smp.h"
+#include "linux/sched.h"
+#include "asm/ptrace.h"
+#include "sysrq.h"
+
+
+/*
+ * Print the register set in "regs" with printk() (ESP/SS only when the two
+ * low bits of xcs are set), then a stack trace starting at the saved esp.
+ */
+void show_regs(struct pt_regs_subarch *regs)
+{
+	printk("\n");
+	printk("EIP: %04x:[<%08lx>] CPU: %d %s",0xffff & regs->xcs, regs->eip,
+	       smp_processor_id(), print_tainted());
+	if (regs->xcs & 3)
+		printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp);
+	printk(" EFLAGS: %08lx\n %s\n", regs->eflags, print_tainted());
+	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+	       regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+	       regs->esi, regs->edi, regs->ebp);
+	printk(" DS: %04x ES: %04x\n",
+	       0xffff & regs->xds, 0xffff & regs->xes);
+
+	show_trace(&regs->esp);
+}
+
+
diff -urN linux-2.4.17/arch/um/sys-ia64/Makefile linux_umopenmosix/arch/um/sys-ia64/Makefile
--- linux-2.4.17/arch/um/sys-ia64/Makefile	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ia64/Makefile	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,26 @@
+OBJ = sys.o
+
+OBJS =
+
+all: $(OBJ)
+
+$(OBJ): $(OBJS)
+	rm -f $@
+	$(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
+clean:
+	rm -f $(OBJS)
+
+fastdep:
+
+archmrproper:
+
+archclean:
+	rm -f link.ld
+	@$(MAKEBOOT) clean
+
+archdep:
+	@$(MAKEBOOT) dep
+
+modules:
+
+include $(TOPDIR)/Rules.make
diff -urN linux-2.4.17/arch/um/sys-ppc/Makefile linux_umopenmosix/arch/um/sys-ppc/Makefile
--- linux-2.4.17/arch/um/sys-ppc/Makefile	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/Makefile	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,78 @@
+OBJ = sys.o
+
+.S.o:
+	$(CC) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__
-c $< -o $*.o + +OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \ + ptrace_user.o sysrq.o + +EXTRA_AFLAGS := -DCONFIG_ALL_PPC -I. -I$(TOPDIR)/arch/ppc/kernel + +all: $(OBJ) + +$(OBJ): $(OBJS) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +ptrace_user.o: ptrace_user.c + $(CC) -D__KERNEL__ $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +semaphore.c: + rm -f $@ + ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + +checksum.S: + rm -f $@ + ln -s $(TOPDIR)/arch/ppc/lib/$@ $@ + +mk_defs.c: + rm -f $@ + ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + +ppc_defs.head: + rm -f $@ + ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ + +ppc_defs.h: mk_defs.c ppc_defs.head \ + $(TOPDIR)/include/asm-ppc/mmu.h \ + $(TOPDIR)/include/asm-ppc/processor.h \ + $(TOPDIR)/include/asm-ppc/pgtable.h \ + $(TOPDIR)/include/asm-ppc/ptrace.h +# $(CC) $(CFLAGS) -S mk_defs.c + cp ppc_defs.head ppc_defs.h +# for bk, this way we can write to the file even if it's not checked out + echo '#define THREAD 608' >> ppc_defs.h + echo '#define PT_REGS 8' >> ppc_defs.h + echo '#define CLONE_VM 256' >> ppc_defs.h +# chmod u+w ppc_defs.h +# grep '^#define' mk_defs.s >> ppc_defs.h +# rm mk_defs.s + +# the asm link is horrible, and breaks the other targets. This is also +# not going to work with parallel makes. 
+ +checksum.o: checksum.S + rm -f asm + ln -s $(TOPDIR)/include/asm-ppc asm + $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + rm -f asm + +misc.o: misc.S ppc_defs.h + rm -f asm + ln -s $(TOPDIR)/include/asm-ppc asm + $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + rm -f asm + +clean: + rm -f $(OBJS) + rm -f ppc_defs.h + rm -f checksum.S semaphore.c mk_defs.c + +fastdep: + +modules: + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/arch/um/sys-ppc/misc.S linux_umopenmosix/arch/um/sys-ppc/misc.S --- linux-2.4.17/arch/um/sys-ppc/misc.S Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/arch/um/sys-ppc/misc.S Wed Jun 26 23:45:15 2002 @@ -0,0 +1,116 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * A couple of functions stolen from arch/ppc/kernel/misc.S for UML + * by Chris Emerson. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include "ppc_asm.h" + +#if defined(CONFIG_4xx) || defined(CONFIG_8xx) +#define CACHE_LINE_SIZE 16 +#define LG_CACHE_LINE_SIZE 4 +#define MAX_COPY_PREFETCH 1 +#elif !defined(CONFIG_PPC64BRIDGE) +#define CACHE_LINE_SIZE 32 +#define LG_CACHE_LINE_SIZE 5 +#define MAX_COPY_PREFETCH 4 +#else +#define CACHE_LINE_SIZE 128 +#define LG_CACHE_LINE_SIZE 7 +#define MAX_COPY_PREFETCH 1 +#endif /* CONFIG_4xx || CONFIG_8xx */ + + .text + +/* + * Clear a page using the dcbz instruction, which doesn't cause any + * memory traffic (except to write out any cache lines which get + * displaced). This only works on cacheable memory. 
+ */
+_GLOBAL(clear_page)
+	li	r0,4096/CACHE_LINE_SIZE	/* cache lines in a 4096-byte page */
+	mtctr	r0			/* CTR = loop count */
+#ifdef CONFIG_8xx
+	li	r4, 0
+1:	stw	r4, 0(r3)		/* 8xx: zero 16 bytes with four word stores */
+	stw	r4, 4(r3)
+	stw	r4, 8(r3)
+	stw	r4, 12(r3)
+#else
+1:	dcbz	0,r3			/* zero the cache line at r3 */
+#endif
+	addi	r3,r3,CACHE_LINE_SIZE
+	bdnz	1b			/* decrement CTR, loop while non-zero */
+	blr
+
+/*
+ * Copy a whole page. We use the dcbz instruction on the destination
+ * to reduce memory traffic (it eliminates the unnecessary reads of
+ * the destination into cache). This requires that the destination
+ * is cacheable.
+ */
+#define COPY_16_BYTES		\
+	lwz	r6,4(r4);	\
+	lwz	r7,8(r4);	\
+	lwz	r8,12(r4);	\
+	lwzu	r9,16(r4);	\
+	stw	r6,4(r3);	\
+	stw	r7,8(r3);	\
+	stw	r8,12(r3);	\
+	stwu	r9,16(r3)
+
+_GLOBAL(copy_page)
+	addi	r3,r3,-4		/* bias r3 (stores) and r4 (loads) by -4 ... */
+	addi	r4,r4,-4		/* ... for the update-form lwzu/stwu above */
+	li	r5,4			/* index that undoes the bias for dcbt/dcbz */
+
+#ifndef CONFIG_8xx
+#if MAX_COPY_PREFETCH > 1
+	li	r0,MAX_COPY_PREFETCH
+	li	r11,4
+	mtctr	r0
+11:	dcbt	r11,r4			/* touch the first MAX_COPY_PREFETCH source lines */
+	addi	r11,r11,CACHE_LINE_SIZE
+	bdnz	11b
+#else /* MAX_COPY_PREFETCH == 1 */
+	dcbt	r5,r4
+	li	r11,CACHE_LINE_SIZE+4
+#endif /* MAX_COPY_PREFETCH */
+#endif /* CONFIG_8xx */
+
+	li	r0,4096/CACHE_LINE_SIZE	/* one loop pass per cache line */
+	mtctr	r0
+1:
+#ifndef CONFIG_8xx
+	dcbt	r11,r4
+	dcbz	r5,r3
+#endif
+	COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	1b
+	blr
diff -urN linux-2.4.17/arch/um/sys-ppc/miscthings.c linux_umopenmosix/arch/um/sys-ppc/miscthings.c
--- linux-2.4.17/arch/um/sys-ppc/miscthings.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/miscthings.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,56 @@
+#include "linux/threads.h"
+#include "linux/stddef.h" // for NULL
+#include "linux/elf.h" // for AT_NULL
+
+/* unsigned int local_bh_count[NR_CPUS]; */
+unsigned long isa_io_base = 0;
+
+/* The following function nicked from arch/ppc/kernel/process.c and
+ * adapted slightly */
+/*
+ * XXX ld.so expects the auxiliary table to start on
+ * a 16-byte boundary, so we have to find it and
+ * move it up. :-(
+ *
+ * "sp" points at argc; the argv and environment pointer arrays and then
+ * the auxiliary table follow it in memory.
+ */
+void shove_aux_table(unsigned long sp)
+{
+	unsigned long aux_start, offset, word;
+	int argc = *(int *)sp;
+
+	/* step over argc and the argc + 1 argv slots */
+	sp += sizeof(int) + (argc + 1) * sizeof(char *);
+	/* skip over the environment pointers, NULL terminator included */
+	while (*(char **)sp != NULL)
+		sp += sizeof(char *);
+	sp += sizeof(char *);
+	aux_start = sp;
+	/* skip to the end of the auxiliary table: two-word entries up to AT_NULL */
+	do {
+		word = *(unsigned long *)sp;
+		sp += 2 * sizeof(unsigned long);
+	} while (word != AT_NULL);
+	offset = ((aux_start + 15) & ~15) - aux_start;
+	if (offset == 0)
+		return;
+	/* move the table up by "offset", highest word first */
+	do {
+		sp -= sizeof(unsigned long);
+		*(unsigned long *)(sp + offset) = *(unsigned long *)sp;
+	} while (sp > aux_start);
+}
+/* END stuff taken from arch/ppc/kernel/process.c */
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-ppc/ptrace.c linux_umopenmosix/arch/um/sys-ppc/ptrace.c
--- linux-2.4.17/arch/um/sys-ppc/ptrace.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/ptrace.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,28 @@
+#include "linux/sched.h"
+#include "asm/ptrace.h"
+
+/* Store "value" in the saved register at byte offset "regno"; always returns 0. */
+int putreg(struct task_struct *child, unsigned long regno,
+	   unsigned long value)
+{
+	child->thread.process_regs.regs[regno >> 2] = value;
+	return 0;
+}
+
+/* Return the saved register of "child" at byte offset "regno" (array index
+ * regno >> 2) without any masking. */
+unsigned long getreg(struct task_struct *child, unsigned long regno)
+{
+	return child->thread.process_regs.regs[regno >> 2];
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.
This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-ppc/ptrace_user.c linux_umopenmosix/arch/um/sys-ppc/ptrace_user.c
--- linux-2.4.17/arch/um/sys-ppc/ptrace_user.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/ptrace_user.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,50 @@
+#include
+#include
+#include
+#include "sysdep/ptrace.h"
+
+/*
+ * Read every PPC_REG-sized slot of struct sys_pt_regs from process "pid" with
+ * PTRACE_PEEKUSER.  Returns 0, or -errno from the first failing peek.
+ */
+int ptrace_getregs(long pid, struct sys_pt_regs *regs_out)
+{
+	int reg;
+
+	for (reg = 0; reg < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); reg++) {
+		/* the peeked word is the return value, so errno is the error signal */
+		errno = 0;
+		regs_out->regs[reg] = ptrace(PTRACE_PEEKUSER, pid, reg*4, 0);
+		if (errno)
+			return -errno;
+	}
+	return 0;
+}
+
+/*
+ * Write the registers in "regs_in" to process "pid" with PTRACE_POKEUSER,
+ * leaving out index 34 and everything above PT_MQ.  Returns 0, or -errno
+ * from the first failing poke.
+ */
+int ptrace_setregs(long pid, struct sys_pt_regs *regs_in)
+{
+	int reg;
+
+	for (reg = 0; reg < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); reg++) {
+		if (reg == 34 /* FIXME: PT_ORIG_R3 */ || reg > PT_MQ)
+			continue;
+		if (ptrace(PTRACE_POKEUSER, pid, reg*4, regs_in->regs[reg]) < 0)
+			return -errno;
+	}
+	return 0;
+}
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-ppc/sigcontext.c linux_umopenmosix/arch/um/sys-ppc/sigcontext.c
--- linux-2.4.17/arch/um/sys-ppc/sigcontext.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/sigcontext.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,59 @@
+#include "asm/ptrace.h"
+#include "asm/sigcontext.h"
+#include "sysdep/ptrace.h"
+#include "user_util.h"
+
+/*
+ * Copy "regs" into the register area that the sigcontext at "scontext"
+ * points to (sc->regs) with memcpy().  "cr2" and "err" are only referenced
+ * by the disabled field-by-field code.  Nothing is returned.
+ */
+void copy_sigcontext_to_user(void *scontext, struct sys_pt_regs *regs,
+			     unsigned long cr2, int err)
+{
+	struct sigcontext_struct *sc = scontext;
+#if 0
+	int i;
+	// general purpose regs
+	for (i=0; i<32; ++i) {
+		sc->regs->gpr[i] = regs->regs[PT_R0 + i];
+	}
+	sc->regs->nip = regs->regs[PT_NIP];
+	sc->regs->msr = regs->regs[PT_MSR];
+	sc->regs->orig_gpr3 = regs->regs[PT_ORIG_R3];
+	sc->regs->ctr = regs->regs[PT_CTR];
+	sc->regs->link = regs->regs[PT_LNK];
+	sc->regs->xer = regs->regs[PT_XER];
+	sc->regs->ccr = regs->regs[PT_CCR];
+	sc->regs->mq = regs->regs[PT_MQ];
+	sc->regs->trap = err;
+	sc->regs->dar = cr2;
+#endif
+	/* This is a bit of a hack; there's some confusion with the
+	 * various definitions of [sys_]pt_regs, and everything isn't
+	 * quite coming together quite right. */
+	memcpy(sc->regs, regs, sizeof(struct sys_pt_regs));
+	/*(sc->regs) = *regs; */
+}
+
+/* Load "regs" by structure assignment from the registers the sigcontext points to. */
+void fill_in_regs(struct sys_pt_regs *regs, void *sc_ptr)
+{
+	struct sigcontext_struct *sc;
+
+	sc = sc_ptr;
+
+	// FIXME: need to investigate what's going on with struct pt_regs etc.
+	*regs = *(sc->regs);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/arch/um/sys-ppc/sysrq.c linux_umopenmosix/arch/um/sys-ppc/sysrq.c
--- linux-2.4.17/arch/um/sys-ppc/sysrq.c	Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/arch/um/sys-ppc/sysrq.c	Wed Jun 26 23:45:15 2002
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
+ * Licensed under the GPL
+ */
+
+#include "linux/kernel.h"
+#include "linux/smp.h"
+#include "asm/ptrace.h"
+#include "sysrq.h"
+
+/*
+ * Placeholder register dump: prints a fixed message (the i386 dump is kept
+ * under "#if 0") and then a stack trace starting at the address of gpr[1].
+ */
+void show_regs(struct pt_regs_subarch *regs)
+{
+	printk("\n");
+	printk("show_regs(): insert regs here.\n");
+#if 0
+	printk("\n");
+	printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip,
+	       smp_processor_id());
+	if (regs->xcs & 3)
+		printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp);
+	printk(" EFLAGS: %08lx\n", regs->eflags);
+	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+	       regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+	       regs->esi, regs->edi, regs->ebp);
+	printk(" DS: %04x ES: %04x\n",
+	       0xffff & regs->xds, 0xffff & regs->xes);
+#endif
+
+	show_trace(&regs->gpr[1]);
+}
+
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/drivers/char/Makefile linux_umopenmosix/drivers/char/Makefile --- linux-2.4.17/drivers/char/Makefile Sun Nov 11 20:09:32 2001 +++ linux_umopenmosix/drivers/char/Makefile Wed Jun 26 23:45:15 2002 @@ -72,6 +72,12 @@ endif endif +ifeq ($(ARCH),um) + KEYMAP = + KEYBD = + CONSOLE = +endif + ifeq ($(ARCH),sh) KEYMAP = KEYBD = @@ -137,7 +143,12 @@ obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP) endif +ifdef CONFIG_MOSIX_UDB +obj-y += sysrq.o +else obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o +endif + obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o obj-$(CONFIG_ROCKETPORT) += rocket.o obj-$(CONFIG_MOXA_SMARTIO) += mxser.o diff -urN linux-2.4.17/drivers/char/Makefile.orig linux_umopenmosix/drivers/char/Makefile.orig --- linux-2.4.17/drivers/char/Makefile.orig Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/drivers/char/Makefile.orig Wed Jun 26 23:45:15 2002 @@ -0,0 +1,267 @@ +# +# Makefile for the kernel character device drivers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now inherited from the +# parent makes.. +# + +# +# This file contains the font map for the default (hardware) font +# +FONTMAPFILE = cp437.uni + +O_TARGET := char.o + +obj-y += mem.o tty_io.o n_tty.o tty_ioctl.o raw.o pty.o misc.o random.o + +# All of the (potential) objects that export symbols. +# This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'. 
+ +export-objs := busmouse.o console.o keyboard.o sysrq.o \ + misc.o pty.o random.o selection.o serial.o \ + sonypi.o tty_io.o tty_ioctl.o generic_serial.o + +mod-subdirs := joystick ftape drm pcmcia + +list-multi := + +KEYMAP =defkeymap.o +KEYBD =pc_keyb.o +CONSOLE =console.o +SERIAL =serial.o + +ifeq ($(ARCH),s390) + KEYMAP = + KEYBD = + CONSOLE = + SERIAL = +endif + +ifeq ($(ARCH),mips) + ifneq ($(CONFIG_PC_KEYB),y) + KEYBD = + endif +endif + +ifeq ($(ARCH),s390x) + KEYMAP = + KEYBD = + CONSOLE = + SERIAL = +endif + +ifeq ($(ARCH),m68k) + ifdef CONFIG_AMIGA + KEYBD = amikeyb.o + else + KEYBD = + endif + SERIAL = +endif + +ifeq ($(ARCH),arm) + ifneq ($(CONFIG_PC_KEYMAP),y) + KEYMAP = + endif + ifneq ($(CONFIG_PC_KEYB),y) + KEYBD = + endif +endif + +ifeq ($(ARCH),um) + KEYMAP = + KEYBD = + CONSOLE = +endif + +ifeq ($(ARCH),sh) + KEYMAP = + KEYBD = + CONSOLE = + ifeq ($(CONFIG_SH_HP600),y) + KEYMAP = defkeymap.o + KEYBD = scan_keyb.o hp600_keyb.o + CONSOLE = console.o + endif + ifeq ($(CONFIG_SH_DMIDA),y) + # DMIDA does not connect the HD64465 PS/2 keyboard port + # but we allow for USB keyboards to be plugged in. 
+ KEYMAP = defkeymap.o + KEYBD = # hd64465_keyb.o pc_keyb.o + CONSOLE = console.o + endif + ifeq ($(CONFIG_SH_EC3104),y) + KEYMAP = defkeymap.o + KEYBD = ec3104_keyb.o + CONSOLE = console.o + endif + ifeq ($(CONFIG_SH_DREAMCAST),y) + KEYMAP = defkeymap.o + KEYBD = + CONSOLE = console.o + endif +endif + +ifeq ($(CONFIG_DECSTATION),y) + KEYMAP = + KEYBD = + SERIAL = decserial.o +endif + +ifeq ($(CONFIG_BAGET_MIPS),y) + KEYBD = + SERIAL = +endif + +ifeq ($(CONFIG_NINO),y) + SERIAL = +endif + +ifneq ($(CONFIG_SUN_SERIAL),) + SERIAL = +endif + +ifeq ($(CONFIG_QTRONIX_KEYBOARD),y) + KEYBD = qtronix.o + KEYMAP = qtronixmap.o +endif + +obj-$(CONFIG_VT) += vt.o vc_screen.o consolemap.o consolemap_deftbl.o $(CONSOLE) selection.o +obj-$(CONFIG_SERIAL) += $(SERIAL) +obj-$(CONFIG_SERIAL_ACPI) += acpi_serial.o +obj-$(CONFIG_SERIAL_21285) += serial_21285.o +obj-$(CONFIG_SERIAL_SA1100) += serial_sa1100.o +obj-$(CONFIG_SERIAL_AMBA) += serial_amba.o + +ifndef CONFIG_SUN_KEYBOARD + obj-$(CONFIG_VT) += keyboard.o $(KEYMAP) $(KEYBD) +else + obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP) +endif + +obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o +obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o +obj-$(CONFIG_ROCKETPORT) += rocket.o +obj-$(CONFIG_MOXA_SMARTIO) += mxser.o +obj-$(CONFIG_MOXA_INTELLIO) += moxa.o +obj-$(CONFIG_DIGI) += pcxx.o +obj-$(CONFIG_DIGIEPCA) += epca.o +obj-$(CONFIG_CYCLADES) += cyclades.o +obj-$(CONFIG_STALLION) += stallion.o +obj-$(CONFIG_ISTALLION) += istallion.o +obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o +obj-$(CONFIG_RISCOM8) += riscom8.o +obj-$(CONFIG_ISI) += isicom.o +obj-$(CONFIG_ESPSERIAL) += esp.o +obj-$(CONFIG_SYNCLINK) += synclink.o +obj-$(CONFIG_N_HDLC) += n_hdlc.o +obj-$(CONFIG_SPECIALIX) += specialix.o +obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o +obj-$(CONFIG_A2232) += ser_a2232.o generic_serial.o +obj-$(CONFIG_SX) += sx.o generic_serial.o +obj-$(CONFIG_RIO) += rio/rio.o generic_serial.o +obj-$(CONFIG_SH_SCI) += sh-sci.o generic_serial.o +obj-$(CONFIG_SERIAL167) += 
serial167.o +obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o +obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o +obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o +obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o + +subdir-$(CONFIG_RIO) += rio +subdir-$(CONFIG_INPUT) += joystick + +obj-$(CONFIG_ATIXL_BUSMOUSE) += atixlmouse.o +obj-$(CONFIG_LOGIBUSMOUSE) += logibusmouse.o +obj-$(CONFIG_PRINTER) += lp.o + +ifeq ($(CONFIG_INPUT),y) +obj-y += joystick/js.o +endif + +obj-$(CONFIG_BUSMOUSE) += busmouse.o +obj-$(CONFIG_DTLK) += dtlk.o +obj-$(CONFIG_R3964) += n_r3964.o +obj-$(CONFIG_APPLICOM) += applicom.o +obj-$(CONFIG_SONYPI) += sonypi.o +obj-$(CONFIG_MS_BUSMOUSE) += msbusmouse.o +obj-$(CONFIG_82C710_MOUSE) += qpmouse.o +obj-$(CONFIG_AMIGAMOUSE) += amigamouse.o +obj-$(CONFIG_ATARIMOUSE) += atarimouse.o +obj-$(CONFIG_ADBMOUSE) += adbmouse.o +obj-$(CONFIG_PC110_PAD) += pc110pad.o +obj-$(CONFIG_RTC) += rtc.o +obj-$(CONFIG_EFI_RTC) += efirtc.o +ifeq ($(CONFIG_PPC),) + obj-$(CONFIG_NVRAM) += nvram.o +endif +obj-$(CONFIG_TOSHIBA) += toshiba.o +obj-$(CONFIG_I8K) += i8k.o +obj-$(CONFIG_DS1620) += ds1620.o +obj-$(CONFIG_INTEL_RNG) += i810_rng.o + +obj-$(CONFIG_QIC02_TAPE) += tpqic02.o + +subdir-$(CONFIG_FTAPE) += ftape +subdir-$(CONFIG_DRM) += drm +subdir-$(CONFIG_PCMCIA) += pcmcia +subdir-$(CONFIG_AGP) += agp + +ifeq ($(CONFIG_FTAPE),y) +obj-y += ftape/ftape.o +endif + +obj-$(CONFIG_H8) += h8.o +obj-$(CONFIG_PPDEV) += ppdev.o +obj-$(CONFIG_DZ) += dz.o +obj-$(CONFIG_NWBUTTON) += nwbutton.o +obj-$(CONFIG_NWFLASH) += nwflash.o + +# Only one watchdog can succeed. We probe the hardware watchdog +# drivers first, then the softdog driver. This means if your hardware +# watchdog dies or is 'borrowed' for some reason the software watchdog +# still gives you some cover. 
+
+obj-$(CONFIG_PCWATCHDOG) += pcwd.o
+obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
+obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o
+obj-$(CONFIG_IB700_WDT) += ib700wdt.o
+obj-$(CONFIG_MIXCOMWD) += mixcomwd.o
+obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o
+obj-$(CONFIG_WDT) += wdt.o
+obj-$(CONFIG_WDTPCI) += wdt_pci.o
+obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
+obj-$(CONFIG_977_WATCHDOG) += wdt977.o
+obj-$(CONFIG_I810_TCO) += i810-tco.o
+obj-$(CONFIG_MACHZ_WDT) += machzwd.o
+obj-$(CONFIG_SH_WDT) += shwdt.o
+obj-$(CONFIG_EUROTECH_WDT) += eurotechwdt.o
+obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o
+
+subdir-$(CONFIG_MWAVE) += mwave
+ifeq ($(CONFIG_MWAVE),y)
+  obj-y += mwave/mwave.o
+endif
+
+include $(TOPDIR)/Rules.make
+
+fastdep:
+
+conmakehash: conmakehash.c
+	$(HOSTCC) $(HOSTCFLAGS) -o conmakehash conmakehash.c
+
+consolemap_deftbl.c: $(FONTMAPFILE) conmakehash
+	./conmakehash $(FONTMAPFILE) > consolemap_deftbl.c
+
+consolemap_deftbl.o: consolemap_deftbl.c $(TOPDIR)/include/linux/types.h
+
+.DELETE_ON_ERROR:
+
+defkeymap.c: defkeymap.map
+	set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
+
+qtronixmap.c: qtronixmap.map
+	set -e ; loadkeys --mktable $< | sed -e 's/^static *//' > $@
diff -urN linux-2.4.17/drivers/char/console.c linux_umopenmosix/drivers/char/console.c
--- linux-2.4.17/drivers/char/console.c	Fri Dec 21 19:41:53 2001
+++ linux_umopenmosix/drivers/char/console.c	Wed Jun 26 23:45:15 2002
@@ -2170,6 +2170,32 @@
 	clear_bit(0, &printing);
 }
 
+#ifdef CONFIG_MOSIX_UDB
+/*
+ * Busy-wait for a key in keyboard manual mode, then store queued keys in
+ * "into" until "cnt" are read or the queue is empty.  Returns the count.
+ */
+int vt_console_read(struct console *co, const char *into, unsigned cnt)
+{
+	extern int keyboard_has_any(void);
+	extern void keyboard_manual_mode(int);
+	extern char keyboard_readch(void);
+	int n = 0;
+
+	keyboard_manual_mode(1);
+	while (!keyboard_has_any())
+		;
+	while (cnt--) {
+		((char *)into)[n++] = keyboard_readch();
+		unblank_screen();
+		if (!keyboard_has_any())
+			break;
+	}
+	keyboard_manual_mode(0);
+	return(n);
+}
+#endif /* CONFIG_MOSIX_UDB */
+
static kdev_t vt_console_device(struct console *c) { return MKDEV(TTY_MAJOR, c->index ? c->index : fg_console + 1); @@ -2178,6 +2204,9 @@ struct console vt_console_driver = { name: "tty", write: vt_console_print, +#ifdef CONFIG_MOSIX_UDB + read: vt_console_read, +#endif /* CONFIG_MOSIX_UDB */ device: vt_console_device, wait_key: keyboard_wait_for_keypress, unblank: unblank_screen, diff -urN linux-2.4.17/drivers/char/drm/i810_dma.c linux_umopenmosix/drivers/char/drm/i810_dma.c --- linux-2.4.17/drivers/char/drm/i810_dma.c Wed Aug 8 19:42:15 2001 +++ linux_umopenmosix/drivers/char/drm/i810_dma.c Wed Jun 26 23:45:15 2002 @@ -36,6 +36,10 @@ #include "i810_drv.h" #include /* For task queue support */ +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* in case we don't have a 2.3.99-pre6 kernel or later: */ #ifndef VM_DONTCOPY #define VM_DONTCOPY 0 @@ -181,6 +185,13 @@ if(buf_priv->currently_mapped == I810_BUF_MAPPED) return -EINVAL; +#ifdef CONFIG_MOSIX + if(!mosix_go_home(1)) + { + printk("i810_map_buffer: Cannot map while away!\n"); + return(-ENOMEM); + } +#endif /* CONFIG_MOSIX */ if(VM_DONTCOPY != 0) { #if LINUX_VERSION_CODE <= 0x020402 down( ¤t->mm->mmap_sem ); @@ -222,6 +233,13 @@ if(VM_DONTCOPY != 0) { if(buf_priv->currently_mapped != I810_BUF_MAPPED) return -EINVAL; +#ifdef CONFIG_MOSIX + { + extern asmlinkage long sys_munmap(unsigned long,size_t); + retcode = sys_munmap((unsigned long)buf_priv->virtual, + (size_t) buf->total); + } +#else #if LINUX_VERSION_CODE <= 0x020402 down( ¤t->mm->mmap_sem ); #else @@ -240,6 +258,7 @@ #else up_write( ¤t->mm->mmap_sem ); #endif +#endif /* CONFIG_MOSIX */ } buf_priv->currently_mapped = I810_BUF_UNMAPPED; buf_priv->virtual = 0; diff -urN linux-2.4.17/drivers/char/keyboard.c linux_umopenmosix/drivers/char/keyboard.c --- linux-2.4.17/drivers/char/keyboard.c Tue Sep 18 23:39:51 2001 +++ linux_umopenmosix/drivers/char/keyboard.c Wed Jun 26 23:45:15 2002 @@ -65,6 +65,61 @@ EXPORT_SYMBOL(handle_scancode); 
EXPORT_SYMBOL(kbd_ledfunc); +#ifdef CONFIG_MOSIX_UDB +#define MANQ 8 +char manq[MANQ]; +static int man_in, man_out; +static int manual_mode; + +void +keyboard_manual_mode(int x) +{ + if(x) + manual_mode++; + else + { + if(manual_mode) + manual_mode--; + man_in = man_out = 0; + } +} + +int +in_manual_mode(void) +{ + return(manual_mode); +} + +int +keyboard_has_any(void) +{ + extern void keyboard_like_interrupt(void); + + keyboard_like_interrupt(); + return(man_in != man_out); +} + +char +keyboard_readch(void) +{ + char c; + + if(man_in == man_out) + return('\0'); + c = manq[man_out]; + man_out = (man_out + 1) % MANQ; + return(c); +} + +void +keyboard_putch(char ch) +{ + manq[man_in++] = ch; + man_in %= MANQ; +} + +#endif /* CONFIG_MOSIX_UDB */ + extern void ctrl_alt_del(void); DECLARE_WAIT_QUEUE_HEAD(keypress_wait); @@ -129,12 +184,19 @@ num, hold, scroll_forw, scroll_back, boot_it, caps_on, compose, SAK, decr_console, incr_console, spawn_console, bare_num; +#ifdef CONFIG_MOSIX_UDB +static void_fn call_udb; +#endif /* CONFIG_MOSIX_UDB */ + static void_fnp spec_fn_table[] = { do_null, enter, show_ptregs, show_mem, show_state, send_intr, lastcons, caps_toggle, num, hold, scroll_forw, scroll_back, boot_it, caps_on, compose, SAK, decr_console, incr_console, spawn_console, bare_num +#ifdef CONFIG_MOSIX_UDB + , call_udb +#endif /* CONFIG_MOSIX_UDB */ }; #define SPECIALS_ALLOWED_IN_RAW_MODE (1 << KVAL(K_SAK)) @@ -279,6 +341,9 @@ */ if (!rep || (vc_kbd_mode(kbd,VC_REPEAT) && tty && +#ifdef CONFIG_MOSIX_UDB + !manual_mode && +#endif /* CONFIG_MOSIX_UDB */ (L_ECHO(tty) || (tty->driver.chars_in_buffer(tty) == 0)))) { u_short keysym; u_char type; @@ -334,6 +399,13 @@ void put_queue(int ch) { +#ifdef CONFIG_MOSIX_UDB + if(manual_mode) + { + keyboard_putch((char)ch); + return; + } +#endif /* CONFIG_MOSIX_UDB */ wake_up(&keypress_wait); if (tty) { tty_insert_flip_char(tty, ch, 0); @@ -814,6 +886,14 @@ } } +#ifdef CONFIG_MOSIX_UDB +static void call_udb(void) +{ + extern void 
mosix_debugger(char *); + mosix_debugger("Keyboard"); +} +#endif /* CONFIG_MOSIX_UDB */ + /* * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, * or (ii) whatever pattern of lights people want to show using KDSETLED, @@ -940,5 +1020,8 @@ pm_kbd = pm_register(PM_SYS_DEV, PM_SYS_KBC, pm_kbd_request_override); +#ifdef CONFIG_MOSIX_UDB + key_maps[2][69] = key_maps[8][69] = K_DEBUGGER|0xf000; +#endif /* CONFIG_MOSIX_UDB */ return 0; } diff -urN linux-2.4.17/drivers/char/mem.c linux_umopenmosix/drivers/char/mem.c --- linux-2.4.17/drivers/char/mem.c Fri Dec 21 19:41:54 2001 +++ linux_umopenmosix/drivers/char/mem.c Wed Jun 26 23:45:15 2002 @@ -443,6 +443,9 @@ left = count; /* do we want to be clever? Arbitrary cut-off */ +#ifdef CONFIG_MOSIX + if(!(current->mosix.dflags & DDEPUTY)) +#endif /* CONFIG_MOSIX */ if (count >= PAGE_SIZE*4) { unsigned long partial; @@ -520,6 +523,62 @@ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } +#ifdef CONFIG_MOSIX +static ssize_t read_fun(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + unsigned long i, n = 0; + + if (!count) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + while(count) + { + i = (count > 2*PAGE_SIZE ? 2*PAGE_SIZE : count); + if(copy_to_user(buf, current, i)) /* returns the number of bytes NOT copied, never a negative value */ + return(-EFAULT); /* errors are reported as negative errno */ + buf += i; + count -= i; + n += i; + } + return(n); +} + +static ssize_t write_fun(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + unsigned long i, n = 0; + char *page; + + if (!count) + return 0; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) + return(-ENOMEM); + + while(count) + { + i = (count > PAGE_SIZE ? 
PAGE_SIZE : count); + if(copy_from_user(page, (char *)buf, i)) /* returns the number of bytes NOT copied, never a negative value */ + { + free_page((long)page); + return(-EFAULT); /* errors are reported as negative errno */ + } + buf += i; + count -= i; + n += i; + } + free_page((long)page); + return(n); +} +#endif /* CONFIG_MOSIX */ + #define mmap_kmem mmap_mem #define zero_lseek null_lseek #define full_lseek null_lseek @@ -572,6 +631,13 @@ write: write_full, }; +#ifdef CONFIG_MOSIX +static struct file_operations fun_fops = { + read: read_fun, + write: write_fun, +}; +#endif /* CONFIG_MOSIX */ + static int memory_open(struct inode * inode, struct file * filp) { switch (MINOR(inode->i_rdev)) { @@ -601,6 +667,11 @@ case 9: filp->f_op = &urandom_fops; break; +#ifdef CONFIG_MOSIX + case 10: + filp->f_op = &fun_fops; + break; +#endif /* CONFIG_MOSIX */ default: return -ENXIO; } diff -urN linux-2.4.17/drivers/char/pc_keyb.c linux_umopenmosix/drivers/char/pc_keyb.c --- linux-2.4.17/drivers/char/pc_keyb.c Sat Nov 10 00:01:21 2001 +++ linux_umopenmosix/drivers/char/pc_keyb.c Wed Jun 26 23:45:15 2002 @@ -525,6 +525,27 @@ spin_unlock_irq(&kbd_controller_lock); } + +#ifdef CONFIG_MOSIX_UDB +void keyboard_like_interrupt(void) +{ + unsigned char status; + unsigned char scancode; + + status = kbd_read_status(); + if(!(status & KBD_STAT_OBF)) + return; + status = kbd_read_status(); + scancode = kbd_read_input(); + + if (do_acknowledge(scancode)) + handle_scancode(scancode, !(scancode & 0x80)); + if ((status & KBD_STAT_OBF) && do_acknowledge(scancode)) + handle_scancode(scancode, !(scancode & 0x80)); + tasklet_schedule(&keyboard_tasklet); +} +#endif /* CONFIG_MOSIX_UDB */ + /* * send_data sends a character to the keyboard and waits * for an acknowledge, possibly retrying if asked to. 
Returns diff -urN linux-2.4.17/drivers/char/serial.c linux_umopenmosix/drivers/char/serial.c --- linux-2.4.17/drivers/char/serial.c Fri Dec 21 19:41:54 2001 +++ linux_umopenmosix/drivers/char/serial.c Wed Jun 26 23:45:15 2002 @@ -231,6 +231,10 @@ #include #include +#ifdef CONFIG_MOSIX_UDB +#include +#endif /* CONFIG_MOSIX_UDB */ + #ifdef CONFIG_MAC_SERIAL #define SERIAL_DEV_OFFSET 2 #else @@ -818,6 +822,10 @@ if (!info) return; +#if defined(CONFIG_MOSIX_UDB) && defined(CONFIG_SERIAL_CONSOLE) + if (serial_inp(info, UART_LSR) & (UART_LSR_BI | UART_LSR_FE)) + mosix_debugger("user request 1"); +#endif /* CONFIG_MOSIX_UDB && CONFIG_SERIAL_CONSOLE */ #ifdef CONFIG_SERIAL_MULTIPORT multi = &rs_multiport[irq]; if (multi->port_monitor) @@ -892,7 +900,15 @@ #endif info = IRQ_ports[irq]; +#if defined(CONFIG_MOSIX_UDB) && defined(CONFIG_SERIAL_CONSOLE) + if (!info) + return; + if (serial_inp(info, UART_LSR) & (UART_LSR_BI | UART_LSR_FE)) + mosix_debugger("user request 2"); + else if (!info->tty) +#else if (!info || !info->tty) +#endif /* CONFIG_MOSIX_UDB */ return; #ifdef CONFIG_SERIAL_MULTIPORT @@ -952,6 +968,10 @@ info = IRQ_ports[irq]; if (!info) return; +#if defined(CONFIG_MOSIX_UDB) && defined(CONFIG_SERIAL_CONSOLE) + if (serial_inp(info, UART_LSR) & (UART_LSR_BI | UART_LSR_FE)) + mosix_debugger("user request 3"); +#endif /* CONFIG_MOSIX_UDB && CONFIG_SERIAL_CONSOLE */ multi = &rs_multiport[irq]; if (!multi->port1) { /* Should never happen */ @@ -5850,6 +5870,44 @@ return c; } +#ifdef CONFIG_MOSIX_UDB +#ifdef CONFIG_SERIAL_CONSOLE +static int serial_console_read(struct console *co, const char *into, unsigned cnt) +{ + int n = 0; + struct serial_state *ser = rs_table + co->index; + int lsr; + int c; + int ier = inb(ser->port + UART_IER); + + outb(0x00, ser->port + UART_IER); + while(cnt) + { + do { + lsr = inb(ser->port + UART_LSR); + } while (!n && !(lsr & UART_LSR_DR)); + if(!(lsr & UART_LSR_DR)) + break; + c = inb(ser->port + UART_RX); + if(lsr & (UART_LSR_BI | 
UART_LSR_FE)) + { + mosix_debugger("recursive break"); + continue; + } + if (lsr & (UART_LSR_PE | UART_LSR_FE | UART_LSR_OE)) + { + printk("Bad input-char, status=0x%x\n", lsr); + continue; + } + ((char *)into)[n++] = c; + cnt--; + } + outb(ier, ser->port + UART_IER); + return(n); +} +#endif /* CONFIG_SERIAL_CONSOLE */ +#endif /* CONFIG_MOSIX_UDB */ + static kdev_t serial_console_device(struct console *c) { return MKDEV(TTY_MAJOR, 64 + c->index); @@ -5989,6 +6047,9 @@ static struct console sercons = { name: "ttyS", write: serial_console_write, +#ifdef CONFIG_MOSIX_UDB + read: serial_console_read, +#endif /* CONFIG_MOSIX_UDB */ device: serial_console_device, wait_key: serial_console_wait_key, setup: serial_console_setup, diff -urN linux-2.4.17/drivers/char/sysrq.c linux_umopenmosix/drivers/char/sysrq.c --- linux-2.4.17/drivers/char/sysrq.c Fri Dec 21 19:41:54 2001 +++ linux_umopenmosix/drivers/char/sysrq.c Wed Jun 26 23:45:15 2002 @@ -162,6 +162,13 @@ file_list_unlock(); DQUOT_OFF(sb); fsync_dev(sb->s_dev); +#ifdef CONFIG_MOSIX_UDB + if (sb->s_op->write_super && sb->s_dirt) + sb->s_op->write_super(sb); + if (sb->s_op->put_super) + sb->s_op->put_super(sb); + fsync_dev(sb->s_dev); +#endif /* CONFIG_MOSIX_UDB */ flags = MS_RDONLY; if (sb->s_op && sb->s_op->remount_fs) { ret = sb->s_op->remount_fs(sb, &flags, NULL); @@ -288,7 +295,11 @@ { struct task_struct *p; +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->mm) { /* Not swapper nor kernel thread */ if (p->pid == 1 && even_init) /* Ugly hack to kill init */ diff -urN linux-2.4.17/drivers/char/tty_io.c linux_umopenmosix/drivers/char/tty_io.c --- linux-2.4.17/drivers/char/tty_io.c Fri Dec 21 19:41:54 2001 +++ linux_umopenmosix/drivers/char/tty_io.c Wed Jun 26 23:45:15 2002 @@ -497,7 +497,11 @@ } read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if ((tty->session > 0) && (p->session == 
tty->session) && p->leader) { send_sig(SIGHUP,p,1); @@ -595,7 +599,11 @@ tty->pgrp = -1; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) +#else for_each_task(p) +#endif /* CONFIG_MOSIX */ if (p->session == current->session) p->tty = NULL; read_unlock(&tasklist_lock); @@ -1226,7 +1234,11 @@ struct task_struct *p; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->tty == tty || (o_tty && p->tty == o_tty)) p->tty = NULL; } @@ -1566,7 +1578,11 @@ struct task_struct *p; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) +#else for_each_task(p) +#endif /* CONFIG_MOSIX */ if (p->tty == tty) p->tty = NULL; read_unlock(&tasklist_lock); @@ -1839,7 +1855,11 @@ if (tty->driver.flush_buffer) tty->driver.flush_buffer(tty); read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if ((p->tty == tty) || ((session > 0) && (p->session == session))) { send_sig(SIGKILL, p, 1); diff -urN linux-2.4.17/drivers/char/vt.c linux_umopenmosix/drivers/char/vt.c --- linux-2.4.17/drivers/char/vt.c Fri Nov 16 20:08:28 2001 +++ linux_umopenmosix/drivers/char/vt.c Wed Jun 26 23:45:15 2002 @@ -221,6 +221,10 @@ key_map[0] = U(K_ALLOCATED); for (j = 1; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); +#ifdef CONFIG_MOSIX_UDB + if(s == 2 || s == 8) + key_map[69] = K_DEBUGGER|0xf000; +#endif /* CONFIG_MOSIX_UDB */ keymap_count++; } ov = U(key_map[i]); @@ -232,6 +236,10 @@ if (((ov == K_SAK) || (v == K_SAK)) && !capable(CAP_SYS_ADMIN)) return -EPERM; key_map[i] = U(v); +#ifdef CONFIG_MOSIX_UDB + if(i == 69 && (s == 2 || s == 8)) + key_map[i] = K_DEBUGGER|0xf000; +#endif /* CONFIG_MOSIX_UDB */ if (!s && (KTYP(ov) == KT_SHIFT || KTYP(v) == KT_SHIFT)) compute_shiftstate(); break; diff -urN linux-2.4.17/drivers/scsi/cpqfcTSworker.c linux_umopenmosix/drivers/scsi/cpqfcTSworker.c --- 
linux-2.4.17/drivers/scsi/cpqfcTSworker.c Thu Oct 25 22:53:50 2001 +++ linux_umopenmosix/drivers/scsi/cpqfcTSworker.c Wed Jun 26 23:45:15 2002 @@ -193,6 +193,9 @@ { current->fs = fs; atomic_inc(&fs->count); +#ifdef CONFIG_MOSIX + atomic_inc(&fs->users); +#endif /* CONFIG_MOSIX */ } siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); diff -urN linux-2.4.17/drivers/sgi/char/graphics.c linux_umopenmosix/drivers/sgi/char/graphics.c --- linux-2.4.17/drivers/sgi/char/graphics.c Thu Oct 11 18:43:30 2001 +++ linux_umopenmosix/drivers/sgi/char/graphics.c Wed Jun 26 23:45:15 2002 @@ -152,11 +152,17 @@ * sgi_graphics_mmap */ disable_gconsole (); +#ifdef CONFIG_MOSIX + r = do_mmap_down (file, (unsigned long)vaddr, + cards[board].g_regs_size, PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE, 0); +#else down_write(¤t->mm->mmap_sem); r = do_mmap (file, (unsigned long)vaddr, cards[board].g_regs_size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ if (r) return r; } diff -urN linux-2.4.17/drivers/sgi/char/shmiq.c linux_umopenmosix/drivers/sgi/char/shmiq.c --- linux-2.4.17/drivers/sgi/char/shmiq.c Mon Aug 27 18:56:31 2001 +++ linux_umopenmosix/drivers/sgi/char/shmiq.c Wed Jun 26 23:45:15 2002 @@ -285,11 +285,16 @@ s = req.arg * sizeof (struct shmqevent) + sizeof (struct sharedMemoryInputQueue); v = sys_munmap (vaddr, s); +#ifdef CONFIG_MOSIX + do_mmap_down(filp, vaddr, s, PROT_READ | PROT_WRITE, + MAP_PRIVATE|MAP_FIXED, 0); +#else down_write(¤t->mm->mmap_sem); do_munmap(current->mm, vaddr, s); do_mmap(filp, vaddr, s, PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 0); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ shmiqs[minor].events = req.arg; shmiqs[minor].mapped = 1; diff -urN linux-2.4.17/drivers/usb/storage/usb.c linux_umopenmosix/drivers/usb/storage/usb.c --- linux-2.4.17/drivers/usb/storage/usb.c Sun Nov 11 20:01:32 2001 +++ linux_umopenmosix/drivers/usb/storage/usb.c Wed Jun 26 23:45:15 2002 @@ -317,6 +317,9 @@ 
exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(¤t->files->users); +#endif /* CONFIG_MOSIX_DFSA */ daemonize(); /* set our name for identification purposes */ diff -urN linux-2.4.17/fs/Makefile linux_umopenmosix/fs/Makefile --- linux-2.4.17/fs/Makefile Mon Nov 12 19:34:16 2001 +++ linux_umopenmosix/fs/Makefile Wed Jun 26 23:45:15 2002 @@ -67,6 +67,9 @@ subdir-$(CONFIG_REISERFS_FS) += reiserfs subdir-$(CONFIG_DEVPTS_FS) += devpts subdir-$(CONFIG_SUN_OPENPROMFS) += openpromfs +ifdef CONFIG_MOSIX +subdir-$(CONFIG_MOSIX_FS) += mfs +endif obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff -urN linux-2.4.17/fs/binfmt_aout.c linux_umopenmosix/fs/binfmt_aout.c --- linux-2.4.17/fs/binfmt_aout.c Sat Nov 3 03:39:20 2001 +++ linux_umopenmosix/fs/binfmt_aout.c Wed Jun 26 23:45:17 2002 @@ -29,6 +29,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); @@ -108,6 +112,14 @@ dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); #endif dump.signal = signr; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + if(mosix_deputy_dump_thread(&dump)) + goto end_coredump; + } + else +#endif /* CONFIG_MOSIX */ dump_thread(regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen @@ -189,7 +201,11 @@ * memory and creates the pointer tables from them, and puts their * addresses on the "stack", returning the new stack pointer value. 
*/ +#ifdef CONFIG_MOSIX +unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm) +#else static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm) +#endif /* CONFIG_MOSIX */ { char **argv, **envp; unsigned long * sp; @@ -258,6 +274,9 @@ unsigned long fd_offset; unsigned long rlim; int retval; +#ifdef CONFIG_MOSIX + unsigned long start_stack; +#endif /* CONFIG_MOSIX */ ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && @@ -296,6 +315,12 @@ set_personality(PER_LINUX); #endif +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_init_aout_mm(&ex); + else + { +#endif /* CONFIG_MOSIX */ current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + @@ -305,6 +330,9 @@ current->mm->rss = 0; current->mm->mmap = NULL; +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; #ifdef __sparc__ @@ -349,6 +377,9 @@ return error; } +// #if defined(CONFIG_MOSIX) && !defined(__i386__) +// #error On platforms where "flush_icache_range" is non-empty, code must be adjusted +// #endif /* CONFIG_MOSIX */ flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { static unsigned long error_time, error_time2; @@ -379,24 +410,38 @@ goto beyond_if; } +#ifdef CONFIG_MOSIX + error = do_mmap_down(bprm->file, N_TXTADDR(ex), ex.a_text, + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, + fd_offset); +#else down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } +#ifdef CONFIG_MOSIX + error = do_mmap_down(bprm->file, N_DATADDR(ex), ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + 
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, + fd_offset + ex.a_text); +#else down_write(¤t->mm->mmap_sem); error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; @@ -405,8 +450,27 @@ beyond_if: set_binfmt(&aout_format); +#ifdef CONFIG_MOSIX + /* if DEPUTY, the REMOTE already did "set_brk" in aout_remote_init_mm */ + if(!(current->mosix.dflags & DDEPUTY)) +#endif /* CONFIG_MOSIX */ set_brk(current->mm->start_brk, current->mm->brk); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + retval = mosix_deputy_setup_args(SETUP_ARGS_AS_AOUT, + &start_stack); + if(retval < 0) + { + send_sig(SIGKILL, current, 0); + return(retval); + } + mosix_obtain_registers(START_THREAD_REGS); + } + else + { +#endif /* CONFIG_MOSIX */ retval = setup_arg_pages(bprm); if (retval < 0) { /* Someone check-me: is this error path enough? */ @@ -414,17 +478,46 @@ return retval; } +#ifdef CONFIG_MOSIX + start_stack = +#endif /* CONFIG_MOSIX */ current->mm->start_stack = (unsigned long) create_aout_tables((char *) bprm->p, bprm); +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ #ifdef __alpha__ regs->gp = ex.a_gpvalue; #endif +#ifdef CONFIG_MOSIX + start_thread(regs, ex.a_entry, start_stack); +#else start_thread(regs, ex.a_entry, current->mm->start_stack); +#endif /* CONFIG_MOSIX */ if (current->ptrace & PT_PTRACED) send_sig(SIGTRAP, current, 0); return 0; } +#ifdef CONFIG_MOSIX +void +aout_remote_init_mm(struct exec *ex) +{ + register struct task_struct *p = current; + + /* in principle we should first check for the "PER_LINUX" personality */ + /* but then which LINUX does not have it??? 
*/ + + p->mm->end_code = ex->a_text + (p->mm->start_code = N_TXTADDR(*ex)); + p->mm->end_data = ex->a_data + (p->mm->start_data = N_DATADDR(*ex)); + p->mm->brk = ex->a_bss + (p->mm->start_brk = N_BSSADDR(*ex)); + + p->mm->rss = 0; + p->mm->mmap = NULL; + set_brk(p->mm->start_brk, current->mm->brk); +} +#endif /* CONFIG_MOSIX */ + static int load_aout_library(struct file *file) { struct inode * inode; @@ -478,12 +571,19 @@ goto out; } /* Now use mmap to map the library into memory. */ +#ifdef CONFIG_MOSIX + error = do_mmap_down(file, start_addr, ex.a_text + ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + N_TXTOFF(ex)); +#else down_write(¤t->mm->mmap_sem); error = do_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ retval = error; if (error != start_addr) goto out; diff -urN linux-2.4.17/fs/binfmt_elf.c linux_umopenmosix/fs/binfmt_elf.c --- linux-2.4.17/fs/binfmt_elf.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/binfmt_elf.c Wed Jun 26 23:45:17 2002 @@ -42,6 +42,10 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); static int load_elf_library(struct file*); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); @@ -106,12 +110,17 @@ } } -static elf_addr_t * +static elf_addr_t * create_elf_tables(char *p, int argc, int envc, struct elfhdr * exec, unsigned long load_addr, unsigned long load_bias, +#ifdef CONFIG_MOSIX + unsigned long interp_load_addr, int ibcs, + struct elf_tables_extras *extras) +#else unsigned long interp_load_addr, int ibcs) +#endif /* CONFIG_MOSIX */ { elf_caddr_t *argv; elf_caddr_t *envp; @@ -123,13 +132,18 @@ /* * Get hold of platform and hardware capabilities masks for - * the machine we are running on. 
In some cases (Sparc), + * the machine we are running on. In some cases (Sparc), * this info is impossible to get, in others (i386) it is * merely difficult. */ +#ifdef CONFIG_MOSIX + hwcap = extras->hwcap; + k_platform = extras->platform; +#else hwcap = ELF_HWCAP; k_platform = ELF_PLATFORM; +#endif /* CONFIG_MOSIX */ if (k_platform) { platform_len = strlen(k_platform) + 1; @@ -176,12 +190,19 @@ NEW_AUX_ENT( 6, AT_BASE, interp_load_addr); NEW_AUX_ENT( 7, AT_FLAGS, 0); NEW_AUX_ENT( 8, AT_ENTRY, load_bias + exec->e_entry); +#ifdef CONFIG_MOSIX + NEW_AUX_ENT(9, AT_UID, (elf_addr_t) extras->uid); + NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) extras->euid); + NEW_AUX_ENT(11, AT_GID, (elf_addr_t) extras->gid); + NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) extras->egid); +#else NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); +#endif /* CONFIG_MOSIX */ #ifdef ARCH_DLINFO - /* + /* * ARCH_DLINFO must come last so platform specific code can enforce * special alignment requirements on the AUXV if necessary (eg. PPC). */ @@ -228,11 +249,17 @@ { unsigned long map_addr; +#ifdef CONFIG_MOSIX + map_addr = do_mmap_down(filep, ELF_PAGESTART(addr), + eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, + type, eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); +#else down_write(¤t->mm->mmap_sem); map_addr = do_mmap(filep, ELF_PAGESTART(addr), eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type, eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ return(map_addr); } @@ -334,7 +361,7 @@ /* * Now fill out the bss section. First pad the last page up * to the page boundary, and then perform a mmap to make sure - * that there are zero-mapped pages up to and including the + * that there are zero-mapped pages up to and including the * last bss page. 
*/ padzero(elf_bss); @@ -361,10 +388,23 @@ loff_t offset; int retval; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_fix_elf_aout_interp(interp_ex->a_bss, + interp_ex->a_data, interp_ex->a_text); + else +#endif /* CONFIG_MOSIX */ current->mm->end_code = interp_ex->a_text; text_data = interp_ex->a_text + interp_ex->a_data; +#ifdef CONFIG_MOSIX + if(!(current->mosix.dflags & DDEPUTY)) /* parenthesized: unary "!" binds tighter than "&" */ + { +#endif /* CONFIG_MOSIX */ current->mm->end_data = text_data; current->mm->brk = interp_ex->a_bss + text_data; +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ switch (N_MAGIC(*interp_ex)) { case OMAGIC: @@ -387,6 +427,12 @@ retval = interpreter->f_op->read(interpreter, addr, text_data, &offset); if (retval < 0) goto out; +#ifdef CONFIG_MOSIX +// #ifndef __i386__ +// #On platforms where "flush_icache_range" is non-empty, +// #it has to be distributed and run REMOTELY. +// #endif /* __i386__ */ +#endif /* CONFIG_MOSIX */ flush_icache_range((unsigned long)addr, (unsigned long)addr + text_data); @@ -428,7 +474,10 @@ struct elfhdr interp_elf_ex; struct exec interp_ex; char passed_fileno[6]; - +#ifdef CONFIG_MOSIX + struct elf_tables_extras extras; +#endif /* CONFIG_MOSIX */ + /* Get the exec-header */ elf_ex = *((struct elfhdr *) bprm->buf); @@ -513,6 +562,9 @@ struct fs_struct *old_fs = current->fs, *new_fs; get_exec_domain(old_domain); atomic_inc(&old_fs->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(&old_fs->users); +#endif /* CONFIG_MOSIX_DFSA */ set_personality(PER_SVR4); interpreter = open_exec(elf_interpreter); @@ -523,7 +575,11 @@ current->exec_domain = old_domain; current->fs = old_fs; put_exec_domain(new_domain); +#ifdef CONFIG_MOSIX_DFSA + put_used_fs_struct(new_fs); +#else put_fs_struct(new_fs); +#endif /* CONFIG_MOSIX_DFSA */ } else #endif { @@ -583,7 +639,7 @@ if (elf_interpreter) { retval = copy_strings_kernel(1,&passed_p,bprm); if (retval) - goto out_free_dentry; + goto out_free_dentry; 
bprm->argc++; } } @@ -595,6 +651,22 @@ goto 
out_free_dentry; /* OK, This is the point of no return */ +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + current->flags &= ~PF_FORKNOEXEC; + elf_entry = (unsigned long) elf_ex.e_entry; + SET_PERSONALITY(elf_ex, ibcs2_interpreter); + mosix_deputy_setup_args(SETUP_ARGS_AS_ELF, NULL); + if(IS_ERR((const unsigned *)bprm->p)) + { + send_sig(SIGKILL, current, 0); + return(0); /* too late for error */ + } + } + else + { +#endif /* CONFIG_MOSIX */ current->mm->start_data = 0; current->mm->end_data = 0; current->mm->end_code = 0; @@ -611,6 +683,9 @@ current->mm->rss = 0; setup_arg_pages(bprm); /* XXX: check error */ current->mm->start_stack = bprm->p; +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ /* Now we do a little grungy work by mmaping the ELF image into the correct location in memory. At this point, we assume that @@ -626,7 +701,7 @@ if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; - + /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ @@ -724,6 +799,26 @@ compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; +#ifdef CONFIG_MOSIX + extras.uid = current->uid; + extras.euid = current->euid; + extras.gid = current->gid; + extras.egid = current->egid; + extras.hwcap = ELF_HWCAP; + memcpy(extras.platform, ELF_PLATFORM, sizeof(extras.platform)); + extras.platform[sizeof(extras.platform)-1] = '\0'; + if(current->mosix.dflags & DDEPUTY) + bprm->p = mosix_deputy_elf_setup((char *)bprm->p, + bprm->argc, bprm->envc, &elf_ex, + load_addr, load_bias, interp_load_addr, + (interpreter_type == INTERPRETER_AOUT ? 0 : 1), + interpreter_type == INTERPRETER_AOUT ? + strlen(passed_fileno) + 1 : 0, + elf_brk, end_code, start_code, start_data, end_data, elf_bss, + current->personality, &extras); + if(!(current->mosix.dflags & DDEPUTY)) /* must check again! 
*/ + { +#endif /* CONFIG_MOSIX */ bprm->p = (unsigned long) create_elf_tables((char *)bprm->p, bprm->argc, @@ -731,7 +826,12 @@ &elf_ex, load_addr, load_bias, interp_load_addr, +#ifdef CONFIG_MOSIX + (interpreter_type == INTERPRETER_AOUT ? 0 : 1), + &extras); +#else (interpreter_type == INTERPRETER_AOUT ? 0 : 1)); +#endif /* CONFIG_MOSIX */ /* N.B. passed_fileno might not be initialized? */ if (interpreter_type == INTERPRETER_AOUT) current->mm->arg_start += strlen(passed_fileno) + 1; @@ -765,11 +865,21 @@ Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ /* N.B. Shouldn't the size here be PAGE_SIZE?? */ +#ifdef CONFIG_MOSIX + error = do_mmap_down(NULL, 0, 4096, PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE, 0); +#else down_write(¤t->mm->mmap_sem); error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ + } + +#ifdef CONFIG_MOSIX } + mosix_obtain_registers(START_THREAD_REGS|ELF_PLAT_INIT_REGS); +#endif /* CONFIG_MOSIX */ #ifdef ELF_PLAT_INIT /* @@ -802,6 +912,69 @@ goto out; } +#ifdef CONFIG_MOSIX +/* + * the following routine is the remote portion of part of "do_load_elf_binary" + * and must be updated with changes to the former. 
+ */ +unsigned long +elf_remote_setup(char *p, int argc, int envc, struct elfhdr *exec, + unsigned long load_addr, unsigned long load_bias, + unsigned long interp_load_addr, int ibcs, int add_arg_start, + unsigned long elf_brk, unsigned long end_code, unsigned long start_code, + unsigned long start_data, unsigned long end_data, unsigned long elf_bss, + unsigned long personality, struct elf_tables_extras *extras) +{ + unsigned long ret; + struct exec_domain *new_domain = lookup_exec_domain(personality); + + if(!new_domain) + mosix_go_home(2); /* never returns */ + + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); + current->exec_domain = new_domain; + if (new_domain->module) + __MOD_INC_USE_COUNT(new_domain->module); + ret = (unsigned long)create_elf_tables(p, argc, envc, exec, load_addr, + load_bias, interp_load_addr, ibcs, extras); + current->mm->arg_start += add_arg_start; + current->mm->start_brk = current->mm->brk = elf_brk; + current->mm->end_code = end_code; + current->mm->start_code = start_code; + current->mm->start_data = start_data; + current->mm->end_data = end_data; + current->mm->start_stack = (unsigned long)p; + + /* Calling set_brk effectively mmaps the pages that we need for the bss and break + sections */ + set_brk(elf_bss, elf_brk); + + padzero(elf_bss); + +#if 0 + printk("(start_brk) %x\n" , current->mm->start_brk); + printk("(end_code) %x\n" , current->mm->end_code); + printk("(start_code) %x\n" , current->mm->start_code); + printk("(end_data) %x\n" , current->mm->end_data); + printk("(start_stack) %x\n" , current->mm->start_stack); + printk("(brk) %x\n" , current->mm->brk); +#endif + + if ( personality == PER_SVR4 ) + { + /* Why this, you ask??? Well SVr4 maps page 0 as read-only, + and some applications "depend" upon this behavior. + Since we do not have the power to recompile these, we + emulate the SVr4 behavior. Sigh. */ + /* N.B. Shouldn't the size here be PAGE_SIZE?? 
*/ + do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE, 0); + } + return(ret); +} +#endif /* CONFIG_MOSIX */ + /* This is really simpleminded and specialized - we are loading an a.out library that is given an ELF header. */ @@ -848,6 +1021,16 @@ while (elf_phdata->p_type != PT_LOAD) elf_phdata++; /* Now use mmap to map the library into memory. */ +#ifdef CONFIG_MOSIX + error = do_mmap_down(file, + ELF_PAGESTART(elf_phdata->p_vaddr), + (elf_phdata->p_filesz + + ELF_PAGEOFFSET(elf_phdata->p_vaddr)), + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, + (elf_phdata->p_offset - + ELF_PAGEOFFSET(elf_phdata->p_vaddr))); +#else down_write(&current->mm->mmap_sem); error = do_mmap(file, ELF_PAGESTART(elf_phdata->p_vaddr), @@ -858,6 +1041,7 @@ (elf_phdata->p_offset - ELF_PAGEOFFSET(elf_phdata->p_vaddr))); up_write(&current->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) goto out_free_ph; @@ -937,6 +1121,14 @@ #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#ifdef CONFIG_MOSIX +int +elf_maydump(struct vm_area_struct *vma) +{ + return(maydump(vma)); +} +#endif /* CONFIG_MOSIX */ + /* An ELF note in memory */ struct memelfnote { @@ -1031,17 +1223,60 @@ struct elf_prstatus prstatus; /* NT_PRSTATUS */ elf_fpregset_t fpu; /* NT_PRFPREG */ struct elf_prpsinfo psinfo; /* NT_PRPSINFO */ +#ifdef CONFIG_MOSIX + struct vmalist *vmas = NULL, *rvma; + unsigned long arg_start, arg_end; +#endif /* CONFIG_MOSIX */ + +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + segs = 0; + mosix_deputy_rusage(0); + if((i=mosix_deputy_list_vmas(&vmas, &arg_start, &arg_end)) < 0) + { + MOD_DEC_USE_COUNT; + return(0); + } + for(rvma = vmas ; i-- > 0 ; rvma++) + { + if(rvma->maydump) + { + int sz = rvma->vmend - rvma->vmstart; + if (size+sz >= limit) + break; + else + size += sz; + } + segs++; + } + vma = NULL; /* to satisfy the silly compiler */ + } + /* no "else" here: the memset of psinfo that follows 
must also run for a DEPUTY */ +#endif /* CONFIG_MOSIX */ /* first copy the 
parameters from user space */ memset(&psinfo, 0, sizeof(psinfo)); { int i, len; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + len = arg_end - arg_start; + else +#endif /* CONFIG_MOSIX */ len = current->mm->arg_end - current->mm->arg_start; if (len >= ELF_PRARGSZ) len = ELF_PRARGSZ-1; +#ifdef CONFIG_MOSIX + copy_from_user(&psinfo.pr_psargs, + (current->mosix.dflags & DDEPUTY) ? + (const char *)arg_start : + (const char *)current->mm->arg_start, len); +#else copy_from_user(&psinfo.pr_psargs, (const char *)current->mm->arg_start, len); +#endif /* CONFIG_MOSIX */ for(i = 0; i < len; i++) if (psinfo.pr_psargs[i] == 0) psinfo.pr_psargs[i] = ' '; @@ -1118,6 +1353,9 @@ * This transfers the registers from regs into the standard * coredump arrangement, whatever that is. */ +#ifdef CONFIG_MOSIX + mosix_obtain_registers(ALL_REGISTERS); +#endif /* CONFIG_MOSIX */ #ifdef ELF_CORE_COPY_REGS ELF_CORE_COPY_REGS(prstatus.pr_reg, regs) #else @@ -1139,7 +1377,12 @@ notes[1].type = NT_PRPSINFO; notes[1].datasz = sizeof(psinfo); notes[1].data = &psinfo; +#ifdef CONFIG_MOSIX + i = LOGICAL_STATE(current); + i = i ? ffz(~i) + 1 : 0; +#else i = current->state ? ffz(~current->state) + 1 : 0; +#endif /* CONFIG_MOSIX */ psinfo.pr_state = i; psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i]; psinfo.pr_zomb = psinfo.pr_sname == 'Z'; @@ -1155,6 +1398,11 @@ notes[2].data = current; /* Try to dump the FPU. 
*/ +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + prstatus.pr_fpvalid = mosix_deputy_dump_fpu(&fpu); + else +#endif /* CONFIG_MOSIX */ prstatus.pr_fpvalid = dump_fpu (regs, &fpu); if (!prstatus.pr_fpvalid) { @@ -1167,7 +1415,7 @@ notes[3].datasz = sizeof(fpu); notes[3].data = &fpu; } - + /* Write notes phdr entry */ { struct elf_phdr phdr; @@ -1193,6 +1441,30 @@ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); /* Write program headers for segments dump */ +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + for(rvma = vmas , i = 0 ; i < segs ; rvma++ , i++) { + struct elf_phdr phdr; + size_t sz; + + sz = rvma->vmend - rvma->vmstart; + + phdr.p_type = PT_LOAD; + phdr.p_offset = offset; + phdr.p_vaddr = rvma->vmstart; + phdr.p_paddr = 0; + phdr.p_filesz = rvma->maydump ? sz : 0; + phdr.p_memsz = sz; + offset += phdr.p_filesz; + phdr.p_flags = (rvma->vmflags & VM_READ) ? PF_R : 0; + if (rvma->vmflags & VM_WRITE) phdr.p_flags |= PF_W; + if (rvma->vmflags & VM_EXEC) phdr.p_flags |= PF_X; + phdr.p_align = ELF_EXEC_PAGESIZE; + + DUMP_WRITE(&phdr, sizeof(phdr)); + } + else +#endif /* CONFIG_MOSIX */ for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { struct elf_phdr phdr; size_t sz; @@ -1220,6 +1492,23 @@ DUMP_SEEK(dataoff); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + for(i = 0 , rvma = vmas ; i < segs ; rvma++ , i++) + if(rvma->maydump) + { + unsigned long addr = rvma->vmstart; + unsigned long len = rvma->vmend - rvma->vmstart; + +#ifdef DEBUG + printk("elf_core_dump: writing %08lx %lx\n", addr, len); +#endif + DUMP_WRITE((void *)addr, len); + } + } + else +#endif /* CONFIG_MOSIX */ for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { unsigned long addr; @@ -1260,6 +1549,10 @@ } end_coredump: +#ifdef CONFIG_MOSIX + if(vmas) + kfree(vmas); +#endif /* CONFIG_MOSIX */ set_fs(fs); up_write(¤t->mm->mmap_sem); return has_dumped; diff -urN linux-2.4.17/fs/buffer.c linux_umopenmosix/fs/buffer.c --- 
linux-2.4.17/fs/buffer.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/buffer.c Wed Jun 26 23:45:17 2002 @@ -53,6 +53,10 @@ #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) #define NR_RESERVED (10*MAX_BUF_PER_PAGE) #define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this @@ -403,6 +407,9 @@ struct inode * inode; int err; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ err = -EBADF; file = fget(fd); if (!file) @@ -435,6 +442,9 @@ struct inode * inode; int err; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ err = -EBADF; file = fget(fd); if (!file) diff -urN linux-2.4.17/fs/dcache.c linux_umopenmosix/fs/dcache.c --- linux-2.4.17/fs/dcache.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/dcache.c Wed Jun 26 23:45:17 2002 @@ -26,6 +26,10 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + #define DCACHE_PARANOIA 1 /* #define DCACHE_DEBUG 1 */ @@ -834,6 +838,11 @@ void d_rehash(struct dentry * entry) { struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); + +#ifdef CONFIG_MOSIX_FS + if(entry->d_flags & DCACHE_NO_CACHE) + return; +#endif /* CONFIG_MOSIX_FS */ if (!list_empty(&entry->d_hash)) BUG(); spin_lock(&dcache_lock); list_add(&entry->d_hash, list); @@ -906,6 +915,9 @@ spin_lock(&dcache_lock); /* Move the dentry to the target hash queue */ list_del(&dentry->d_hash); +#ifdef CONFIG_MOSIX_FS + if(!(dentry->d_flags & DCACHE_NO_CACHE)) +#endif /* CONFIG_MOSIX_FS */ list_add(&dentry->d_hash, &target->d_hash); /* Unhash the target: dput() will then get rid of it */ @@ -951,6 +963,9 @@ *--end = '\0'; buflen--; +#ifdef CONFIG_MOSIX_FS + if (!(dentry->d_flags & DCACHE_NO_CACHE)) +#endif /* CONFIG_MOSIX_FS */ if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) { buflen -= 10; end -= 10; @@ -1022,19 +1037,40 @@ char *page = (char *) 
__get_free_page(GFP_USER);
 	if (!page)
+#ifdef CONFIG_MOSIX_DFSA
+		return ((current->mosix.dflags & DREMOTE) ?
+						-EDOITATHOME : -ENOMEM);
+#else
 		return -ENOMEM;
+#endif /* CONFIG_MOSIX_DFSA */
 	read_lock(&current->fs->lock);
+#ifdef CONFIG_MOSIX_DFSA
+	if(!current->fs->pwd) {	/* we are REMOTE and not on a DFSA FS! */
+		read_unlock(&current->fs->lock);
+		free_page((unsigned long) page);	/* early exit: release the buffer allocated above */
+		return(-EDOITATHOME);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
 	pwdmnt = mntget(current->fs->pwdmnt);
 	pwd = dget(current->fs->pwd);
 	rootmnt = mntget(current->fs->rootmnt);
 	root = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
+#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS)
+	if(pwd->d_inode->i_op && pwd->d_inode->i_op->check_path)	/* optional per-filesystem hook, called on the cwd dentry */
+		pwd->d_inode->i_op->check_path(pwd);
+#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */
 	error = -ENOENT;
 	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
+#ifdef CONFIG_MOSIX_FS
+	if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash) ||
+	    (pwd->d_flags & DCACHE_NO_CACHE)) {	/* DCACHE_NO_CACHE dentries are never hashed (see d_rehash), so accept them too */
+#else
 	if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+#endif /* CONFIG_MOSIX_FS */
 		unsigned long len;
 		char * cwd;
diff -urN linux-2.4.17/fs/dnotify.c linux_umopenmosix/fs/dnotify.c
--- linux-2.4.17/fs/dnotify.c	Wed Nov 8 09:27:57 2000
+++ linux_umopenmosix/fs/dnotify.c	Wed Jun 26 23:45:17 2002
@@ -20,6 +20,10 @@
 #include
 #include
+#ifdef CONFIG_MOSIX_FS
+#include
+#endif /* CONFIG_MOSIX_FS */
+
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 int dir_notify_enable = 1;
@@ -51,6 +55,10 @@
 	inode = filp->f_dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return -ENOTDIR;
+#ifdef CONFIG_MOSIX_FS
+	if(inode->i_sb->s_magic == MFS_SUPER_MAGIC)	/* directory notification is refused on MFS */
+		return(-EPERM);
+#endif /* CONFIG_MOSIX_FS */
 	if (!turning_off) {
 		dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL);
 		if (dn == NULL)
diff -urN linux-2.4.17/fs/exec.c linux_umopenmosix/fs/exec.c
--- linux-2.4.17/fs/exec.c	Fri Dec 21 19:41:55 2001
+++ linux_umopenmosix/fs/exec.c	Wed Jun 26 23:45:17 2002
@@ -19,7 +19,7 @@
  *
current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -47,6 +47,12 @@ #endif int core_uses_pid; +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ static struct linux_binfmt *formats; static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED; @@ -70,7 +76,7 @@ fmt->next = formats; formats = fmt; write_unlock(&binfmt_lock); - return 0; + return 0; } int unregister_binfmt(struct linux_binfmt * fmt) @@ -125,6 +131,9 @@ if (IS_ERR(file)) goto out; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dflags &= ~DSTATSDOWN; /* NOT a DFSA system-call */ +#endif /* CONFIG_MOSIX_DFSA */ error = -ENOEXEC; if(file->f_op && file->f_op->read) { struct linux_binfmt * fmt; @@ -180,7 +189,7 @@ * memory to free pages in kernel mem. These are in a format ready * to be put directly into the top of new user memory. */ -int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) { while (argc-- > 0) { char *str; @@ -189,11 +198,11 @@ if (get_user(str, argv+argc) || !(len = strnlen_user(str, bprm->p))) return -EFAULT; - if (bprm->p < len) - return -E2BIG; + if (bprm->p < len) + return -E2BIG; bprm->p -= len; - /* XXX: add architecture specific overflow check here. */ + /* XXX: add architecture specific overflow check here. 
*/ pos = bprm->p; while (len > 0) { @@ -227,7 +236,7 @@ kunmap(page); if (err) - return -EFAULT; + return -EFAULT; pos += bytes_to_copy; str += bytes_to_copy; @@ -244,10 +253,10 @@ { int r; mm_segment_t oldfs = get_fs(); - set_fs(KERNEL_DS); + set_fs(KERNEL_DS); r = copy_strings(argc, argv, bprm); set_fs(oldfs); - return r; + return r; } /* @@ -305,9 +314,9 @@ bprm->exec += stack_base; mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (!mpnt) - return -ENOMEM; - + if (!mpnt) + return -ENOMEM; + down_write(¤t->mm->mmap_sem); { mpnt->vm_mm = current->mm; @@ -321,7 +330,7 @@ mpnt->vm_private_data = (void *) 0; insert_vm_struct(current->mm, mpnt); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - } + } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { struct page *page = bprm->page[i]; @@ -332,7 +341,7 @@ stack_base += PAGE_SIZE; } up_write(¤t->mm->mmap_sem); - + return 0; } @@ -343,7 +352,16 @@ struct file *file; int err = 0; +#ifdef CONFIG_MOSIX_FS + /* if file is MFS, we need to complete in order to perform the "open" + * method of dentry_open (which is NULL on MFS itself) */ + nd.express_function = EF_OPEN; + nd.complete_args.ints.i1 = O_RDONLY+1; + nd.complete_args.ints.i2 = 0; + if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_COMPLETE, &nd)) +#else if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) +#endif /* CONFIG_MOSIX_FS */ err = path_walk(name, &nd); file = ERR_PTR(err); if (!err) { @@ -390,7 +408,11 @@ return result; } +#ifdef CONFIG_MOSIX +int exec_mmap(void) +#else static int exec_mmap(void) +#endif /* CONFIG_MOSIX */ { struct mm_struct * mm, * old_mm; @@ -398,6 +420,9 @@ if (old_mm && atomic_read(&old_mm->mm_users) == 1) { mm_release(); exit_mmap(old_mm); +#ifdef CONFIG_MOSIX + mosix_exec_mmap(NULL); +#endif /* CONFIG_MOSIX */ return 0; } @@ -423,6 +448,9 @@ task_unlock(current); activate_mm(active_mm, mm); mm_release(); +#ifdef CONFIG_MOSIX + mosix_exec_mmap(old_mm); +#endif /* CONFIG_MOSIX */ if (old_mm) { if 
(active_mm != old_mm) BUG(); mmput(old_mm); @@ -440,7 +468,7 @@ * disturbing other processes. (Other processes might share the signal * table via the CLONE_SIGNAL option to clone().) */ - + static inline int make_private_signals(void) { struct signal_struct * newsig; @@ -458,7 +486,7 @@ spin_unlock_irq(¤t->sigmask_lock); return 0; } - + /* * If make_private_signals() made a copy of the signal table, decrement the * refcount of the original table, and free it if necessary. @@ -541,9 +569,16 @@ retval = make_private_signals(); if (retval) goto flush_failed; - /* + /* * Release all of the old mmap stuff */ +#ifdef CONFIG_MOSIX + if(!(current->mosix.dflags & DDEPUTY)) + mosix_clear_statistics(); + if(current->mosix.dflags & DDEPUTY) + retval = mosix_deputy_exec_mmap(bprm->filename); + else +#endif /* CONFIG_MOSIX */ retval = exec_mmap(); if (retval) goto mmap_failed; @@ -553,7 +588,11 @@ current->sas_ss_sp = current->sas_ss_size = 0; if (current->euid == current->uid && current->egid == current->gid) +#ifdef CONFIG_MOSIX + set_me_dumpable(1); +#else current->mm->dumpable = 1; +#endif /* CONFIG_MOSIX */ name = bprm->filename; for (i=0; (ch = *(name++)) != '\0';) { if (ch == '/') @@ -563,20 +602,27 @@ current->comm[i++] = ch; } current->comm[i] = '\0'; +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ flush_thread(); de_thread(current); - if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || permission(bprm->file->f_dentry->d_inode,MAY_READ)) +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ /* An exec changes our domain. We are no longer part of the thread group */ - + current->self_exec_id++; - + flush_signal_handlers(current); flush_old_files(current->files); @@ -602,8 +648,8 @@ return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP); } -/* - * Fill the binprm structure from the inode. 
+/* + * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes */ int prepare_binprm(struct linux_binprm *bprm) @@ -657,7 +703,7 @@ cap_set_full(bprm->cap_inheritable); cap_set_full(bprm->cap_permitted); } - if (bprm->e_uid == 0) + if (bprm->e_uid == 0) cap_set_full(bprm->cap_effective); } @@ -680,7 +726,7 @@ * */ -void compute_creds(struct linux_binprm *bprm) +void compute_creds(struct linux_binprm *bprm) { kernel_cap_t new_permitted, working; int do_unlock = 0; @@ -692,12 +738,21 @@ if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset(new_permitted, current->cap_permitted)) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; - +#endif /* CONFIG_MOSIX */ + lock_kernel(); if (must_not_trace_exec(current) +#ifdef CONFIG_MOSIX_DFSA + || atomic_read(¤t->fs->users) > 1 + || atomic_read(¤t->files->users) > 1 +#else || atomic_read(¤t->fs->count) > 1 || atomic_read(¤t->files->count) > 1 +#endif /* CONFIG_MOSIX_DFSA */ || atomic_read(¤t->sig->count) > 1) { if(!capable(CAP_SETUID)) { bprm->e_uid = current->uid; @@ -720,9 +775,15 @@ current->cap_effective = cap_intersect(new_permitted, bprm->cap_effective); } - + /* AUD: Audit candidate if current->cap_effective is set */ +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_CAPCNG|DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ current->suid = current->euid = current->fsuid = bprm->e_uid; current->sgid = current->egid = current->fsgid = bprm->e_gid; @@ -867,13 +928,26 @@ return retval; bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); + memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); bprm.file = file; bprm.filename = filename; bprm.sh_bang = 0; bprm.loader = 0; bprm.exec = 0; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + { + 
mosix_deputy_count_args(argv, envp, &bprm.argc, &bprm.envc); + if(bprm.argc < 0 || bprm.envc < 0) + { + allow_write_access(file); + fput(file); + return(bprm.argc < 0 ? bprm.argc : bprm.envc); + } + goto counted; + } +#endif /* CONFIG_MOSIX */ if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { allow_write_access(file); fput(file); @@ -885,30 +959,61 @@ fput(file); return bprm.envc; } +#ifdef CONFIG_MOSIX + counted: +#endif /* CONFIG_MOSIX */ retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; +#ifdef CONFIG_MOSIX + current->mosix.inexec = &bprm; + if((current->mosix.dflags & DDEPUTY) && retval >= 0) + retval = mosix_deputy_bring_strings(&bprm, (char *)regs->ebx, + envp, argv); + else + { +#endif /* CONFIG_MOSIX */ + if (retval < 0) + goto out; retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; + if (retval < 0) + goto out; bprm.exec = bprm.p; retval = copy_strings(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; + if (retval < 0) + goto out; retval = copy_strings(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ + if (retval < 0) + goto out; retval = search_binary_handler(&bprm,regs); +#ifdef CONFIG_MOSIX + current->mosix.inexec = NULL; + if(retval >= 0) + { + mosix_decay_exec(); + current->mosix.sigmig = 0; +#ifdef CONFIG_MOSIX_FS + current->mosix.lastexec = current->mosix.whereami ? 
: PE;
+#ifdef CONFIG_MOSIX_DFSA
+		current->mosix.dupdates |= DFSA_UPDSEL;
+#endif /* CONFIG_MOSIX_DFSA */
+#endif /* CONFIG_MOSIX_FS */
+	}
+#endif /* CONFIG_MOSIX */
 	if (retval >= 0)
 		/* execve success */
 		return retval;
 out:
+#ifdef CONFIG_MOSIX
+	current->mosix.inexec = NULL;
+#endif /* CONFIG_MOSIX */
 	/* Something went wrong, return the inode and free the argument pages*/
 	allow_write_access(bprm.file);
 	if (bprm.file)
@@ -945,15 +1050,28 @@
 	binfmt = current->binfmt;
 	if (!binfmt || !binfmt->core_dump)
 		goto fail;
+#ifdef CONFIG_MOSIX
+	if(!i_am_dumpable())
+#else
 	if (!current->mm->dumpable)
+#endif /* CONFIG_MOSIX */
 		goto fail;
+#ifdef CONFIG_MOSIX
+	set_me_dumpable(0);
+#else
 	current->mm->dumpable = 0;
+#endif /* CONFIG_MOSIX */
 	if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
 		goto fail;
 	memcpy(corename,"core.", 5);
 	corename[4] = '\0';
+#ifdef CONFIG_MOSIX
+	if (core_uses_pid ||
+	    (current->mm && atomic_read(&current->mm->mm_realusers) != 1))
+#else
 	if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)
+#endif /* CONFIG_MOSIX */
 		sprintf(&corename[4], ".%d", current->pid);
 	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
 	if (IS_ERR(file))
@@ -972,6 +1090,10 @@
 		goto close_fail;
 	if (do_truncate(file->f_dentry, 0) != 0)
 		goto close_fail;
+#ifdef CONFIG_MOSIX
+	if(current->mosix.dflags & DDEPUTY)
+		mosix_deputy_rusage(0);
+#endif /* CONFIG_MOSIX */
 	retval = binfmt->core_dump(signr, regs, file);
@@ -981,3 +1103,43 @@
 	unlock_kernel();
 	return retval;
 }
+
+#ifdef CONFIG_MOSIX
+/*
+ * The following routines are performed on the REMOTE side
+ * and must be updated whenever the corresponding original-code
+ * is modified:
+ */
+
+void
+execve_remote_counts(char **argv, char **envp, int *argc, int *envc)
+{
+/* the following definition must match the value
+ * of bprm.p at the beginning of do_execve():
+ * (otherwise, this constant needs to be passed as argument)
+ */
+#define BPRM_P_AS_SIZE	(PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *))
+	if ((*argc = count(argv, BPRM_P_AS_SIZE / sizeof(void *))) < 0)
+		return;	/* *argc holds the error; *envc is left untouched */
+	if ((*envc = count(envp, BPRM_P_AS_SIZE / sizeof(void *))) < 0)
+		return;	/* *envc holds the error */
+}
+
+int
+execve_remote_bring_strings(struct linux_binprm *bprm, char **envp, char **argv)
+{	/* copies filename, then envp, then argv into bprm: same order as the non-deputy path of do_execve() */
+	int retval;
+	char *filename = getname(bprm->filename);
+
+	if(IS_ERR(filename))
+		return(PTR_ERR(filename));	/* standard helper instead of a raw pointer-to-int cast */
+	retval = copy_strings_kernel(1, &filename, bprm);
+	putname(filename);	/* released whether or not the copy succeeded */
+	if(retval < 0)
+		return(retval);
+	bprm->exec = bprm->p;
+	if((retval = copy_strings(bprm->envc, envp, bprm)) < 0)
+		return(retval);
+	return(copy_strings(bprm->argc, argv, bprm));
+}
+#endif /* CONFIG_MOSIX */
diff -urN linux-2.4.17/fs/fcntl.c linux_umopenmosix/fs/fcntl.c
--- linux-2.4.17/fs/fcntl.c	Mon Sep 17 23:16:30 2001
+++ linux_umopenmosix/fs/fcntl.c	Wed Jun 26 23:45:17 2002
@@ -16,6 +16,10 @@
 #include
 #include
+#ifdef CONFIG_MOSIX_DFSA
+#include
+#endif /* CONFIG_MOSIX_DFSA */
+
 extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
 extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
 extern int fcntl_getlease(struct file *filp);
@@ -81,6 +85,13 @@
 				files->max_fdset, start);
 	}
+#ifdef CONFIG_MOSIX_DFSA
+	if((current->mosix.dflags & DREMOTE) && newfd >= files->max_fds)
+	{
+		error = -EDOITATHOME;
+		goto out;
+	}
+#endif /* CONFIG_MOSIX_DFSA */
 	error = -EMFILE;
 	if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out;
@@ -137,12 +148,24 @@
 	struct file * file, *tofree;
 	struct files_struct * files = current->files;
+#ifdef CONFIG_MOSIX_DFSA
+	dfsa_syscall_on_file(oldfd, 1);
+#endif /* CONFIG_MOSIX_DFSA */
 	write_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
 	err = newfd;
 	if (newfd == oldfd)
 		goto out_unlock;
+#ifdef CONFIG_MOSIX_DFSA
+	if((current->mosix.dflags & DREMOTE) &&
+	   (newfd >= files->max_fdset || newfd >= files->max_fds ||
+	   (!files->fd[newfd] && FD_ISSET(newfd, files->open_fds))))
+	{
+		err = -EDOITATHOME;
+		goto out_unlock;
+	}
+#endif /* CONFIG_MOSIX_DFSA */
 	err = -EBADF;
 	if
(newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur) goto out_unlock; @@ -169,6 +192,11 @@ FD_SET(newfd, files->open_fds); FD_CLR(newfd, files->close_on_exec); write_unlock(&files->file_lock); +#ifdef CONFIG_MOSIX_DFSA + if(tofree) + dfsa_close_file(newfd); + dfsa_open_file(newfd); +#endif /* CONFIG_MOSIX_DFSA */ if (tofree) filp_close(tofree, files); @@ -190,6 +218,9 @@ int ret = -EBADF; struct file * file = fget(fildes); +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fildes, 1); +#endif /* CONFIG_MOSIX_DFSA */ if (file) ret = dupfd(file, 0); return ret; @@ -206,6 +237,16 @@ * In the case of an append-only file, O_APPEND * cannot be cleared */ +#ifdef CONFIG_MOSIX_FS + if(!(arg & O_APPEND)) + /* no way to guess the IS_APPEND status on MFS - need to look */ + { + struct inode_operations *iops = filp->f_dentry->d_inode->i_op; + + if (iops && iops->revalidate) + iops->revalidate(filp->f_dentry); + } +#endif /* CONFIG_MOSIX_FS */ if (!(arg & O_APPEND) && IS_APPEND(inode)) return -EPERM; @@ -251,6 +292,18 @@ { long err = -EINVAL; +#ifdef CONFIG_MOSIX_DFSA + switch(cmd) + { + case F_GETFL: + dfsa_syscall_on_file(fd, 0); + break; + case F_DUPFD: + case F_SETFL: + dfsa_syscall_on_file(fd, 1); + break; + } +#endif /* CONFIG_MOSIX_DFSA */ switch (cmd) { case F_DUPFD: if (arg < NR_OPEN) { @@ -438,7 +491,11 @@ send_sigio_to_task(p, fown, fd, band); goto out; } +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ int match = p->pid; if (pid < 0) match = -p->pgrp; diff -urN linux-2.4.17/fs/file.c linux_umopenmosix/fs/file.c --- linux-2.4.17/fs/file.c Fri Feb 9 21:29:44 2001 +++ linux_umopenmosix/fs/file.c Wed Jun 26 23:45:17 2002 @@ -14,6 +14,9 @@ #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ /* * Allocate an fd array, using kmalloc or vmalloc. 
@@ -98,6 +101,10 @@ struct file **old_fds; int i; +#ifdef CONFIG_MOSIX_DFSA + if(files == current->files) + current->mosix.dupdates |= DFSA_UPDMAX; +#endif /* CONFIG_MOSIX_DFSA */ old_fds = xchg(&files->fd, new_fds); i = xchg(&files->max_fds, nfds); @@ -164,6 +171,9 @@ int expand_fdset(struct files_struct *files, int nr) { fd_set *new_openset = 0, *new_execset = 0; +#ifdef CONFIG_MOSIX_DFSA + fd_set *new_closed = 0, *new_opened = 0, *new_modified = 0; +#endif /* CONFIG_MOSIX_DFSA */ int error, nfds = 0; error = -EMFILE; @@ -187,8 +197,18 @@ error = -ENOMEM; new_openset = alloc_fdset(nfds); new_execset = alloc_fdset(nfds); +#ifdef CONFIG_MOSIX_DFSA + new_closed = alloc_fdset(nfds); + new_opened = alloc_fdset(nfds); + new_modified = alloc_fdset(nfds); +#endif /* CONFIG_MOSIX_DFSA */ write_lock(&files->file_lock); +#ifdef CONFIG_MOSIX_DFSA + if (!new_openset || !new_execset || + !new_closed || !new_opened || !new_modified) +#else if (!new_openset || !new_execset) +#endif /* CONFIG_MOSIX_DFSA */ goto out; error = 0; @@ -198,6 +218,10 @@ int i = files->max_fdset / (sizeof(unsigned long) * 8); int count = (nfds - files->max_fdset) / 8; +#ifdef CONFIG_MOSIX_DFSA + if(files == current->files) + current->mosix.dupdates |= DFSA_UPDMAX; +#endif /* CONFIG_MOSIX_DFSA */ /* * Don't copy the entire array if the current fdset is * not yet initialised. 
@@ -207,14 +231,32 @@
 			memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
 			memset (&new_openset->fds_bits[i], 0, count);
 			memset (&new_execset->fds_bits[i], 0, count);
+#ifdef CONFIG_MOSIX_DFSA
+			memcpy (new_closed, files->closed, files->max_fdset/8);
+			memcpy (new_opened, files->opened, files->max_fdset/8);
+			memcpy (new_modified, files->modified, files->max_fdset/8);
+			memset (&new_closed->fds_bits[i], 0, count);
+			memset (&new_opened->fds_bits[i], 0, count);
+			memset (&new_modified->fds_bits[i], 0, count);
+#endif /* CONFIG_MOSIX_DFSA */
 		}
 		nfds = xchg(&files->max_fdset, nfds);
 		new_openset = xchg(&files->open_fds, new_openset);
 		new_execset = xchg(&files->close_on_exec, new_execset);
+#ifdef CONFIG_MOSIX_DFSA
+		new_closed = xchg(&files->closed, new_closed);	/* from here on new_* hold the OLD sets */
+		new_opened = xchg(&files->opened, new_opened);
+		new_modified = xchg(&files->modified, new_modified);
+#endif /* CONFIG_MOSIX_DFSA */
 		write_unlock(&files->file_lock);
 		free_fdset (new_openset, nfds);
 		free_fdset (new_execset, nfds);
+#ifdef CONFIG_MOSIX_DFSA
+		free_fdset (new_closed, nfds);	/* free the old sets, never files->closed etc., which were just installed */
+		free_fdset (new_opened, nfds);
+		free_fdset (new_modified, nfds);
+#endif /* CONFIG_MOSIX_DFSA */
 		write_lock(&files->file_lock);
 		return 0;
 	}
@@ -226,6 +268,14 @@
 		free_fdset(new_openset, nfds);
 	if (new_execset)
 		free_fdset(new_execset, nfds);
+#ifdef CONFIG_MOSIX_DFSA
+	if (new_closed)
+		free_fdset(new_closed, nfds);
+	if (new_opened)
+		free_fdset(new_opened, nfds);
+	if (new_modified)
+		free_fdset(new_modified, nfds);
+#endif /* CONFIG_MOSIX_DFSA */
 	write_lock(&files->file_lock);
 	return error;
 }
diff -urN linux-2.4.17/fs/file_table.c linux_umopenmosix/fs/file_table.c
--- linux-2.4.17/fs/file_table.c	Mon Sep 17 23:16:30 2001
+++ linux_umopenmosix/fs/file_table.c	Wed Jun 26 23:45:17 2002
@@ -58,7 +58,12 @@
 	/*
 	 * Allocate a new one if we're below the limit.
*/ +#ifdef CONFIG_MOSIX_FS + if (files_stat.nr_files < files_stat.max_files || + (current->mosix.dirty_bits & MFSARG_EMPTYF_PRI)) { +#else if (files_stat.nr_files < files_stat.max_files) { +#endif /* CONFIG_MOSIX_FS */ file_list_unlock(); f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL); file_list_lock(); @@ -113,6 +118,9 @@ file->f_op->release(inode, file); fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) +#ifdef CONFIG_MOSIX + if (!(file->f_flags & O_NOWRITEACCESS)) +#endif /* CONFIG_MOSIX */ put_write_access(inode); file_list_lock(); file->f_dentry = NULL; diff -urN linux-2.4.17/fs/inode.c linux_umopenmosix/fs/inode.c --- linux-2.4.17/fs/inode.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/inode.c Wed Jun 26 23:45:17 2002 @@ -18,6 +18,11 @@ #include #include +#ifdef CONFIG_MOSIX +spinlock_t unique_gen_lock = SPIN_LOCK_UNLOCKED; +unsigned long long unique_generator; +#endif /* CONFIG_MOSIX */ + /* * New inode.c implementation. * @@ -787,6 +792,9 @@ inode->i_data.host = inode; inode->i_data.gfp_mask = GFP_HIGHUSER; inode->i_mapping = &inode->i_data; +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ } /** @@ -822,6 +830,9 @@ inode->i_flags = 0; atomic_set(&inode->i_count, 1); inode->i_state = 0; +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ spin_unlock(&inode_lock); clean_inode(inode); } @@ -856,6 +867,9 @@ inode->i_flags = 0; atomic_set(&inode->i_count, 1); inode->i_state = I_LOCK; +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ spin_unlock(&inode_lock); clean_inode(inode); diff -urN linux-2.4.17/fs/ioctl.c linux_umopenmosix/fs/ioctl.c --- linux-2.4.17/fs/ioctl.c Fri Feb 9 21:29:44 2001 +++ linux_umopenmosix/fs/ioctl.c Wed Jun 26 23:45:17 2002 @@ -11,6 +11,10 @@ #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) { int error; @@ -51,6 +55,9 @@ struct file * filp; unsigned int flag; int 
on, error = -EBADF; +#ifdef CONFIG_MOSIX_DFSA + int flags_changed = 0; +#endif /* CONFIG_MOSIX_DFSA */ filp = fget(fd); if (!filp) @@ -79,6 +86,9 @@ filp->f_flags |= flag; else filp->f_flags &= ~flag; +#ifdef CONFIG_MOSIX_DFSA + flags_changed = 1; +#endif /* CONFIG_MOSIX_DFSA */ break; case FIOASYNC: @@ -99,6 +109,9 @@ filp->f_flags |= FASYNC; else filp->f_flags &= ~FASYNC; +#ifdef CONFIG_MOSIX_DFSA + flags_changed = 1; +#endif /* CONFIG_MOSIX_DFSA */ break; default: @@ -110,6 +123,10 @@ } unlock_kernel(); fput(filp); +#ifdef CONFIG_MOSIX_DFSA + if(flags_changed) + dfsa_touch_file(fd); +#endif /* CONFIG_MOSIX_DFSA */ out: return error; diff -urN linux-2.4.17/fs/lockd/svc.c linux_umopenmosix/fs/lockd/svc.c --- linux-2.4.17/fs/lockd/svc.c Sun Oct 21 19:32:33 2001 +++ linux_umopenmosix/fs/lockd/svc.c Wed Jun 26 23:45:17 2002 @@ -304,6 +304,9 @@ * Wait for the lockd process to exit, but since we're holding * the lockd semaphore, we can't wait around forever ... */ +#ifdef CONFIG_MOSIX + current->mosix.ignoreoldsigs = 1; +#endif /* CONFIG_MOSIX */ current->sigpending = 0; interruptible_sleep_on_timeout(&lockd_exit, HZ); if (nlmsvc_pid) { diff -urN linux-2.4.17/fs/mfs/Makefile linux_umopenmosix/fs/mfs/Makefile --- linux-2.4.17/fs/mfs/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/Makefile Wed Jun 26 23:45:17 2002 @@ -0,0 +1,8 @@ +# +# Makefile for the Linux MOSIX mfs filesystem routines. 
+# + +O_TARGET := mfs.o +obj-y := file.o convert.o complete.o server.o client.o scontact.o ccontact.o socket.o count.o + +include $(TOPDIR)/Rules.make diff -urN linux-2.4.17/fs/mfs/ccontact.c linux_umopenmosix/fs/mfs/ccontact.c --- linux-2.4.17/fs/mfs/ccontact.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/ccontact.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,325 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include + +spinlock_t mfs_client_lock = SPIN_LOCK_UNLOCKED; + +#define MFS_CLIENT_OPTCONS 50 +#define MFS_MAX_HOGGING_A_NODE 8 + +struct mfs_client_contact +{ + struct mfs_client_contact *next; + int to; + struct socket *sock; + int serial; + int has_mh; + struct mfs_message_header mh; +} *mfs_busy_clients, *mfs_connected_clients, *mfs_closing_clients; + +int mfs_client_serial; + +DECLARE_WAIT_QUEUE_HEAD(wait_for_busy_contact); +int waiting_for_busy_contact; + +int +mfs_client_send(struct mfs_client_contact *s, int type, void *header, int hdln, + void *dat1, int len1, void *dat2, int len2) +{ + return(mfs_send(s->sock, 0, s->serial, + type, header, hdln, dat1, len1, dat2, len2)); +} + +int +mfs_client_receive(struct mfs_client_contact *s, + void **header, void **dat1, void **dat2) +{ + if(s->has_mh) + { + s->has_mh = 0; +if(mfs_debug > 1) +printk("mfs_client_receive: continuing with serial %d, type %d (%d/%d/%d)\n", +s->mh.serial, s->mh.type, s->mh.hdsz, s->mh.dat1sz, s->mh.dat2sz); + return(mfs_receive(s->sock, 
MFS_MUST_MATCH, &s->serial, &s->mh, + header, dat1, dat2)); + } + else + return(mfs_receive(s->sock, MFS_SKIP_OTHERS, &s->serial, NULL, + header, dat1, dat2)); +} + +int +mfs_client_contact_has_data(struct mfs_client_contact *con) +{ + return(mfs_socket_has_data(con->sock)); +} + +void +delayed_close_client(struct mfs_client_contact *con) +{ + mfs_send(con->sock, 1, 0, MFS_CLOSE, NULL, 0, NULL, 0, NULL,0); + spin_lock(&mfs_client_lock); + con->next = mfs_closing_clients; + mfs_closing_clients = con; + spin_unlock(&mfs_client_lock); +} + +void +mfs_client_check_closed(struct mfs_client_contact **head) +{ + struct mfs_client_contact *c, *prev, *next; + struct mfs_client_contact *dispose = NULL; + + spin_lock(&mfs_client_lock); + for(prev = NULL, c = *head ; c ; c = next) + { + next = c->next; + if(mfs_socket_is_closed(c->sock)) + { + if(prev) + prev->next = next; + else + *head = next; + c->next = dispose; + dispose = c; + } + else + prev = c; + } + spin_unlock(&mfs_client_lock); + for(; dispose ; dispose = next) + { + next = dispose->next; + mfs_close_socket(dispose->sock); + kfree(dispose); + } +} + +void +mfs_release_contact(struct mfs_client_contact *con) +{ + struct mfs_client_contact *s, *prev; + int bcnt = 0, pcnt = 0, tcnt = 0; + int pe = con->to; + int found = 0; + + spin_lock(&mfs_client_lock); + for(prev = NULL , s = mfs_busy_clients ; s ; ) + { + tcnt++; + if(s == con) + { + s = s->next; + if(prev) + prev->next = s; + else + mfs_busy_clients = s; + found = 1; + } + else + { + if(s->to == pe && ++bcnt >= MFS_MAX_HOGGING_A_NODE && + found) + break; + prev = s; + s = s->next; + } + } + if(!found) + printk("mfs_release_contact: not listed!\n"); + if((pcnt = bcnt) < MFS_MAX_HOGGING_A_NODE) + for(s = mfs_connected_clients ; s ; s = s->next) + { + tcnt++; + if(s->to == pe && ++pcnt == MFS_MAX_HOGGING_A_NODE) + break; + } + if((tcnt >= MFS_CLIENT_OPTCONS && pcnt) || + pcnt >= MFS_MAX_HOGGING_A_NODE) + { + spin_unlock(&mfs_client_lock); + 
delayed_close_client(con); + if(bcnt >= MFS_MAX_HOGGING_A_NODE) + return; /* node is still hogged by other busy contacts, so no waiter is woken */ + spin_lock(&mfs_client_lock); + } + else + { + con->next = mfs_connected_clients; /* keep the connection on the idle list for reuse */ + mfs_connected_clients = con; + } + if(waiting_for_busy_contact == pe || + waiting_for_busy_contact == MFS_UNKNOWN_PE) + { + waiting_for_busy_contact = 0; + wake_up(&wait_for_busy_contact); + } + spin_unlock(&mfs_client_lock); +} + +static inline int +node_is_hogged(int pe) +{ /* returns 1 once MFS_MAX_HOGGING_A_NODE busy contacts to pe are counted, else 0; takes no lock itself */ + int n = 0; + struct mfs_client_contact *s; + + for(s = mfs_busy_clients ; s ; s = s->next) + if(s->to == pe && ++n == MFS_MAX_HOGGING_A_NODE) + return(1); + return(0); +} + +struct mfs_client_contact * +mfs_new_request(int pe, int type, void *header, int hdln, void *dat1, int len1, + void *dat2, int len2) +{ /* get a contact to node pe, send the request and pre-read the reply header; on success the contact is on the busy list with has_mh set */ + struct mfs_client_contact *s, *prev, *prevprev, *con; + int tried_a_new = 0; + int cnt; /* idle contacts passed over while searching for one to pe */ + DECLARE_WAITQUEUE(wait, current); + + if(pe == MFS_BAD_PE) + return(ERR_PTR(-ESTALE)); + spin_lock(&mfs_client_lock); + for(prev = prevprev = NULL , cnt = 0 , s = mfs_connected_clients ; s ; + prevprev = prev, prev = s , s = s->next) + if(s->to == pe) + break; + else + cnt++; + if(s) + { + found_a_connected: /* reuse an idle contact: unlink s from the connected list */ + if(prev) + prev->next = s->next; + else + mfs_connected_clients = s->next; + try_it: /* s becomes the busy contact for this request */ + con = s; + con->to = pe; + con->next = mfs_busy_clients; + mfs_busy_clients = con; + con->serial = ++mfs_client_serial; + spin_unlock(&mfs_client_lock); + if(!mfs_send(con->sock, 1, con->serial, type, header, hdln, + dat1, len1, dat2, len2) && + mfs_receive(con->sock, MFS_JUST_HEADER, &con->serial, + &con->mh, NULL, NULL, NULL) >= 0) + { + con->has_mh = 1; +if(mfs_debug > 1) +printk("mfs_new_request: first reply will be serial %d, type %d (%d/%d/%d)\n", +con->mh.serial, con->mh.type, con->mh.hdsz, con->mh.dat1sz, con->mh.dat2sz); + return(con); + } + spin_lock(&mfs_client_lock); /* send or header receive failed: find con on the busy list again to drop it */ + for(prev = NULL , s = mfs_busy_clients ; s && s != con ; + prev = s , s = s->next) + ; + if(s) + { + if(prev) + prev->next = s->next; + else + mfs_busy_clients = s->next; + 
if(waiting_for_busy_contact == pe || + waiting_for_busy_contact == MFS_UNKNOWN_PE) + { + waiting_for_busy_contact = 0; + wake_up(&wait_for_busy_contact); + } + spin_unlock(&mfs_client_lock); /* socket close and kfree happen outside the lock */ + mfs_close_socket(s->sock); + kfree(s); + spin_lock(&mfs_client_lock); + } + else + printk("mfs_new_request: contact disappeared!\n"); + } + else if(cnt >= MFS_CLIENT_OPTCONS) + { + /* trim out the last one (also last used) */ + prevprev->next = NULL; + spin_unlock(&mfs_client_lock); + delayed_close_client(prev); + mfs_client_check_closed(&mfs_closing_clients); + spin_lock(&mfs_client_lock); + } + if(tried_a_new || !PE) + { + spin_unlock(&mfs_client_lock); + return(ERR_PTR(-EREMOTE)); + } + if(node_is_hogged(pe)) + { /* too many busy contacts to pe: sleep until one is released or an idle one appears */ + add_wait_queue(&wait_for_busy_contact, &wait); + while(1) + { + set_current_state(TASK_UNINTERRUPTIBLE); + if(waiting_for_busy_contact && + waiting_for_busy_contact != pe) + waiting_for_busy_contact = MFS_UNKNOWN_PE; + else + waiting_for_busy_contact = pe; + if(!node_is_hogged(pe)) + break; + spin_unlock(&mfs_client_lock); + if(!PE) + { + remove_wait_queue(&wait_for_busy_contact, &wait); + set_current_state(TASK_RUNNING); + return(ERR_PTR(-EREMOTE)); /* was return(NULL): callers test only IS_ERR() and would dereference NULL; same code as the !PE exit above */ + } + schedule_timeout(HZ); + spin_lock(&mfs_client_lock); + for(prev = NULL , s = mfs_connected_clients ; s ; + prev = s , s = s->next) + if(s->to == pe) + { + remove_wait_queue(&wait_for_busy_contact,&wait); + set_current_state(TASK_RUNNING); + goto found_a_connected; + } + } + remove_wait_queue(&wait_for_busy_contact, &wait); + set_current_state(TASK_RUNNING); + } + spin_unlock(&mfs_client_lock); + tried_a_new++; /* at most one fresh connection attempt per call */ + if(!(s = (struct mfs_client_contact *)kmalloc(sizeof(*s), GFP_KERNEL))) + return(ERR_PTR(-ENOMEM)); + s->sock = mfs_connect_to(pe); + s->has_mh = 0; + if(!IS_ERR(s->sock)) + { + spin_lock(&mfs_client_lock); + goto try_it; + } + con = (struct mfs_client_contact *)s->sock; /* pass the ERR_PTR from mfs_connect_to() on to the caller */ + kfree(s); + return(con); +} + +void +mfs_monitor_client_contacts(void) +{ /* reap contacts whose sockets have closed, from the idle list and then the closing list */ + mfs_client_check_closed(&mfs_connected_clients); + 
mfs_client_check_closed(&mfs_closing_clients); +} +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/client.c linux_umopenmosix/fs/mfs/client.c --- linux-2.4.17/fs/mfs/client.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/client.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,665 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include + +void +pack_credits(struct mfs_cred *cred) +{ /* copy the calling task ids, groups, effective capabilities and MOSIX location fields into *cred */ + register struct task_struct *p = current; + register int i; + + cred->uid = p->uid; + cred->euid = p->euid; + cred->suid = p->suid; /* was a second sgid store, which left the saved uid unpacked; assumes struct mfs_cred declares suid - TODO confirm */ + cred->fsuid = p->fsuid; + cred->gid = p->gid; + cred->egid = p->egid; + cred->sgid = p->sgid; + cred->fsgid = p->fsgid; + cred->ngroups = p->ngroups; + for(i = cred->ngroups-1 ; i >= 0 ; i--) + cred->groups[i] = p->groups[i]; + cred->caps = p->cap_effective; + cred->whereami = current->mosix.whereami ? : PE; /* GNU ?: extension: a zero field falls back to PE */ + cred->deppe = current->mosix.deppe ? 
: PE; +} + +inline void +mfs_file_up_to_date(struct file *filp) +{ /* stamp the mfs_dinfo attached to the file dentry, if any, with the current jiffies */ + struct dentry *dp; + struct mfs_dinfo *i; + + if((dp = filp->f_dentry) && (i = (struct mfs_dinfo *)dp->d_fsdata)) + i->latest = jiffies; +} + +int +mfs_client_attach_handle(int pe, mfs_handle_t handle) +{ /* send MFS_REQUEST_ATTACH_HANDLE to node pe; returns the result field of the reply or a negative error */ + struct mfs_request_attach_handle s; + struct mfs_reply_attach_handle *r; + struct mfs_client_contact *con; + int err; + + s.handle = handle; + con = mfs_new_request(pe, MFS_REQUEST_ATTACH_HANDLE, &s, sizeof(s), + NULL, 0, NULL, 0); + if(IS_ERR(con)) + return((int)con); /* the error pointer value is returned as the negative errno */ + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_ATTACH_HANDLE) + err = r->result; + else if(err >= 0) + { + printk("mfs_client_attach_handle: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) /* NOTE(review): r is not initialised in this function; this relies on mfs_client_receive storing to it even on failure - confirm */ + kfree(r); + mfs_release_contact(con); + return(err); +} + +int +mfs_client_touch_handle(int pe, mfs_handle_t handle) +{ /* send MFS_REQUEST_TOUCH_HANDLE to node pe; returns the result field of the reply or a negative error */ + struct mfs_request_touch_handle s; + struct mfs_reply_touch_handle *r; + struct mfs_client_contact *con; + int err; + + s.handle = handle; + con = mfs_new_request(pe, MFS_REQUEST_TOUCH_HANDLE, &s, sizeof(s), + NULL, 0, NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_TOUCH_HANDLE) + err = r->result; + else if(err >= 0) + { + printk("mfs_client_touch_handle: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +void +mfs_client_dispose_handle(int pe, mfs_handle_t handle) +{ /* send MFS_REQUEST_DISPOSE_HANDLE to node pe; the reply is read and discarded, failures are not reported */ + struct mfs_request_dispose_handle s; + struct mfs_client_contact *con; + void *r; + + s.handle = handle; + con = mfs_new_request(pe, MFS_REQUEST_DISPOSE_HANDLE, &s, sizeof(s), + NULL, 0, NULL, 0); + if(IS_ERR(con)) + return; + mfs_client_receive(con, &r, NULL, NULL); + if(r) + kfree(r); + mfs_release_contact(con); +} + +int +mfs_client_llseek(int pe, mfs_handle_t handle, struct file *filp, loff_t offset, + int origin) +{ /* send MFS_REQUEST_LLSEEK for the handle on node pe; returns the result field of the reply or a negative error */ + struct mfs_request_llseek s; + struct 
mfs_reply_llseek *r; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; /* the whole struct file is copied into the request */ + s.offset = offset; + s.origin = origin; + con = mfs_new_request(pe, MFS_REQUEST_LLSEEK, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_LLSEEK) + { + cp_file_fields(filp, &r->file); /* presumably copies the file fields of the reply back into filp - confirm in cp_file_fields */ + err = r->result; + if(err >= 0) + mfs_file_up_to_date(filp); + } + else if(err >= 0) + { + printk("mfs_client_llseek: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +ssize_t +mfs_client_read(int pe, mfs_handle_t handle, struct file *filp, char *buf, + size_t count, loff_t *ppos) +{ /* remote read: send MFS_REQUEST_READ, copy each data chunk to the user buffer, finish on MFS_REPLY_READ */ + struct mfs_request_read s; + struct mfs_reply_read *r; + struct mfs_client_contact *con; + void *data; + int err; + ssize_t i, result = 0; + int user_err = 0; /* first copy_to_user failure; takes precedence over result on return */ + int done = 0; + + if((int)count < 0) + return(-EINVAL); + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; + s.count = count; + s.pos = *ppos; + con = mfs_new_request(pe, MFS_REQUEST_READ, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return((int)con); + while(!done) + { + err = mfs_client_receive(con, (void **)&r, &data, NULL); + if(err < 0) + { + result = err; + break; /* leaves the loop before the kfree(r) at its end */ + } + switch(err) + { + case MFS_INTERIM_DATA: + i = ((struct mfs_interim_data *)r)->count; + if(!user_err && copy_to_user(buf, data, i)) + user_err = -EFAULT; + else + buf += i; + /* do not exit: it is always the server + * that should close the connection + */ + break; + case MFS_REPLY_READ: + done = 1; + if(!user_err && r->datalen && + copy_to_user(buf, data, r->datalen)) + user_err = -EFAULT; + cp_file_fields(filp, &r->file); + *ppos = r->ppos; + result = r->result; + if(result >= 0) + mfs_file_up_to_date(filp); + break; + default: + printk("mfs_client_read: unexpected reply %d\n", + err); + result = -EINVAL; + done = 1; + break; + + } + if(r) /* only r is freed; data is never freed separately - presumably it shares the allocation of r, confirm in mfs_receive */ + 
kfree(r); + } + mfs_release_contact(con); + return(user_err ? : result); +} + +ssize_t +mfs_client_write(int pe, mfs_handle_t handle, struct file *filp, char *buf, + size_t count, loff_t *ppos, long flim) +{ /* remote write: the first chunk travels with MFS_REQUEST_WRITE, the rest as MFS_INTERIM_DATA messages, then the reply is read */ + struct mfs_request_write s; + struct mfs_interim_data i; + struct mfs_reply_write *r; + struct mfs_client_contact *con; + void *data; + int err; + ssize_t result = 0; + int user_err = 0; + size_t max, sent, cnt; /* max: bounce buffer size; sent: bytes handed over so far; cnt: size of the current chunk */ + char smallbuf[4]; + char *tbuf = NULL; /* NULL just because bad compiler complains */ + + if(count == 0) + return(0); + if((int)count < 0) + return(-EINVAL); + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; + s.count = count; + s.pos = *ppos; + s.flim = flim; + for(max = count > 16384 ? 16384 : count ; max > sizeof(smallbuf) ; + max >>= 1) /* try a bounce buffer of at most 16384 bytes, halving the size after each failed kmalloc */ + if((tbuf = (char *)kmalloc(max, GFP_KERNEL))) + break; + if(max <= sizeof(smallbuf)) + { /* nothing was allocated: use the small stack buffer */ + tbuf = smallbuf; + max = sizeof(smallbuf); + } + s.datalen = sent = count < max ? count : max; + if(copy_from_user(tbuf, buf, sent)) + { + user_err = -EFAULT; + goto free_buf; + } + buf += sent; + con = mfs_new_request(pe, MFS_REQUEST_WRITE, &s, sizeof(s), tbuf, sent, + NULL, 0); + if(IS_ERR(con)) + { + result = (int)con; + goto free_buf; + } + while(sent < count) + { + if(mfs_client_contact_has_data(con)) + break; /* data is already waiting on the socket: stop sending and go read it */ + cnt = count - sent; + if(cnt > max) + cnt = max; + if(user_err || copy_from_user(tbuf, buf, cnt)) + { + user_err = -EFAULT; + mfs_client_send(con, MFS_STOP, NULL, 0, + NULL, 0, NULL, 0); + break; /* let the server quit first */ + } + i.count = cnt; + if((err = mfs_client_send(con, MFS_INTERIM_DATA, + &i, sizeof(i), (void *)tbuf, cnt, + NULL, 0))) + { + result = err; + break; + } + buf += cnt; + sent += cnt; + } + err = mfs_client_receive(con, (void **)&r, &data, NULL); + if(err == MFS_CONTINUE) /* an MFS_CONTINUE message is followed by one more receive for the actual reply */ + err = mfs_client_receive(con, (void **)&r, &data, NULL); + if(err == MFS_REPLY_WRITE) + { + cp_file_fields(filp, &r->file); + *ppos = r->ppos; + result = r->result; + if(result >= 0) + 
mfs_file_up_to_date(filp); + if(r->hadsigxfsz) + send_sig(SIGXFSZ, current, 0); + } + else if(err < 0) + result = err; + else + { + printk("mfs_client_write: unexpected reply %d\n", err); + result = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + free_buf: + if(max != sizeof(smallbuf)) + kfree(tbuf); + return(user_err ? : result); +} + +int +mfs_client_readdir_transfer(void *d, int len, void *dirent, filldir_t filldir) +{ + ino_t ino; + loff_t off; + unsigned int type; + short namelen; + int err; + + while(len) + { + len -= sizeof(ino)+sizeof(off)+sizeof(type)+sizeof(namelen); + if(len < 0) + return(-EINVAL); + ino = *((ino_t *)d)++; + off = *((loff_t *)d)++; + type = *((unsigned int *)d)++; + namelen = *((short *)d)++; + if(len < namelen) + return(-EINVAL); + len -= namelen; + if((err = filldir(dirent, (const char *)d, namelen, off, ino, + type))) + return(err); + d += namelen; + } + return(0); +} + +int +mfs_client_readdir(int pe, mfs_handle_t handle, struct file *filp, void *dirent, + filldir_t filldir, int policy, int maxbytes, int *packets, int *bytes) +{ + struct mfs_request_readdir s; + struct mfs_reply_readdir *r; + struct mfs_interim_readdir ri; + struct mfs_client_contact *con; + void *data; + ssize_t i; + int result = 0; + int done = 0; + + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; + s.policy = policy; + s.maxbytes = maxbytes; + *packets = *bytes = 0; + con = mfs_new_request(pe, MFS_REQUEST_READDIR, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return((int)con); + while(!done && (result = + mfs_client_receive(con, (void **)&r, &data, NULL)) >= 0) + { + switch(result) + { + case MFS_INTERIM_DATA: + i = ((struct mfs_interim_data *)r)->count; + (*packets)++; + (*bytes) += i; + ri.error = mfs_client_readdir_transfer(data, i, + dirent, filldir); + if((result = mfs_client_send(con, + MFS_INTERIM_READDIR, &ri, sizeof(ri), + NULL, 0, NULL, 0))) + done = 1; + break; + case MFS_REPLY_READDIR: + done = 1; + (*bytes) += 
r->datalen; + if(!r->datalen || + !(result = mfs_client_readdir_transfer( + data, r->datalen, dirent, filldir))) + result = r->result; /* the result of the reply is used unless transferring the final entries failed */ + cp_file_fields(filp, &r->file); + if(result >= 0) + mfs_file_up_to_date(filp); + break; + default: + printk("mfs_client_readdir: unexpected reply %d\n", result); + result = -EINVAL; + done = 1; + break; + } + if(r) + kfree(r); + } + mfs_release_contact(con); + return(result); +} + +int +mfs_client_ioctl(int pe, mfs_handle_t handle, struct file *filp, + unsigned int cmd, unsigned long arg, int size, int *data) +{ /* send MFS_REQUEST_IOCTL for the handle on node pe; returns the result field of the reply or a negative error */ + struct mfs_request_ioctl s; + struct mfs_reply_ioctl *r; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; + s.cmd = cmd; + s.arg = arg; + s.data = *data; + s.size = size; + con = mfs_new_request(pe, MFS_REQUEST_IOCTL, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_IOCTL) + { + cp_file_fields(filp, &r->file); + if(size < 0) /* *data is overwritten from the reply only for a negative size; the meaning of the sign is set by the caller - confirm there */ + *data = r->data; + err = r->result; + if(err >= 0) + mfs_file_up_to_date(filp); + } + else if(err >= 0) + { + printk("mfs_client_ioctl: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +int +mfs_client_fsync(int pe, mfs_handle_t handle, struct file *filp, int datasync) +{ /* send MFS_REQUEST_FSYNC for the handle on node pe; returns the result field of the reply or a negative error */ + struct mfs_request_fsync s; + struct mfs_reply_fsync *r; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + s.file = *filp; + s.datasync = datasync; + con = mfs_new_request(pe, MFS_REQUEST_FSYNC, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_FSYNC) + { + cp_file_fields(filp, &r->file); + err = r->result; + if(err >= 0) + mfs_file_up_to_date(filp); + } + else if(err >= 0) + { + printk("mfs_client_fsync: unexpected reply %d\n", err); + err = 
-EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +int +mfs_client_revalidate(int pe, mfs_handle_t handle, struct subinode *iinfo) +{ /* send MFS_REQUEST_REVALIDATE; on a matching reply *iinfo is overwritten from it and its result field returned */ + struct mfs_request_revalidate s; + struct mfs_reply_revalidate *r; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + con = mfs_new_request(pe, MFS_REQUEST_REVALIDATE, &s, sizeof(s), + NULL, 0, NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_REVALIDATE) + { + *iinfo = r->iinfo; + err = r->result; + } + else if(err >= 0) + { + printk("mfs_client_revalidate: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +int +mfs_client_express_lookup(int pe, mfs_handle_t *handle, char *name, + unsigned int follow, struct subinode *iinfo, char **newname, + struct mfs_completion *complete) +{ /* send MFS_REQUEST_EXPRESS with name and *complete; on reply update *handle, *iinfo and *complete, and return kmalloc copies in *newname and complete->more_data */ + struct mfs_request_express s; + struct mfs_reply_express *r; + struct mfs_client_contact *con; + int err; + void *name_back, *more_data; + + *newname = NULL; + pack_credits(&s.cred); + s.handle = *handle; + s.namelen = strlen(name) + 1; /* the terminating NUL is sent too */ + s.follow = follow; + s.link_count = current->link_count; + s.total_link_count = current->total_link_count; + s.complete = *complete; + con = mfs_new_request(pe, MFS_REQUEST_EXPRESS, &s, sizeof(s), + (void *)name, s.namelen, + s.complete.more_data, s.complete.more_data_len); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, &name_back, &more_data); + if(err == MFS_REPLY_EXPRESS) + { + *handle = r->handle; + *iinfo = r->iinfo; + *complete = r->complete; + err = r->result; + if(!err) + { + if(!r->namelen) + *newname = NULL; + else if((*newname = kmalloc(r->namelen, GFP_KERNEL))) + memcpy(*newname, name_back, r->namelen); + else + err = -ENOMEM; + } + if(complete->more_data) /* more_data copied from the reply is used only as a flag here; it is replaced by a fresh local buffer */ + { + if((complete->more_data = + kmalloc(complete->more_data_len, GFP_KERNEL))) + 
memcpy(complete->more_data, more_data, + complete->more_data_len); + else + { /* allocation failed: report -ENOMEM unless an error is already set, and drop the name copy */ + if(!err) + err = -ENOMEM; + if(*newname) + { + kfree(*newname); + *newname = NULL; + } + } + } + } + else if(err >= 0) + { + printk("mfs_client_express_lookup: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +char * +mfs_client_check_path(int pe, mfs_handle_t handle) +{ /* send MFS_REQUEST_CHECKPATH; returns a kmalloc copy of the path in the reply, or NULL on any failure or empty path */ + struct mfs_request_checkpath s; + struct mfs_reply_checkpath *r; + char *path; + char *copy = NULL; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + con = mfs_new_request(pe, MFS_REQUEST_CHECKPATH, &s, sizeof(s), NULL, 0, + NULL, 0); + if(IS_ERR(con)) + return(NULL); + err = mfs_client_receive(con, (void **)&r, (void *)&path, NULL); + if(err == MFS_REPLY_CHECKPATH) + { + if(r->namelen && (copy = kmalloc(r->namelen, GFP_KERNEL))) + memcpy(copy, path, r->namelen); + } + else if(err >= 0) + { + printk("mfs_client_check_path: unexpected reply %d\n", err); + err = -EINVAL; /* not used afterwards: only copy is returned */ + } + if(r) + kfree(r); + mfs_release_contact(con); + return(copy); +} + +int +mfs_client_setattr(int pe, mfs_handle_t handle, struct iattr *iattr) +{ /* send MFS_REQUEST_SETATTR carrying a copy of *iattr; returns the result field of the reply or a negative error */ + struct mfs_request_setattr s; + struct mfs_reply_setattr *r; + struct mfs_client_contact *con; + int err; + + pack_credits(&s.cred); + s.handle = handle; + s.attr = *iattr; + con = mfs_new_request(pe, MFS_REQUEST_SETATTR, &s, sizeof(s), + NULL, 0, NULL, 0); + if(IS_ERR(con)) + return((int)con); + err = mfs_client_receive(con, (void **)&r, NULL, NULL); + if(err == MFS_REPLY_SETATTR) + err = r->result; + else if(err >= 0) + { + printk("mfs_client_setattr: unexpected reply %d\n", err); + err = -EINVAL; + } + if(r) + kfree(r); + mfs_release_contact(con); + return(err); +} + +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/complete.c linux_umopenmosix/fs/mfs/complete.c --- linux-2.4.17/fs/mfs/complete.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/complete.c Wed Jun 26 
23:45:17 2002 @@ -0,0 +1,731 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include +#include +#include +#include + +extern struct dentry *lookup_create(struct nameidata *, int); +extern int chown_common(struct dentry *, uid_t, gid_t); + +#define MAX_READLINK 10240 /* upper bound applied to the EF_READLINK buffer size in mfs_prepare_completion() */ + +void +mfs_prepare_other(struct mfs_completion *c, struct nameidata *ond) +{ /* record in c the node and handle behind the dentry of ond; other_pe is 0 when it is not an MFS dentry with attached info */ + struct dentry *other = ond->dentry; + struct mfs_dinfo *info; + + if(other->d_sb != mfs_super || + !(info = (struct mfs_dinfo *)other->d_fsdata)) + c->other_pe = 0; + else + { + c->other_pe = info->pe; + c->other = info->handle; + if(atomic_read(&other->d_count) > 1) + c->dispose_other = 0; + else + { /* sole reference: detach the info now and flag the handle for disposal */ + kfree(info); + other->d_fsdata = NULL; + c->dispose_other = 1; + } + } +} + +void +mfs_prepare_utime(struct mfs_completion *c, time_t *pa, time_t *pm) +{ /* fetch the two times from user space into c->args.times; on a fault cancel the completion with -EFAULT */ + if(get_user(c->args.times.a, pa) || get_user(c->args.times.m, pm)) + { + c->needed = 0; + c->error = -EFAULT; + } +} + +void +mfs_prepare_completion(struct mfs_completion *c, struct nameidata *nd) +{ /* initialise c from nd: base node and handle first, then the arguments of the operation selected by nd->express_function */ + struct dentry *base = nd->dentry; + struct utimbuf *ut; + struct timeval *uts; + struct mfs_dinfo *info; + + c->done = 0; + c->other_pe = 0; + c->dispose_other = 0; + c->more_data = NULL; + c->more_data_len = 0; + c->error = 0; + c->ro = mfs_is_ro; +#ifdef CONFIG_MOSIX_DIAG + if(nd->mnt->mnt_sb != mfs_super) + panic("mfs_prepare_completion: bad base\n"); +#endif /* CONFIG_MOSIX_DIAG */ + c->dispose_base = 0; + if((info = (struct mfs_dinfo 
*)base->d_fsdata)) + { + c->base_pe = info->pe; + c->base = info->handle; + if(atomic_read(&base->d_count) == 1) + { /* sole reference: detach the info now and flag the base handle for disposal */ + kfree(info); + base->d_fsdata = NULL; + c->dispose_base = 1; + } + } + else + { /* no info attached to the base dentry: use node 0 and MFS_ROOT_INO */ + c->base_pe = 0; + c->base = MFS_ROOT_INO; + } + if(!(c->needed = ((nd->flags & LOOKUP_COMPLETE) != 0))) + return; + c->args = nd->complete_args; + switch(c->func = nd->express_function) + { + case EF_OPEN: + case EF_ACCESS: + case EF_UNLINK: + case EF_MKDIR: + case EF_RMDIR: + case EF_TRUNCATE: + case EF_CHOWN: + case EF_CHMOD: + case EF_MKNOD: + return; /* these need nothing beyond c->args */ + case EF_RENAME: + if(nd->complete_args.oldnd->last_type == LAST_NORM) + { /* the last component of the old name travels as more_data, NUL included */ + c->more_data = (char *) + nd->complete_args.oldnd->last.name; + c->more_data_len = strlen(c->more_data)+1; + } + /* fall through */ + case EF_LINK: + mfs_prepare_other(c, nd->complete_args.oldnd); + return; + case EF_SYMLINK: + c->more_data = nd->complete_parg; + c->more_data_len = strlen(c->more_data)+1; + return; + case EF_READLINK: + if(c->args.buffer.bufsiz > MAX_READLINK) + c->args.buffer.bufsiz = MAX_READLINK; + return; + case EF_UTIME: + ut = (struct utimbuf *)nd->complete_parg; + if(ut) + mfs_prepare_utime(c, &ut->actime, &ut->modtime); + else + c->other_pe = 1; /* no times given: other_pe doubles as a flag that mfs_low_complete_utime() tests */ + return; + case EF_UTIMES: + uts = (struct timeval *)nd->complete_parg; + if(uts) + mfs_prepare_utime(c, &uts[0].tv_sec, + &uts[1].tv_sec); + else + c->other_pe = 1; + return; + case EF_STAT: + nd->complete_parg = NULL; + return; + default: + c->needed = 0; + return; + } +} + +int +mfs_low_complete_link(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_LINK: link the object behind handle c->other to the name in nd; -EXDEV unless other_pe is MFS_PE and both share a mount */ + struct nameidata ond; + int err; + struct dentry *new_dentry; + + if(c->other_pe != MFS_PE) + return(-EXDEV); + if((err = mfs_to_local(c->other, &ond))) + return(err); + err = -EXDEV; + if(ond.mnt != nd->mnt) + goto out; + new_dentry = lookup_create(nd, 0); + if(IS_ERR(new_dentry)) + err = PTR_ERR(new_dentry); + else + { + err = vfs_link(ond.dentry, nd->dentry->d_inode, new_dentry); + dput(new_dentry); + } + 
up(&nd->dentry->d_inode->i_sem); /* NOTE(review): no matching down() in this function; presumably lookup_create() returns with i_sem held, even on error - confirm */ + out: + mfs_lput(ond.dentry, ond.mnt); + return(err); +} + +int +mfs_low_complete_unlink(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_UNLINK: look up the last component of nd under the parent i_sem and unlink it */ + int err; + struct dentry *dentry; + + if(nd->last_type != LAST_NORM) + return(-EISDIR); + down(&nd->dentry->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + if(IS_ERR(dentry)) + err = PTR_ERR(dentry); + else + { + if(!dentry->d_inode) + err = -ENOENT; + else if(nd->last.name[nd->last.len]) /* a character follows the last component in the name */ + err = S_ISDIR(dentry->d_inode->i_mode) ? + -EISDIR : -ENOTDIR; + else + err = vfs_unlink(nd->dentry->d_inode, dentry); + dput(dentry); + } + up(&nd->dentry->d_inode->i_sem); + return(err); +} + +int +mfs_low_complete_symlink(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_SYMLINK: create a symlink at nd whose target text is c->more_data */ + int err; + struct dentry *dentry; + + dentry = lookup_create(nd, 0); + if(IS_ERR(dentry)) + err = PTR_ERR(dentry); + else + { + err = vfs_symlink(nd->dentry->d_inode, dentry, c->more_data); + dput(dentry); + } + up(&nd->dentry->d_inode->i_sem); + return(err); +} + +int +mfs_low_complete_mkdir(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_MKDIR: create a directory at nd with mode c->args.ints.i1 */ + int err; + struct dentry *dentry; + + dentry = lookup_create(nd, 1); + if(IS_ERR(dentry)) + err = PTR_ERR(dentry); + else + { + err = vfs_mkdir(nd->dentry->d_inode, dentry, c->args.ints.i1); + dput(dentry); + } + up(&nd->dentry->d_inode->i_sem); + return(err); +} + +int +mfs_low_complete_mknod(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_MKNOD: only regular files are created; every other known file type yields -EPERM */ + int err; + struct dentry *dentry; + int mode = c->args.ints.i1; + + dentry = lookup_create(nd, 1); /* NOTE(review): second argument is 1 here exactly as in mkdir - confirm that is intended for mknod */ + if(IS_ERR(dentry)) + err = PTR_ERR(dentry); + else + { + switch(mode & S_IFMT) + { + case 0: + case S_IFREG: + err = vfs_create(nd->dentry->d_inode, dentry, + mode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + case S_IFDIR: + err = -EPERM; + break; + default: + err = -EINVAL; + break; + } + dput(dentry); + } + up(&nd->dentry->d_inode->i_sem); + return(err); +} + +int +mfs_low_complete_rmdir(struct 
mfs_completion *c, struct nameidata *nd) +{ /* EF_RMDIR: remove the directory named by the last component of nd */ + int err; + struct dentry *dentry; + + switch(nd->last_type) + { + case LAST_NORM: + break; + case LAST_DOTDOT: + return(-ENOTEMPTY); + default: + return(-EBUSY); + } + down(&nd->dentry->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + if(IS_ERR(dentry)) + err = PTR_ERR(dentry); + else + { + err = vfs_rmdir(nd->dentry->d_inode, dentry); + dput(dentry); + } + up(&nd->dentry->d_inode->i_sem); + return(err); +} + +int +mfs_low_complete_rename(struct mfs_completion *c, struct nameidata *newnd) +{ /* EF_RENAME: old parent comes from handle c->other, old name from c->more_data, new location from newnd */ + struct nameidata ond; + int err; + struct dentry *old_dir, *new_dir; + struct dentry *old_dentry, *new_dentry; + + if(c->other_pe != MFS_PE) + return(-EXDEV); + if((err = mfs_to_local(c->other, &ond))) + return(err); + if(c->more_data_len) + { /* rebuild ond.last (name, length, hash) from the transmitted old name */ + unsigned int hash = init_name_hash(); + const unsigned char *n; + + ond.last_type = LAST_NORM; + ond.last.name = c->more_data; + ond.last.len = strlen(c->more_data); + for(n = ond.last.name ; *n && *n != '/' ; n++) + hash = partial_name_hash(*n, hash); + ond.last.hash = end_name_hash(hash); + } + err = -EXDEV; + if(ond.mnt != newnd->mnt) + goto out; + err = -EBUSY; + if (ond.last_type != LAST_NORM || newnd->last_type != LAST_NORM) /* without more_data, ond.last_type is whatever mfs_to_local() left there */ + goto out; + old_dir = ond.dentry; + new_dir = newnd->dentry; + double_lock(new_dir, old_dir); + + old_dentry = lookup_hash(&ond.last, old_dir); + err = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto out1; + /* source must exist */ + err = -ENOENT; + if (!old_dentry->d_inode) + goto out2; + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + err = -ENOTDIR; + if (ond.last.name[ond.last.len]) + goto out2; + if (newnd->last.name[newnd->last.len]) + goto out2; + } + new_dentry = lookup_hash(&newnd->last, new_dir); + err = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto out2; + + lock_kernel(); + err = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, + 
new_dentry); + unlock_kernel(); + + dput(new_dentry); + out2: + dput(old_dentry); + out1: + double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); + out: + mfs_lput(ond.dentry, ond.mnt); /* release what mfs_to_local() obtained */ + return(err); +} + +int +mfs_low_complete_readlink(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_READLINK: read the link into a kernel buffer; on success the buffer is handed over in c->more_data and the length returned */ + char *buf; + mm_segment_t old_fs = get_fs(); + int err; + struct inode *ip = nd->dentry->d_inode; + int sz = c->args.buffer.bufsiz; + + if(!ip) + return(-ENOENT); + if(!ip->i_op || !ip->i_op->readlink) + return(-EINVAL); + if(ip->i_op && ip->i_op->revalidate && + (err = ip->i_op->revalidate(nd->dentry))) + return(err); + if(!(buf = kmalloc(sz, GFP_KERNEL))) + return(-ENOMEM); + UPDATE_ATIME(ip); + set_fs(KERNEL_DS); /* readlink writes through a user-pointer interface, so the address limit is lifted for the kernel buffer */ + if(!ip->i_op || !ip->i_op->readlink) + err = -EINVAL; + else + err = ip->i_op->readlink(nd->dentry, buf, sz); + set_fs(old_fs); + if(err > 0) + { /* ownership of buf passes to c; it is freed by whoever consumes more_data */ + c->more_data = buf; + c->more_data_len = err; + } + else + kfree(buf); + return(err); +} + +int +mfs_low_complete_truncate(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_TRUNCATE: permission and lock checks, then truncate the regular file to c->args.len */ + int err; + struct inode *ip = nd->dentry->d_inode; + + if(!S_ISREG(ip->i_mode)) + return(-EACCES); + if((err = permission(ip, MAY_WRITE))) + return(err); + if(IS_RDONLY(ip)) + return(-EROFS); + if(IS_IMMUTABLE(ip) || IS_APPEND(ip)) + return(-EPERM); + if((err = get_write_access(ip))) + return(err); + if(!(err = locks_verify_truncate(ip, NULL, c->args.len))) + { + DQUOT_INIT(ip); + err = do_truncate(nd->dentry, c->args.len); + } + put_write_access(ip); + return(err); +} + +int +mfs_low_complete_utime(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_UTIME and EF_UTIMES: apply explicit times from c->args.times, or current times when c->other_pe is set */ + int err; + struct iattr newattrs; + struct inode *ip = nd->dentry->d_inode; + + if(IS_RDONLY(ip)) + return(-EROFS); + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; + if(c->other_pe) + { /* flag set by mfs_prepare_completion() when no times were supplied */ + if((err = permission(ip, MAY_WRITE))) + return(err); + } + else + { + newattrs.ia_atime = c->args.times.a; + newattrs.ia_mtime = c->args.times.m; + newattrs.ia_valid |= ATTR_ATIME_SET | 
ATTR_MTIME_SET; + } + return(notify_change(nd->dentry, &newattrs)); +} + +int +mfs_low_complete_chmod(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_CHMOD: replace the permission bits of the inode with those of c->args.mode */ + struct iattr newattrs; + mode_t mode = c->args.mode; + struct inode *ip = nd->dentry->d_inode; + + if(IS_RDONLY(ip)) + return(-EROFS); + if(IS_IMMUTABLE(ip) || IS_APPEND(ip)) + return(-EPERM); + if (mode == (mode_t) -1) + mode = ip->i_mode; /* a mode of -1 keeps the current bits */ + newattrs.ia_mode = (mode & S_IALLUGO) | (ip->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + return(notify_change(nd->dentry, &newattrs)); +} + +int +mfs_low_complete_stat(struct mfs_completion *c, struct nameidata *nd) +{ /* EF_STAT: revalidate, then return a kmalloc struct subinode describing the inode in c->more_data */ + int err; + struct inode *ip = nd->dentry->d_inode; + + if (ip->i_op && ip->i_op->revalidate && + (err = ip->i_op->revalidate(nd->dentry))) + return(err); + if(!(c->more_data = kmalloc(sizeof(struct subinode), GFP_KERNEL))) + return(-ENOMEM); + c->more_data_len = sizeof(struct subinode); + ip = nd->dentry->d_inode; + mfs_ip_to_subip((struct subinode *)c->more_data, ip); + ((struct subinode *)c->more_data)->si_ino = global_inum(ip); /* si_ino is overwritten with the value from global_inum() */ + return(0); +} + +mfs_handle_t +mfs_low_complete(struct mfs_completion *c, struct nameidata *nd) +{ /* run the operation recorded in c against nd, storing its status in c->error; returns a handle from local_to_mfs() when one is exported, else 0 */ + mfs_handle_t result; + + if(!c->needed) + goto export; + c->error = 0; + c->done = 1; + if(c->ro) + current->mosix.dirty_bits |= MFSARG_RONLY; /* set only for the duration of the operation, cleared after the switch */ + switch(c->func) + { + case EF_OPEN: + c->error = open_namei(NULL, c->args.ints.i1, + c->args.ints.i2, nd); + break; + case EF_ACCESS: + c->error = permission(nd->dentry->d_inode, + c->args.ints.i1); + break; + case EF_LINK: + c->error = mfs_low_complete_link(c, nd); + break; + case EF_UNLINK: + c->error = mfs_low_complete_unlink(c, nd); + break; + case EF_SYMLINK: + c->error = mfs_low_complete_symlink(c, nd); + c->more_data = NULL; /* the link text was input only and is not sent back */ + c->more_data_len = 0; + break; + case EF_MKDIR: + c->error = mfs_low_complete_mkdir(c, nd); + break; + case EF_MKNOD: + c->error = mfs_low_complete_mknod(c, nd); + break; + case EF_RMDIR: + c->error = mfs_low_complete_rmdir(c, nd); + 
break; + case EF_RENAME: + c->error = mfs_low_complete_rename(c, nd); + c->more_data = NULL; + c->more_data_len = 0; + break; + case EF_READLINK: + c->error = mfs_low_complete_readlink(c, nd); /* a positive value here is the link length, not an error */ + break; + case EF_TRUNCATE: + c->error = mfs_low_complete_truncate(c, nd); + break; + case EF_UTIME: + case EF_UTIMES: + c->error = mfs_low_complete_utime(c, nd); + break; + case EF_CHMOD: + c->error = mfs_low_complete_chmod(c, nd); + break; + case EF_CHOWN: + c->error = chown_common(nd->dentry, c->args.ids.uid, + c->args.ids.gid); + break; + case EF_STAT: + c->error = mfs_low_complete_stat(c, nd); + break; + default: + c->done = 0; + break; + } + current->mosix.dirty_bits &= ~MFSARG_RONLY; + if(c->dispose_other && c->other_pe == MFS_PE) + { /* handles whose pe equals MFS_PE are disposed of here and the flag cleared */ + c->dispose_other = 0; + mfs_low_dispose_handle(c->other); + } + if(c->dispose_base && c->base_pe == MFS_PE) + { + c->dispose_base = 0; + mfs_low_dispose_handle(c->base); + } + if(c->done && (c->func != EF_OPEN || c->error)) + return(0); /* only an undone operation or a successful EF_OPEN goes on to export a handle */ + export: + if(!(result = local_to_mfs(nd))) + { + if(c->more_data) + { + kfree(c->more_data); + c->more_data = NULL; + c->more_data_len = 0; + } + c->error = -ENOMEM; + path_release(nd); + if(c->needed) + c->done = 1; + } + return(result); +} + +void +mfs_low_complete_error(struct mfs_completion *c, int err) +{ /* record err in c without running the operation: dispose of flagged handles whose pe is MFS_PE and clear more_data */ + if(c->dispose_base && c->base_pe == MFS_PE) + { + c->dispose_base = 0; + mfs_low_dispose_handle(c->base); + } + if(c->dispose_other && c->other_pe == MFS_PE) + { + c->dispose_other = 0; + mfs_low_dispose_handle(c->other); + } + c->error = err; + c->more_data = NULL; /* in case of EF_SYMLINK */ + c->more_data_len = 0; + if(c->needed) + c->done = 1; +} + +int +mfs_check_completion(struct mfs_completion *c, struct nameidata *nd) +{ /* consume a filled-in completion: dispose of flagged handles, then return 0 if the caller must continue or 1 if the operation is finished */ + struct inode *inode; + struct subinode *subip; + + if(c->dispose_other) + mfs_dispose_handle(c->other_pe, c->other); + if(c->dispose_base) + mfs_dispose_handle(c->base_pe, c->base); + if(!c->done) + return(0); + nd->complete_flags |= COMPLETE_DONE; + if(c->error && 
(c->func != EF_READLINK || c->error < 0)) + return(1); + switch(c->func) + { + case EF_OPEN: + return(0); + case EF_STAT: + nd->complete_parg = NULL; + if(!(subip = (struct subinode *)c->more_data)) + { + if(!c->error) + c->error = -ENOMEM; + return(1); + } + if((inode = (struct inode *) + kmalloc(sizeof(struct inode), GFP_KERNEL))) + { + path_release(nd); + mfs_subip_to_ip(inode, subip); + inode->i_ino = + ((struct subinode *)c->more_data)->si_ino; + inode->i_dev = mfs_super->s_dev; + inode->u.mfs_i.pe = c->other_pe; + nd->complete_parg = inode; + /* the caller must later kfree this! */ + } + else if(!c->error) + c->error = -ENOMEM; + kfree(subip); + return(1); + case EF_READLINK: + if(c->more_data) + { + if(c->error >= 0 && + copy_to_user((void *)c->args.buffer.buf, + c->more_data, c->more_data_len)) + c->error = -EFAULT; + kfree(c->more_data); + path_release(nd); + } + else if(c->error >= 0) + c->error = -ENOMEM; + return(1); + default: + path_release(nd); + case 0: + return(1); + } +} + +int +mfs_link_complete(struct mfs_completion *c, int pe, struct nameidata *nd) +{ + int result; + char node[6]; + int len, copy; + struct inode *ip; + + if(c->dispose_other) + mfs_dispose_handle(c->other_pe, c->other); + if(c->dispose_base) + mfs_dispose_handle(c->base_pe, c->base); + if(!c->needed) + return(-EPERM); + nd->complete_flags |= COMPLETE_DONE; + switch(c->func) + { + case EF_READLINK: + sprintf(node, "%d", pe); + len = strlen(node); + result = copy = min(len, c->args.buffer.bufsiz); + if(copy_to_user((void *)c->args.buffer.buf, node, copy)) + return(-EFAULT); + break; + case EF_STAT: + if(!(ip = (struct inode *) + kmalloc(sizeof(*ip), GFP_KERNEL))) + return(-ENOMEM); + nd->complete_parg = ip; + memset(ip, 0, sizeof(*ip)); + ip->i_dev = mfs_super->s_dev; + ip->i_ino = pe; + ip->u.mfs_i.pe = pe; + ip->i_mode = S_IFLNK | 0555; + ip->i_nlink = 1; + for(len = pe ; len ; len /= 10) + ip->i_size++; + ip->i_version = pe; + result = 0; + break; + default: + return(-EPERM); + 
} + path_release(nd); + return(result); +} +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/convert.c linux_umopenmosix/fs/mfs/convert.c --- linux-2.4.17/fs/mfs/convert.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/convert.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,629 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * Some sections copyright 2002 by Moshe Bar + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include +#include + +static struct semaphore mfsc_sem = __MUTEX_INITIALIZER(mfsc_sem); + +mfs_handle_t mfs_unique; +struct dentry *mfs_root; +struct vfsmount *mfs_mnt; +ino_t mfs_root_ino; +dev_t mfs_root_dev; +int mfs_root_usage; + +#define MFS_HASHES 64 +struct mfs_map +{ + struct list_head llist; + struct list_head glist; + mfs_handle_t handle; + struct dentry *l; + struct vfsmount *mnt; + int dcount; /* inc: local_to_mfs ; dec: mfs_put_handle */ + int usecount; /* inc: mfs_to_local ; dec: mfs_lput */ + unsigned long latest; +}; + +static struct list_head lhash[MFS_HASHES], ghash[MFS_HASHES]; + +static inline int +hash_dentry(struct dentry *d) +{ + return(((((unsigned int)d) >> 8) + (((unsigned int)d) >> 14) + + (((unsigned int)d) >> 20)) % MFS_HASHES); +} + +static inline int +hash_handle(mfs_handle_t l) +{ + unsigned int h = l; + + return((h+(h>>6)+(h>>12)+(h>>18)+(h>>24)) % MFS_HASHES); +} + +int +mfs_conversion_init(void) +{ + int i; + + if(!mfs_unique) + { + mfs_root = dget(current->fs->root); + mfs_mnt = mntget(current->fs->rootmnt); + mfs_root_ino = mfs_root->d_inode->i_ino; + mfs_root_dev = 
mfs_root->d_inode->i_dev; + mfs_unique = ((mfs_handle_t) CURRENT_TIME) << 31; + if(mfs_unique <= MFS_ROOT_INO) /* 0 time? */ + mfs_unique = MFS_ROOT_INO + 1; + for(i = 0 ; i < MFS_HASHES ; i++) + { + INIT_LIST_HEAD(&lhash[i]); + INIT_LIST_HEAD(&ghash[i]); + } + } + return(0); +} + +void +mfs_put(struct mfs_map *m, int putuse) +{ + struct list_head *x, *nex, *prv; + + if(putuse) + m->usecount--; + else + m->dcount--; + if(m->usecount > 0 || m->dcount > 0) + return; + x = &m->llist; + nex = x->next; + prv = x->prev; + nex->prev = prv; + prv->next = nex; + x = &m->glist; + nex = x->next; + prv = x->prev; + nex->prev = prv; + prv->next = nex; + dput(m->l); + mntput(m->mnt); + kfree(m); +} + +void +mfs_change_root(struct dentry *from, struct vfsmount *frommnt, + struct dentry *to, struct vfsmount *tomnt) +{ + int h; + struct list_head *head, *x; + struct mfs_map *m, *new = NULL; + int new_root_usage = 0; + + down(&mfsc_sem); + if(from != mfs_root || frommnt != mfs_mnt || + (to == from && tomnt == frommnt)) + goto out; + if(mfs_root_usage && !(new = (struct mfs_map *) + kmalloc(sizeof (struct mfs_map), GFP_KERNEL))) + { + printk("MFS root failed to change (ENOMEM)!\n"); + goto out; + } + + /* is the new root listed? 
(if so, and no external refs, remove it) */ + head = &lhash[hash_dentry(to)]; + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, llist)); + if(m->l == to && m->mnt == tomnt) + { + new_root_usage = m->dcount; + m->dcount = 1; + mfs_put(m, 0); + break; + } + } + + if(mfs_root_usage) + { + /* someone is actively using the old root, + * so we must set it up as a normal hashed entry */ + new->l = dget(from); + new->mnt = mntget(frommnt); + new->dcount = 0; + new->usecount = mfs_root_usage; + new->handle = mfs_unique++; + new->latest = jiffies; + h = hash_dentry(from); + new->llist.next = lhash[h].next; + lhash[h].next->prev = &new->llist; + new->llist.prev = &lhash[h]; + lhash[h].next = &new->llist; + h = hash_handle(new->handle); + new->glist.next = ghash[h].next; + ghash[h].next->prev = &new->glist; + new->glist.prev = &ghash[h]; + ghash[h].next = &new->glist; + new = NULL; + } + dget(to); + mntget(tomnt); + mfs_root = to; + mfs_mnt = tomnt; + mfs_root_ino = to->d_inode->i_ino; + mfs_root_dev = to->d_inode->i_dev; + mfs_root_usage = new_root_usage; + dput(from); + mntput(frommnt); + if(new) + kfree(new); + out: + up(&mfsc_sem); +} + +mfs_handle_t +local_to_mfs(struct nameidata *local) +{ + struct dentry *dp = local->dentry; + struct vfsmount *mnt = local->mnt; + int h = hash_dentry(dp); + struct list_head *head = &lhash[h], *x; + struct mfs_map *m; + + down(&mfsc_sem); + if(dp == mfs_root && mnt == mfs_mnt) + { + up(&mfsc_sem); + return(MFS_PE); + } + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, llist)); + if(m->l == dp && mnt == mnt) + { + m->dcount++; + up(&mfsc_sem); + return(m->handle); + } + } + if(!(m = (struct mfs_map *)kmalloc(sizeof (struct mfs_map),GFP_KERNEL))) + { + up(&mfsc_sem); + return(0); + } + m->l = dget(dp); + m->mnt = mntget(mnt); + m->dcount = 1; + m->usecount = 0; + m->handle = mfs_unique++; + m->llist.next = 
lhash[h].next; + lhash[h].next->prev = &m->llist; + m->llist.prev = &lhash[h]; + lhash[h].next = &m->llist; + h = hash_handle(m->handle); + m->glist.next = ghash[h].next; + ghash[h].next->prev = &m->glist; + m->glist.prev = &ghash[h]; + ghash[h].next = &m->glist; + m->latest = jiffies; + up(&mfsc_sem); + return(m->handle); +} + +int +mfs_to_local(mfs_handle_t handle, struct nameidata *local) +{ + struct list_head *head, *x, *nex, *prv; + struct mfs_map *m; + + down(&mfsc_sem); + if(handle == MFS_PE) + { + its_the_root: + mfs_root_usage++; + local->dentry = mfs_root; + local->mnt = mfs_mnt; + up(&mfsc_sem); + return(0); + } + if(handle < MFS_ROOT_INO) + { + up(&mfsc_sem); + return(-EINVAL); + } + head = &ghash[hash_handle(handle)]; + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, glist)); + if(m->handle == handle) + { + local->dentry = m->l; + local->mnt = m->mnt; + if(m->l == mfs_root && m->mnt == mfs_mnt) + /* very rare, but possible after root change */ + goto its_the_root; + m->usecount++; + /* move to head of queues: */ + if(x != head->next) + { + nex = x->next; + prv = x->prev; + nex->prev = prv; + prv->next = nex; + x->next = head->next; + x->prev = head; + x->next->prev = x; + head->next = x; + } + head = &lhash[hash_dentry(local->dentry)]; + x = &m->llist; + if(x != head->next) + { + nex = x->next; + prv = x->prev; + nex->prev = prv; + prv->next = nex; + x->next = head->next; + x->prev = head; + x->next->prev = x; + head->next = x; + } + m->latest = jiffies; + up(&mfsc_sem); + return(0); + } + } + up(&mfsc_sem); + printk("MFS: Handle %016LX not found (obsolete)\n", handle); + return(-ESTALE); +} + +void +mfs_lput(struct dentry *dp, struct vfsmount *mnt) +{ + struct list_head *head, *x; + struct mfs_map *m; + + down(&mfsc_sem); + if(dp == mfs_root && mnt == mfs_mnt) + { + mfs_root_usage--; + up(&mfsc_sem); + return; + } + head = &lhash[hash_dentry(dp)]; + for(x = head->next ; x != head ; x = 
x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, llist)); + if(m->l == dp && m->mnt == mnt) + { + mfs_put(m, 1); + up(&mfsc_sem); + return; + } + } + up(&mfsc_sem); + printk("mfs_lput: no handle!\n"); +} + +int +mfs_get_handle(mfs_handle_t handle) +{ + int h = hash_handle(handle); + struct list_head *head = &ghash[h], *x; + struct mfs_map *m; + + if(handle < MFS_ROOT_INO) + return(0); + down(&mfsc_sem); + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, glist)); + if(m->handle == handle) + { + m->dcount++; + m->latest = jiffies; + up(&mfsc_sem); + return(0); + } + } + up(&mfsc_sem); + printk("MFS: Handle %016LX not found (obsolete) when duplicated\n", + handle); + return(-ESTALE); +} + +int +mfs_access_handle(mfs_handle_t handle) +{ + int h = hash_handle(handle); + struct list_head *head = &ghash[h], *x; + struct mfs_map *m; + + if(handle < MFS_ROOT_INO) + return(0); + down(&mfsc_sem); + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, glist)); + if(m->handle == handle) + { + m->latest = jiffies; + up(&mfsc_sem); + return(0); + } + } + up(&mfsc_sem); + printk("MFS: Handle %016LX not found (obsolete) when claimed\n", + handle); + return(-ESTALE); +} + +void +mfs_put_handle(mfs_handle_t handle) +{ + int h = hash_handle(handle); + struct list_head *head = &ghash[h], *x; + struct mfs_map *m; + + if(handle < MFS_ROOT_INO) + return; + down(&mfsc_sem); + for(x = head->next ; x != head ; x = x->next) + { + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, glist)); + if(m->handle == handle) + { + mfs_put(m, 0); + up(&mfsc_sem); + return; + } + } + up(&mfsc_sem); + printk("MFS: Handle %016LX not found (obsolete) when released\n", + handle); +} + +int +mfs_try_to_cleanup(int force) +{ + int h; + struct mfs_map *m; + struct list_head *next; + + if(!mfs_unique) + return(0); + down(&mfsc_sem); + if(force != 2) + 
for(h = 0 ; h < MFS_HASHES ; h++) + while(lhash[h].next != &lhash[h]) + { + m = (struct mfs_map *)((char *)lhash[h].next - + offsetof(struct mfs_map, llist)); + if(!force || m->usecount) + { + busy: + up(&mfsc_sem); + return(-EBUSY); + } + } + for(h = 0 ; h < MFS_HASHES ; h++) + for(next = lhash[h].next ; next != &lhash[h] ; ) + { + m = (struct mfs_map *)((char *)next - + offsetof(struct mfs_map, llist)); + next = next->next; + if(m->usecount) + { + if(force == 2) + m->dcount = 0; + else + goto busy; + } + else + { + m->dcount = 1; + mfs_put(m, 0); + } + } + up(&mfsc_sem); + return(0); +} + +void +mfs_throw_garbage(void) +{ + int h; + struct mfs_map *m; + struct list_head *next; + int n = 0; + + down(&mfsc_sem); + for(h = 0 ; h < MFS_HASHES ; h++) + for(next = lhash[h].next ; next != &lhash[h] ; ) + { + m = (struct mfs_map *)((char *)lhash[h].next - + offsetof(struct mfs_map, llist)); + next = next->next; + if(!m->usecount && + time_after(jiffies, m->latest + MFS_GARBAGE_TIME)) + { + m->dcount = 1; + mfs_put(m, 0); + n++; + } + } + up(&mfsc_sem); + if(n) + printk("MFS: Disposed of %d unclaimed entr%s\n", n, + n > 1 ? "ies" : "y"); +} + +/* why is this not in dcache.c? 
*/ +int +is_descendant(struct dentry *dp, struct vfsmount *mnt, struct dentry *pdp, + struct vfsmount *pmnt) +{ + spin_lock(&dcache_lock); + while(1) + { + if(dp == pdp && mnt == pmnt) + { + spin_unlock(&dcache_lock); + return(1); + } + if(dp == mnt->mnt_root) + { + struct vfsmount *xmnt = mnt->mnt_parent; + + if(xmnt == mnt) + break; + dp = mnt->mnt_mountpoint; + mnt = xmnt; + } + else + dp = dp->d_parent; + } + spin_unlock(&dcache_lock); + return(0); +} + +int +mfs_kill(char *filename) +{ + int h; + struct mfs_map *m; + struct list_head *next; + int n = 0; + int error; + mm_segment_t old_fs; + struct nameidata nd; + + old_fs = set_fs(KERNEL_DS); + error = 0; + if(path_init(filename, LOOKUP_POSITIVE, &nd)) + error = path_walk(filename, &nd); + set_fs(old_fs); + if(error) + return(error); + down(&mfsc_sem); + for(h = 0 ; h < MFS_HASHES ; h++) + for(next = lhash[h].next ; next != &lhash[h] ; ) + { + m = (struct mfs_map *)((char *)lhash[h].next - + offsetof(struct mfs_map, llist)); + next = next->next; + if(is_descendant(m->l, m->mnt, nd.dentry, nd.mnt)) + { + m->dcount = 1; + mfs_put(m, 0); + n++; + } + } + up(&mfsc_sem); + if(n) + printk("MFS_KILL: Killed %d entr%s\n", n, n > 1 ? 
"ies" : "y"); + path_release(&nd); + return(0); +} + +/* JUST FOR DEBUGGING: */ +void +mfs_print_latest(unsigned long when) +{ + int diff = (when - jiffies) / HZ; + + printk(", age="); + if(diff < 0 || diff > 24*3600) + printk("%ld", when); + else if(diff == 0) + printk("in %ld ticks", when - jiffies); + else if(diff < 6000) + printk("%d seconds", diff); + else if(diff < 3600) + printk("%d:%02d minutes", diff/60, diff % 60); + else + printk("%d:%02d:%02d hours", diff/3600, (diff % 3600) / 60, + diff % 60); +} + +int +dump_mfsc(void) +{ + register int h; + struct list_head *head, *x; + struct mfs_map *m; + int cnt = 0; + + for(h = 0 ; h < MFS_HASHES ; h++) + for(head = &lhash[h] , x = head->next ; x != head ; x = x->next) + { + cnt++; + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, llist)); + printk("%d: handle=%016LX, l=%x, ip=%x, count=%d, use=%d", + h, m->handle, + (int)m->l, (int)m->l->d_inode, m->dcount, m->usecount); + if(m->l->d_inode) + printk("\n dev=%d/%d, ino=%ld, mode=0%o", + MAJOR(m->l->d_inode->i_dev), + MINOR(m->l->d_inode->i_dev), + m->l->d_inode->i_ino, m->l->d_inode->i_mode); + mfs_print_latest(m->latest); + printk("\n"); + } + return(cnt); +} + +int +dump_mfsc2(void) +{ + register int h; + struct list_head *head, *x; + struct mfs_map *m; + int cnt = 0; + + for(h = 0 ; h < MFS_HASHES ; h++) + for(head = &ghash[h] , x = head->next ; x != head ; x = x->next) + { + cnt++; + m = (struct mfs_map *)((char *)x - + offsetof(struct mfs_map, glist)); + printk("%d: handle=%016LX, l=%x, ip=%x, count=%d, use=%d", + h, m->handle, + (int)m->l, (int)m->l->d_inode, m->dcount, m->usecount); + if(m->l->d_inode) + printk("\n dev=%d/%d, ino=%ld, mode=0%o", + MAJOR(m->l->d_inode->i_dev), + MINOR(m->l->d_inode->i_dev), + m->l->d_inode->i_ino, m->l->d_inode->i_mode); + mfs_print_latest(m->latest); + printk("\n"); + } + return(cnt); +} +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/count.c linux_umopenmosix/fs/mfs/count.c --- 
linux-2.4.17/fs/mfs/count.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/count.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ + + +#include +#if defined(CONFIG_MOSIX_FS) && defined(CONFIG_MOSIX_DFSA) +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define weight(_c,_i,_o) ((_c)*2048+(_i)+(_o)) +#define threshold 0xffe00000 + +#define DEBUG_MFSCOUNT +#ifdef DEBUG_MFSCOUNT +int debug_mfscount; + +void do_dump_mfscount(struct task_struct *p) +{ + int i; + struct mfs_stats *s = p->mosix.mfs_stats; + + if(!s || !s->nnodes) + { + printk("%s MFS Statistics for %d(%s)\n", s ? 
"Empty" : "No", + p->pid, p->comm); + return; + } + for(i = 0 ; i < s->nnodes ; i++) + printk("%d: node #%d, conns=%d, inbytes=%d, outbytes=%d\n", i, + s->nodes[i], (int)s->conns[i], (int)s->inbytes[i], + (int)s->outbytes[i]); +} +#endif /* DEBUG_MFSCOUNT */ + +void +mfs_count(int w, int inbytes, int outbytes) +{ + struct task_struct *p = current; + struct mfs_stats *s = p->mosix.mfs_stats; + register int i; + int64_t prev, worst = 0, best = 0; + int victim = -1; + long new_weight; + + if(!mfs_is_dfsa || !(p->mosix.dflags & (DREMOTE|DSTATSDOWN))) + return; +#ifdef DEBUG_MFSCOUNT + if(debug_mfscount > 1) + printk("mfs_count(%d,%d,%d): ", w, inbytes, outbytes); +#endif /* DEBUG_MFSCOUNT */ + if(w == 0 || w > MOSIX_MAX) + return; + if(!s) + { + if(!(s = kmalloc(sizeof(struct mfs_stats), GFP_KERNEL))) + return; + p->mosix.mfs_stats = s; + s->nnodes = 0; + } + new_weight = weight(1, inbytes, outbytes); + for(i = (s->hint < s->nnodes && s->nodes[s->hint] == w) ? s->hint : + s->nnodes - 1 ; i >= 0 ; i--) + { + prev = weight(s->conns[i], s->inbytes[i], s->outbytes[i]); + if(s->nodes[i] == w) + { + s->conns[i]++; + s->inbytes[i] += inbytes; + s->outbytes[i] += outbytes; + s->hint = i; + if(w != PE && + (((((long)prev)+new_weight)^prev) & threshold)) + mosix_add_to_whereto(p, MFSBALANCE); +#ifdef DEBUG_MFSCOUNT + if(debug_mfscount) + { + printk("Successfully added <%d,%d> to %d\n", + inbytes, outbytes, w); + if(debug_mfscount > 2) + do_dump_mfscount(p); + } +#endif /* DEBUG_MFSCOUNT */ + return; + } + if(victim == -1) + { + best = worst = prev; + victim = i; + } + else + { + if(prev > best) + best = prev; + if(prev < worst) + { + worst = prev; + victim = i; + } + } + } + if((i = s->nnodes) < MAX_MFS_STATNODES) + { + s->nnodes++; + s->nodes[i] = w; + s->conns[i] = 1; + s->inbytes[i] = inbytes; + s->outbytes[i] = outbytes; + s->hint = i; +#ifdef DEBUG_MFSCOUNT + if(debug_mfscount) + { + printk("New MFS stats (%d,%d) for %d\n", + inbytes, outbytes, w); + if(debug_mfscount > 2) 
+ do_dump_mfscount(p); + } +#endif /* DEBUG_MFSCOUNT */ + return; + } + i = victim; + if(--s->conns[i] <= 0 || (s->inbytes[i] -= inbytes) <= 0 || + (s->outbytes[i] -= outbytes) <= 0) + { + if(victim == --s->nnodes) + { +#ifdef DEBUG_MFSCOUNT + if(debug_mfscount) + { + printk("Anulled victim=%d\n", s->nodes[i]); + if(debug_mfscount > 2) + do_dump_mfscount(p); + } +#endif /* DEBUG_MFSCOUNT */ + return; + } + victim = s->nnodes; + s->nodes[i] = s->nodes[victim]; + s->conns[i] = s->conns[victim]; + s->inbytes[i] = s->inbytes[victim]; + s->outbytes[i] = s->outbytes[victim]; + if(s->hint == victim) + s->hint = i; +#ifdef DEBUG_MFSCOUNT + if(debug_mfscount) + { + printk("Anulled/replaced a victim\n"); + if(debug_mfscount > 2) + do_dump_mfscount(p); + } +#endif /* DEBUG_MFSCOUNT */ + } +#ifdef DEBUG_MFSCOUNT + else if(debug_mfscount) + { + printk("Reduced victim=%d by <%d,%d>\n", s->nodes[i], + inbytes, outbytes); + if(debug_mfscount > 2) + do_dump_mfscount(p); + } +#endif /* DEBUG_MFSCOUNT */ + return; +} + +int +mfs_add_stats(struct mfs_stats *add, struct mfs_stats *to, struct mosix_task *m) +{ + register int i, j; + int64_t prev, worst = 0; + int victim = -1; +#if MAX_MFS_STATNODES > BITS_PER_LONG +#error unsigned long is not enough +#else + unsigned long bit, done = 0; +#endif /* MAX_MFS_STATNODES > BITS_PER_LONG */ + int node; + int ret = 0; + + if(!add || !add->nnodes) + return(0); + if(!to) + { + if(!to && (!m || !(to = m->mfs_stats = + kmalloc(sizeof(struct mfs_stats), GFP_KERNEL)))) + return(0); + *to = *add; + for(i = to->nnodes-1 ; i >= 0 ; i--) + if(weight(to->conns[i], to->inbytes[i], to->outbytes[i]) & + threshold) + return(1); + return(0); + } + for(i = 0 ; i < to->nnodes ; i++) + { + node = to->nodes[i]; + for(j = 0 ; j < add->nnodes ; j++) + if(add->nodes[j] == node) + { + prev = weight(to->conns[i], to->inbytes[i], + to->outbytes[i]); + to->conns[i] += add->conns[i]; + to->inbytes[i] += add->inbytes[i]; + to->outbytes[i] += add->outbytes[i]; + if(!ret && 
node != PE && + (weight(to->conns[i], to->inbytes[i], + to->outbytes[i]) ^ prev) & threshold) + ret = 1; + done |= (1 << (j-1)); + break; + } + } + for(bit = 1 , j = 0 ; j < add->nnodes ; j++ , bit <<= 1) + if(!(done & bit)) + { + if(to->nnodes < MAX_MFS_STATNODES) + { + i = to->nnodes++; + to->nodes[i] = add->nodes[j]; + to->conns[i] = add->conns[j]; + to->inbytes[i] = add->inbytes[j]; + to->outbytes[i] = add->outbytes[j]; + if(!ret && (weight(to->conns[i], to->inbytes[i], + to->outbytes[i]) & threshold)) + ret = 1; + continue; + } + for(i = 0 ; i < MAX_MFS_STATNODES ; i++) + { + prev = weight(add->conns[i], add->inbytes[i], + add->outbytes[i]); + if(i == 0 || prev < worst) + { + victim = i; + worst = prev; + } + } + if(!ret && (weight(add->conns[i], add->inbytes[i], + add->outbytes[i]) & threshold)) + ret = 1; + to->nodes[victim] = add->nodes[i]; + to->conns[victim] = add->conns[i]; + to->inbytes[victim] = add->inbytes[i]; + to->outbytes[victim] = add->outbytes[i]; + } + return(ret); +} + +#ifdef DEBUG_MFSCOUNT +void +dump_mfscount(int pid) +{ + struct task_struct *p; + + if(!pid) + p = current; + else + p = find_any_task_by_pid(pid); + if(p) + do_dump_mfscount(p); + else + printk("No Such Task (%d)\n", pid); +} +#endif /* DEBUG_MFSCOUNT */ + +#endif /* CONFIG_MOSIX_FS && CONFIG_MOSIX_DFSA */ diff -urN linux-2.4.17/fs/mfs/file.c linux_umopenmosix/fs/mfs/file.c --- linux-2.4.17/fs/mfs/file.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/file.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,1868 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ + + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int MFS_PE; +int mfs_is_mounted; +char mfs_is_ro; +#ifdef CONFIG_MOSIX_DFSA +char mfs_is_dfsa; +#include +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_DEBUG +int mfs_loopback_test = 0; +#else +#define mfs_loopback_test 0 +#endif /* CONFIG_MOSIX_DEBUG */ +int mfs_debug; +struct super_block *mfs_super; +extern int may_create(struct inode *, struct dentry *); + +#define AN_IMPOSSIBLE_ERROR (-ECHILD) /* cannot occur in FS context */ + +ino_t +global_inum(struct inode *ip) +{ + dev_t dev = ip->i_dev; + ino_t mask = (MFS_PE << 16) ^ (MAJOR(dev) << 16) ^ (MINOR(dev) << 24); + ino_t result = ip->i_ino ^ mask; + + if(ip->i_ino == mfs_root_ino && dev == mfs_root_dev) + return(MFS_PE); + if(((unsigned long)result) <= MFS_ROOT_INO) + result ^= 0x80800000; + return(result); +} + +inline void +mfs_get_handling(struct dentry *dp, int *where, mfs_handle_t *handle) +{ + struct mfs_dinfo *p = (struct mfs_dinfo *)dp->d_fsdata; + +#ifdef CONFIG_MOSIX_DIAG + if(!p) + panic("mfs_get_handling: no handle\n"); +#endif /* CONFIG_MOSIX_DIAG */ + *where = p->pe; + *handle = p->handle; +} + +inline int +mfs_is_last_use(struct dentry *dp, int *where, mfs_handle_t *handle) +{ + if(dp->d_fsdata) + { + *where = ((struct mfs_dinfo *)dp->d_fsdata)->pe; + *handle = ((struct mfs_dinfo *)dp->d_fsdata)->handle; + if(atomic_read(&dp->d_count) > 1) + return(0); + kfree(dp->d_fsdata); + dp->d_fsdata = NULL; + return(1); + } + else + { + *where = 0; + *handle = MFS_ROOT_INO; + return(0); + } +} + +inline int +mfs_low_attach_handle(mfs_handle_t handle) +{ + if(handle > MFS_ROOT_INO) /* do not waste time on root */ + return(mfs_get_handle(handle)); + return(0); +} + +inline int +mfs_attach_handle(int pe, mfs_handle_t handle) +{ + if(handle <= MFS_ROOT_INO) + return(0); + mfs_count_attach_handle(pe); 
+ if(pe == MFS_PE && !mfs_loopback_test) + return(mfs_low_attach_handle(handle)); + else if(pe) + return(mfs_client_attach_handle(pe, handle)); + return(0); +} + +inline int +mfs_low_touch_handle(mfs_handle_t handle) +{ + if(handle > MFS_ROOT_INO) /* do not waste time on root */ + return(mfs_access_handle(handle)); + return(0); +} + +inline int +mfs_touch_handle(int pe, mfs_handle_t handle) +{ + if(handle <= MFS_ROOT_INO) + return(0); + mfs_count_touch_handle(pe); + if(pe == MFS_PE && !mfs_loopback_test) + return(mfs_low_touch_handle(handle)); + else if(pe) + return(mfs_client_touch_handle(pe, handle)); + return(0); +} + +inline void +mfs_low_dispose_handle(mfs_handle_t handle) +{ + if(handle > MFS_ROOT_INO) /* do not waste time on root */ + mfs_put_handle(handle); +} + +inline void +mfs_dispose_handle(int pe, mfs_handle_t handle) +{ + if(handle <= MFS_ROOT_INO) + return; + mfs_count_dispose_handle(pe); + if(pe == MFS_PE && !mfs_loopback_test) + mfs_low_dispose_handle(handle); + else if(pe) + mfs_client_dispose_handle(pe, handle); +} + +extern struct file_operations mfs_file_file_operations; +extern struct file_operations mfs_symlink_file_operations; +extern struct file_operations mfs_dir_file_operations; +extern struct inode_operations mfs_file_inode_operations; +extern struct inode_operations mfs_symlink_inode_operations; +extern struct inode_operations mfs_dir_inode_operations; +extern struct dentry_operations mfs_dentry_operations; + +void +mfs_ip_to_subip(struct subinode *subip, struct inode *ip) +{ + subip->si_mode = ip->i_mode; + subip->si_nlink = ip->i_nlink; + subip->si_iuid = ip->i_uid; + subip->si_gid = ip->i_gid; + subip->si_rdev = ip->i_rdev; + subip->si_size = ip->i_size; + subip->si_atime = ip->i_atime; + subip->si_ctime = ip->i_ctime; + subip->si_mtime = ip->i_mtime; + subip->si_blksize = ip->i_blksize; + subip->si_blocks = ip->i_blocks; + subip->si_origino = ip->i_ino; + subip->si_origdev = ip->i_dev; +} + +void +mfs_subip_to_ip(struct inode *ip, 
struct subinode *subip) +{ + ip->i_mode = subip->si_mode; + ip->i_nlink = subip->si_nlink; + ip->i_uid = subip->si_iuid; + ip->i_gid = subip->si_gid; + ip->i_rdev = subip->si_rdev; + ip->i_size = subip->si_size; + ip->i_atime = subip->si_atime; + ip->i_ctime = subip->si_ctime; + ip->i_mtime = subip->si_mtime; + ip->i_blksize = subip->si_blksize; + ip->i_blocks = subip->si_blocks; + ip->i_version = subip->si_origino; + ip->i_generation = subip->si_origdev; + ip->i_sb = mfs_super; + switch(ip->i_mode & S_IFMT) + { + case S_IFREG: + ip->i_op = &mfs_file_inode_operations; + ip->i_fop = &mfs_file_file_operations; + break; + case S_IFLNK: + ip->i_op = &mfs_symlink_inode_operations; + ip->i_fop = &mfs_symlink_file_operations; + break; + case S_IFDIR: + ip->i_op = &mfs_dir_inode_operations; + ip->i_fop = &mfs_dir_file_operations; + break; + default: + printk("MFS: inum %d of bad mode (0%o)\n", + (int)ip->i_ino, ip->i_mode); + make_bad_inode(ip); + break; + } +} + +void +cp_file_fields(struct file *filto, struct file *filfrom) +{ + filto->f_mode = filfrom->f_mode; + filto->f_pos = filfrom->f_pos; + filto->f_owner = filfrom->f_owner; + filto->f_reada = filfrom->f_reada; + filto->f_ramax = filfrom->f_ramax; + filto->f_raend = filfrom->f_raend; + filto->f_ralen = filfrom->f_ralen; + filto->f_rawin = filfrom->f_rawin; +/* the following two fields are not actually currently in use! 
+ filto->f_uid = filfrom->f_uid; + filto->f_gid = filfrom->f_gid; +*/ + filto->f_error = filfrom->f_error; + filto->f_version = filfrom->f_version; + filto->private_data = filfrom->private_data; +} + +int +prepare_dummy_file(mfs_handle_t handle, struct file *dummy, struct file *filp) +{ + struct nameidata nd; + int err; + + if((err = mfs_to_local(handle, &nd))) + return(err); + if(nd.dentry->d_inode && nd.dentry->d_inode->i_op) + dummy->f_op = nd.dentry->d_inode->i_fop; + else + dummy->f_op = NULL; + dummy->f_dentry = nd.dentry; + dummy->f_vfsmnt = nd.mnt; + dummy->f_flags = filp->f_flags & (O_LARGEFILE|O_SYNC); + dummy->f_iobuf = NULL; + dummy->f_iobuf_lock = 0; + atomic_set(&dummy->f_count, 1); + cp_file_fields(dummy, filp); + return(0); +} + +void +collect_dummy_file(struct file *dummy, struct file *filp) +{ + mfs_lput(dummy->f_dentry, dummy->f_vfsmnt); + cp_file_fields(filp, dummy); +} + +loff_t +mfs_dir_llseek(struct file *filp, loff_t offset, int origin) +{ + int where; + mfs_handle_t handle; + + mfs_get_handling(filp->f_dentry, &where, &handle); + mfs_count_llseek(where); + if(where != MFS_PE || mfs_loopback_test) + return(mfs_client_llseek(where, handle, filp, offset, origin)); + return(mfs_low_llseek(handle, filp, offset, origin)); +} + +loff_t +mfs_file_llseek(struct file *filp, loff_t offset, int origin) +{ + uint64_t newoff; + + switch(origin) + { + case 0: + newoff = offset; + break; + case 1: + newoff = offset + filp->f_pos; + break; + default: + goto must_call; + } + if(!(newoff >> 32)) + { + if(newoff != filp->f_pos) + { + lock_kernel(); + filp->f_pos = newoff; + filp->f_reada = 0; + filp->f_version = ++event; + unlock_kernel(); + } + return(newoff); + } + + must_call: + return(mfs_dir_llseek(filp, offset, origin)); +} + +loff_t +mfs_low_llseek(mfs_handle_t handle, struct file *filp, loff_t offset, + int origin) +{ + loff_t ret; + struct file dummy; + extern loff_t llseek(struct file *, loff_t, int); + + if((ret = prepare_dummy_file(handle, &dummy, 
filp))) + return(ret); + lock_kernel(); + ret = llseek(&dummy, offset, origin); + unlock_kernel(); + collect_dummy_file(&dummy, filp); + return(ret); +} + +ssize_t +mfs_read(struct file *filp, char *buf, size_t count, loff_t *ppos) +{ + int where; + mfs_handle_t handle; + + if(!(filp->f_mode & FMODE_READ)) + return(-EBADF); + mfs_get_handling(filp->f_dentry, &where, &handle); + mfs_count_read(where, count); + if(where != MFS_PE || mfs_loopback_test) + return(mfs_client_read(where, handle, filp, buf, count, ppos)); + return(mfs_low_read(handle, filp, buf, count, ppos)); +} + +ssize_t +mfs_low_read(mfs_handle_t handle, struct file *filp, char *buf, size_t count, + loff_t *ppos) +{ + loff_t ret; + struct file *dummy; + struct file_operations *fops; + loff_t mppos; + + current->mosix.dirty_bits |= MFSARG_EMPTYF_PRI; + dummy = get_empty_filp(); + current->mosix.dirty_bits &= ~MFSARG_EMPTYF_PRI; + if(!dummy) + return(-ENFILE); + + if((ret = prepare_dummy_file(handle, dummy, filp))) + { + put_filp(dummy); + return(ret); + } + dget(dummy->f_dentry); + mntget(dummy->f_vfsmnt); + fops = dummy->f_dentry->d_inode->i_fop; + mppos = *ppos; +#ifdef ONE_DAY_S_FILES_WILL_BE_USED_FOR_RDONLY_FILES + if(ip->i_sb) + file_move(dummy, &ip->i_sb->s_files); +#endif /* ONE_DAY_S_FILES_WILL_BE_USED_FOR_RDONLY_FILES */ + if(!fops || !fops->read) + ret = -EINVAL; + else + ret = fops->read(dummy, buf, count, &mppos); + collect_dummy_file(dummy, filp); + *ppos = mppos; + dummy->f_mode &= ~FMODE_WRITE; + fput(dummy); + return(ret); +} + +ssize_t +mfs_write(struct file *filp, const char *buf, size_t count, loff_t *ppos) +{ + int where; + mfs_handle_t handle; + int flim = current->rlim[RLIMIT_FSIZE].rlim_cur; + + if(!(filp->f_mode & FMODE_WRITE)) + return(-EBADF); + mfs_get_handling(filp->f_dentry, &where, &handle); + mfs_count_write(where, count); + if(where != MFS_PE || mfs_loopback_test) + return(mfs_client_write(where, handle, filp, (char *)buf, count, + ppos, flim)); + 
return(mfs_low_write(handle, filp, (char *)buf, count, ppos, flim)); +} + +ssize_t +mfs_low_write(mfs_handle_t handle, struct file *filp, char *buf, size_t count, + loff_t *ppos, long flim) +{ + loff_t ret; + struct file *dummy; + struct file_operations *fops; + struct super_block *sb; + struct inode *ip; + loff_t mppos; + + /* unlike other functions, we need to allocate a dummy "file" + * in the proper way, because NFS files could live even after + * we exit */ + current->mosix.dirty_bits |= MFSARG_EMPTYF_PRI; + dummy = get_empty_filp(); + current->mosix.dirty_bits &= ~MFSARG_EMPTYF_PRI; + if(!dummy) + return(-ENFILE); + + if((ret = prepare_dummy_file(handle, dummy, filp))) + { + put_filp(dummy); + return(ret); + } + dget(dummy->f_dentry); + mntget(dummy->f_vfsmnt); + current->rlim[RLIMIT_FSIZE].rlim_cur = flim; + ip = dummy->f_dentry->d_inode; + sb = ip->i_sb; + fops = ip->i_fop; + mppos = *ppos; + if(filp->f_flags & O_APPEND) + mppos = ip->i_size; + if(sb) + file_move(dummy, &sb->s_files); + if(get_write_access(ip)) + dummy->f_flags |= O_NOWRITEACCESS; + if((ret = locks_verify_area(FLOCK_VERIFY_WRITE, ip, NULL, mppos,count))) + ; + else if(sb && (sb->s_flags & MS_RDONLY)) + ret = -EROFS; + else if(!fops || !fops->write) + ret = -EINVAL; + else + ret = fops->write(dummy, buf, count, &mppos); + collect_dummy_file(dummy, filp); + *ppos = mppos; + fput(dummy); + return(ret); +} + +int +mfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + int where; + mfs_handle_t handle; + struct task_struct *p = current; + int policy; + int packets, bytes; + int result; + + switch(p->mosix.dirty_bits & + (MFSARG_GETDENTS|MFSARG_GETDENTS64|MFSARG_OLDREADDIR)) + { + case MFSARG_GETDENTS: + policy = MFS_READDIR_GETDENTS; + break; + case MFSARG_GETDENTS64: + policy = MFS_READDIR_GETDENTS64; + break; + case MFSARG_OLDREADDIR: + policy = MFS_READDIR_JUSTONE; + break; + default: + policy = MFS_READDIR_UNKNOWN; + break; + } + mfs_get_handling(filp->f_dentry, &where, 
&handle); + if(where != MFS_PE || mfs_loopback_test) + result = mfs_client_readdir(where, handle, filp, dirent, + filldir, policy, p->mosix.dirty_arg, &packets, &bytes); + else + result = mfs_low_readdir(handle, filp, dirent, filldir, + &packets, &bytes); + mfs_count_readdir(where, packets, bytes); + return(result); +} + +struct mfs_local_readdir_callback +{ + filldir_t realfilldir; + void *realdirent; + struct dentry *parent; + struct vfsmount *pmnt; + int packets, bytes, inbytes; +}; + +int +mfs_local_filldir(void *__cb, const char *name, int namelen, loff_t offset, + ino_t ino, unsigned int type) +{ + struct mfs_local_readdir_callback *cb = __cb; + struct inode ji; + struct inode *ri; + int b = sizeof(ino_t) + sizeof(loff_t) + sizeof(namelen) + + strnlen(name, namelen) + 1; + + cb->bytes += b; + if((cb->inbytes += b) > MFS_MAXBLOCK) + { + cb->packets++; + cb->inbytes = b; + } + ji.i_ino = ino; + ji.i_dev = cb->parent->d_inode->i_dev; + /* to present a single, flat filesystem, we must adjust in case the + * son falls outside the parent's file-system. */ + if(namelen == 1 && name[0] == '.') + ; /* nothing */ + else if(namelen == 2 && name[0] == '.' 
&& name[1] == '.') + { + if(cb->parent == cb->pmnt->mnt_root) + { + ri = cb->pmnt->mnt_mountpoint->d_inode; + ji.i_ino = ri->i_ino; + ji.i_dev = ri->i_dev; + } + } + else + { + struct list_head *p; + struct vfsmount *m; + + spin_lock(&dcache_lock); + for(p = cb->pmnt->mnt_mounts.next ; p != &cb->pmnt->mnt_mounts ; + p = p->next) + { + m = list_entry(p, struct vfsmount, mnt_child); + if(m->mnt_mountpoint->d_inode->i_ino == ji.i_ino) + { + struct inode *mroot = m->mnt_root->d_inode; + + ji.i_dev = mroot->i_dev; + ji.i_ino = mroot->i_ino; + break; + } + } + spin_unlock(&dcache_lock); + } + return(cb->realfilldir(cb->realdirent, name, namelen, offset, + global_inum(&ji), type)); +} + +int +mfs_low_readdir(mfs_handle_t handle, struct file *filp, void *dirent, + filldir_t filldir, int *packets, int *bytes) +{ + int ret; + struct file_operations *fops; + struct file dummy; + struct mfs_local_readdir_callback lrb; + + if((ret = prepare_dummy_file(handle, &dummy, filp))) + return(ret); + fops = dummy.f_dentry->d_inode->i_fop; + lrb.realfilldir = filldir; + lrb.realdirent = dirent; + lrb.parent = dummy.f_dentry; + lrb.pmnt = dummy.f_vfsmnt; + lrb.packets = lrb.bytes = lrb.inbytes = 0; + ret = vfs_readdir(&dummy, mfs_local_filldir, (void *)&lrb); + collect_dummy_file(&dummy, filp); + *packets = lrb.packets; + *bytes = lrb.bytes; + return(ret); +} + +int +mfs_ioctl(struct inode *ip, struct file *filp, unsigned int cmd, unsigned long arg) +{ + int where; + mfs_handle_t handle; + int size; + int data; + int ret; + +#ifdef CONFIG_MOSIX_DIAG + if(ip != filp->f_dentry->d_inode) + { + mosix_panic("mfs_ioctl: not same"); + return(-EINVAL); + } +#endif /* CONFIG_MOSIX_DIAG */ + /* currently supporting only EXT2 ioctls: the following others + * are possible candidates (later): ncpfs, smbfs, umsdos */ + switch(cmd) + { + case EXT2_IOC_GETFLAGS: + case EXT2_IOC_GETVERSION: + size = -(int)sizeof(int); + break; + case EXT2_IOC_SETFLAGS: + case EXT2_IOC_SETVERSION: + size = sizeof(int); + 
break; + default: + return(-EPERM); + } + if(size > 0 && copy_from_user((void *)&data, (void *)arg, size)) + return(-EFAULT); + mfs_get_handling(filp->f_dentry, &where, &handle); + mfs_count_ioctl(where); + if(where != MFS_PE || mfs_loopback_test) + ret = mfs_client_ioctl(where, handle, filp, cmd, arg, size, + &data); + else + ret = mfs_low_ioctl(handle, filp, cmd, arg, size, &data); + if(!ret && size < 0 && copy_to_user((void *)arg, (void *)&data, -size)) + ret = -EFAULT; + return(ret); +} + +int +mfs_low_ioctl(mfs_handle_t handle, struct file *filp, unsigned int cmd, + unsigned long arg, int size, int *data) +{ + int ret; + struct file_operations *fops; + struct file dummy; + mm_segment_t old_fs; + + if((ret = prepare_dummy_file(handle, &dummy, filp))) + return(ret); + fops = dummy.f_dentry->d_inode->i_fop; + if(!fops || !fops->ioctl) + ret = -ENOTTY; + else + { + if(size) + old_fs = set_fs(KERNEL_DS); + lock_kernel(); + ret = fops->ioctl(dummy.f_dentry->d_inode, &dummy, cmd, + size ? 
(unsigned long)data : arg); + unlock_kernel(); + if(size) + set_fs(old_fs); + } + collect_dummy_file(&dummy, filp); + return(ret); +} + +int +mfs_mmap(struct file *f, struct vm_area_struct *x) +{ + /* cannot be reached: just a place-holder */ + /* some code (including "binfmt_elf.c") checks for the existence of this + * routine, but "do_mmap" will detect that there are no a_ops + * and convert it to a file-read + */ + panic("mfs_mmap"); + return(-ENOSYS); +} + +int +mfs_fsync(struct file *filp, struct dentry *dp, int datasync) +{ + int where; + mfs_handle_t handle; + +#ifdef CONFIG_MOSIX_DIAG + if(dp != filp->f_dentry) + mosix_panic("mfs_fsync: not same"); +#endif /* CONFIG_MOSIX_DIAG */ + mfs_get_handling(dp, &where, &handle); + mfs_count_fsync(where); + if(where != MFS_PE || mfs_loopback_test) + return(mfs_client_fsync(where, handle, filp, datasync)); + return(mfs_low_fsync(handle, filp, datasync)); +} + +int +mfs_low_fsync(mfs_handle_t handle, struct file *filp, int datasync) +{ + int ret; + struct file_operations *fops; + struct file dummy; + + if((ret = prepare_dummy_file(handle, &dummy, filp))) + return(ret); + fops = dummy.f_dentry->d_inode->i_fop; + down(&dummy.f_dentry->d_inode->i_sem); + if(!fops || !fops->fsync) + ret = -EINVAL; + else + ret = fops->fsync(&dummy, dummy.f_dentry, datasync); + up(&dummy.f_dentry->d_inode->i_sem); + collect_dummy_file(&dummy, filp); + return(ret); +} + +/* + * File method tables for MFS regular files, symbolic links and + * directories (selected by file type in mfs_subip_to_ip()). + * These three objects are declared "extern" earlier in this file, so + * they are defined here with external linkage as well: defining them + * "static" after an "extern" declaration gives the same identifier + * conflicting linkage. + */ +struct file_operations mfs_file_file_operations = +{ + llseek: mfs_file_llseek, + read: mfs_read, + write: mfs_write, + ioctl: mfs_ioctl, + mmap: mfs_mmap, + fsync: mfs_fsync, +}; + +struct file_operations mfs_symlink_file_operations = +{ + ioctl: mfs_ioctl, +}; + +struct file_operations mfs_dir_file_operations = +{ + llseek: mfs_dir_llseek, + read: mfs_read, + readdir: mfs_readdir, + ioctl: mfs_ioctl, + fsync: mfs_fsync, +}; + +struct mfs_iphand +{ + int pe; + mfs_handle_t handle; +}; + +int +compare_mfs_ip(struct inode *ip, unsigned long ig1, void *arg)
+{ + int pe = ((struct mfs_iphand *)arg)->pe; + + return(!pe || (ip->u.mfs_i.pe == pe && + ip->u.mfs_i.handle == ((struct mfs_iphand *)arg)->handle)); +} + +struct inode *mfs_iget(unsigned long ino, int pe, mfs_handle_t handle) +{ + struct mfs_iphand h; + + h.pe = pe; + h.handle = handle; + current->mosix.iget_mfs_pe = pe; + current->mosix.iget_mfs_handle = handle; + return(iget4(mfs_super, ino, compare_mfs_ip, &h)); +} + +int +mfs_revalidate(struct dentry *dentry) +{ + int where; + mfs_handle_t handle; + int ret; + struct subinode iinfo; + struct inode *ip = dentry->d_inode; + + mfs_get_handling(dentry, &where, &handle); + mfs_ip_to_subip(&iinfo, ip); + mfs_count_revalidate(where); + if(where != MFS_PE || mfs_loopback_test) + ret = mfs_client_revalidate(where, handle, &iinfo); + else + ret = mfs_low_revalidate(handle, &iinfo); + if(!ret) + mfs_subip_to_ip(ip, &iinfo); + return(ret); +} + +int +mfs_low_revalidate(mfs_handle_t handle, struct subinode *iinfo) +{ + struct inode *lip; + int ret; + struct nameidata nd; + + if((ret = mfs_to_local(handle, &nd))) + return(ret); + lip = nd.dentry->d_inode; + if(!lip || !lip->i_op || !lip->i_op->revalidate) + ret = 0; + else + ret = lip->i_op->revalidate(nd.dentry); + if(!ret) + mfs_ip_to_subip(iinfo, lip); + mfs_lput(nd.dentry, nd.mnt); + return(ret); +} + +int +mfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + int where; + mfs_handle_t handle; + + if(mfs_is_ro) + return(-EROFS); + if(!dentry->d_fsdata) /* someone trying to fiddle with the MFS root */ + return(-EPERM); + mfs_get_handling(dentry, &where, &handle); + mfs_count_setattr(where); + if(where != MFS_PE || mfs_loopback_test) + return(mfs_client_setattr(where, handle, attr)); + return(mfs_low_setattr(handle, attr)); +} + +int +mfs_low_setattr(mfs_handle_t handle, struct iattr *attr) +{ + struct nameidata nd; + struct inode *ip; + int ret; + int wa = 0; + + if((ret = mfs_to_local(handle, &nd))) + return(ret); + ret = -EINVAL; + if(!(ip = nd.dentry->d_inode)) 
+ goto out; + if(attr->ia_valid & ATTR_SIZE) + { + /* this must be ftruncate: */ + + if((ret = get_write_access(ip))) + goto out; + wa = 1; + ret = -EISDIR; + if(S_ISDIR(ip->i_mode)) + goto out; + ret = -EINVAL; + if(!S_ISREG(ip->i_mode)) + goto out; + ret = -EPERM; + if(IS_APPEND(ip)) + goto out; + if((ret = locks_verify_truncate(ip, NULL, attr->ia_size))) + goto out; + down(&ip->i_sem); + } + ret = notify_change(nd.dentry, attr); + if(attr->ia_valid & ATTR_SIZE) + up(&ip->i_sem); + out: + if(wa) + put_write_access(ip); + mfs_lput(nd.dentry, nd.mnt); + return(ret); +} + +/* + * Translate a name found directly under the MFS root into a node number. + * - a name that starts with a digit is read as a decimal node number; + * it is accepted only if the whole name was consumed and mos_to_net() + * recognises the node; + * - "here", "home", "lastexec", "magic" and "selected" yield the matching + * field of current->mosix, falling back to deppe and/or PE when that + * field is zero. + * "name" need not be NUL-terminated at "len". + * Returns the node number, or 0 when the name is not recognised. + */ +int +mfs_root_name_to_ino(char *name, int len) +{ + int node = 0; + + if(*name >= '0' && *name <= '9') + while(len && node <= MOSIX_MAX && *name >= '0' && *name <= '9') + { + node = node * 10 + (*name - '0'); + name++; + len--; + } + else if(len) + switch(*name) + { + case 'h': + if(len != 4 || name[3] != 'e') + return(0); + if(name[1] == 'e' && name[2] == 'r') + return(current->mosix.whereami ? : PE); + if(name[1] == 'o' && name[2] == 'm') + return(current->mosix.deppe ? : PE); + break; + case 'l': + if(len == 8 && !memcmp(&name[1], "astexec", 7)) + return(current->mosix.lastexec ? : + (current->mosix.deppe ? : PE)); + break; + case 'm': + if(len == 5 && !memcmp(&name[1], "agic", 4)) + return(current->mosix.lastmagic ? : + (current->mosix.deppe ? : PE)); + /* must not fall into the 's' case: otherwise the 8-character + * name "melected" would be accepted as "selected" */ + break; + case 's': + if(len == 8 && !memcmp(&name[1], "elected", 7)) + return(current->mosix.selected ? : + (current->mosix.deppe ?
: PE)); + } + if(len || !mos_to_net(node, NULL)) + node = 0; + return(node); +} + +static void +fill_last_component(char *nam, struct nameidata *nd) +{ + char *start = nam, *end; + + while(*nam) + nam++; + end = nam; + for(nam-- ; *nam == '/' && nam > start ; nam--) + ; + while(nam > start && *(nam-1) != '/') + nam--; + nd->last.name = nam; + nd->last.len = end - nam + 1; + nd->last_type = LAST_NORM; + switch(*nam) + { + case '/': + case '\0': + nd->last_type = LAST_ROOT; + break; + case '.': + switch(*(nam+1)) + { + case '.': + if(*(nam+2) == '/' || *(nam+2) == '\0') + nd->last_type = LAST_DOTDOT; + break; + case '/': + case '\0': + nd->last_type = LAST_DOT; + return; + } + break; + } +} + +int +mfs_express_lookup(struct nameidata *nd, char **name) +{ + char *nam = *name; + struct vfsmount *mnt; + int pe; + mfs_handle_t handle; + struct subinode iinfo; + struct inode *ip; + struct qstr qnam; + char *newname = NULL; + struct mfs_completion complete; + struct dentry *result; + int err; + int magic = 0; + int link = 0; +#ifdef CONFIG_MOSIX_DFSA + unsigned int flags = nd->flags & ~(LOOKUP_COMPLETE|LOOKUP_STAYFS); +#else + unsigned int flags = nd->flags & ~LOOKUP_COMPLETE; +#endif /* CONFIG_MOSIX_DFSA */ + + mfs_prepare_completion(&complete, nd); + if(complete.error < 0) + { + mfs_check_completion(&complete, nd); + return(complete.error); + } + pe = complete.base_pe; + handle = complete.base; + *name = NULL; + while(*nam) + { + if(pe == 0) + { + char *c; + + if(*nam == '.') + switch(*(nam+1)) + { + case '\0': + nam++; + continue; + case '/': + nam += 2; + while(*nam == '/') + nam++; + continue; + case '.': + if(*(nam+2) == '\0' || *(nam+2) == '/') + goto name_leads_back; + break; + } + for(c = nam ; *c && *c != '/' ; c++) + ; + pe = mfs_root_name_to_ino(nam, c - nam); + handle = pe; + if(!pe) + { + mfs_check_completion(&complete, nd); +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DREMOTE) + return(-EDOITATHOME); +#endif /* CONFIG_MOSIX_DFSA */ + 
return(-ENOENT); + } + if(nam[0] == 'm') + magic = 1; + link = (nam[0] >= 'a'); + while(*c == '/') + c++; + nam = c; + } + if(flags & LOOKUP_MFS) + { + /* server may only access the MFS root itself or reach + * back to the node's root via the correct link */ + mfs_check_completion(&complete, nd); + if(pe != MFS_PE || !(flags & LOOKUP_MFS_MFS)) + return(-EACCES); + nd->flags &= ~LOOKUP_MFS_MFS; + dput(nd->dentry); + mntput(nd->mnt); + nd->dentry = dget(mfs_root); + nd->mnt = mntget(mfs_mnt); + if(*nam) + *name = nam; + return(0); + } + if(!*nam && (nd->flags & LOOKUP_PARENT)) + { + mfs_check_completion(&complete, nd); + return(-EPERM); + } + if(pe && !*nam && link && !(nd->flags & LOOKUP_FOLLOW)) + return(mfs_link_complete(&complete, pe, nd)); + mfs_count_express(pe, complete.more_data_len); + if(pe != MFS_PE || mfs_loopback_test) + err = mfs_client_express_lookup(pe, &handle, nam, + flags, &iinfo, &newname, &complete); + else + err = mfs_low_express_lookup(&handle, nam, flags, + current->link_count, + current->total_link_count, &iinfo, + &newname, &complete); +#if 0 +printk("mfs_express_lookup: nam=<%s>, newname=%x", nam, newname); +if(newname)printk("<%s>\n", newname);else printk("\n"); +#endif + + if(err == AN_IMPOSSIBLE_ERROR && iinfo.si_count) + { + pe = magic = 0; +#if 0 +printk("mfs_express_lookup: skipping from <%s> to ", nam); +#endif + nam += iinfo.si_count - 1; +#if 0 +printk("<%s>\n", nam); +#endif + if(!*nam && (nd->flags & LOOKUP_PARENT)) + { + mfs_check_completion(&complete, nd); + return(-EPERM); + } + dput(nd->dentry); + nd->dentry = dget(mfs_super->s_root); + } + else if(IS_ERR((const void *)err)) + { + mfs_check_completion(&complete, nd); + return(err); + } + else + { + if(nd->flags & LOOKUP_PARENT) + fill_last_component(nam, nd); + break; + } + } + if(pe == 0) + { + mfs_check_completion(&complete, nd); + return(0); + } + if(mfs_check_completion(&complete, nd)) + { + if(newname) + kfree(newname); + return(complete.error); + } + if(newname) + { + 
qnam.name = newname; + qnam.len = strlen(newname); + } + else /* can still function - except 'getcwd' */ + { + qnam.name = "__removed__"; + qnam.len = 11; + } + result = d_alloc(mfs_super->s_root, &qnam); + if(newname) + kfree(newname); + if(!result) + { + mfs_dispose_handle(pe, handle); + return(-ENOMEM); + } + if(!(result->d_fsdata = kmalloc(sizeof(struct mfs_dinfo), GFP_KERNEL))) + { + dput(result); + mfs_dispose_handle(pe, handle); + return(-ENOMEM); + } + result->d_flags |= DCACHE_NO_CACHE; + result->d_op = &mfs_dentry_operations; + ((struct mfs_dinfo *)result->d_fsdata)->pe = pe; + ((struct mfs_dinfo *)result->d_fsdata)->handle = handle; + ((struct mfs_dinfo *)result->d_fsdata)->latest = jiffies; + if(iinfo.si_ino) + { + ip = mfs_iget(iinfo.si_ino, pe, handle); + if(!ip) + { + dput(result); + return(-EACCES); + } + mfs_subip_to_ip(ip, &iinfo); + d_add(result, ip); + } + else + d_add(result, NULL); + if(magic && (nd->complete_flags & COMPLETE_DONE)) + nd->complete_flags |= COMPLETE_MAGICAL; + dput(nd->dentry); + nd->dentry = result; + return(0); + + name_leads_back: +#ifdef CONFIG_MOSIX_DIAG + if(complete.done) + printk("OOPS: compete->done in name_leads_back!\n"); +#endif /* CONFIG_MOSIX_DIAG */ + mfs_check_completion(&complete, nd); +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + { + if(current->mosix.dflags & DREMOTE) + return(-EDOITATHOME); + nd->flags &= ~LOOKUP_STAYFS; + current->mosix.dflags &= ~DSTATSDOWN; + } +#endif /* CONFIG_MOSIX_DFSA */ + for(nam += 2 ; *nam == '/' ; nam++) + ; +#if 0 +printk("name leads back, left=../<%s>\n", nam); +#endif + if(*nam) + *name = nam; + else if(nd->flags & LOOKUP_PARENT) + return(-EPERM); + dput(nd->dentry); + mnt = nd->mnt; + nd->mnt = mntget(mnt->mnt_parent); + nd->dentry = dget(mnt->mnt_mountpoint->d_parent); + mntput(mnt); + return(0); +} + +char * +mfs_full_name(struct dentry *of, struct vfsmount *on) +{ + char *a, *b; + char *page = (char *)kmalloc(PAGE_SIZE, GFP_KERNEL); + int x; + + if(!page) + 
return(NULL); + if(of == mfs_root && on == mfs_mnt) + { + a = &page[PAGE_SIZE-1]; + *a = '\0'; + } + else + { + spin_lock(&dcache_lock); + a = __d_path(of, on, mfs_root, mfs_mnt, page, PAGE_SIZE); + spin_unlock(&dcache_lock); + } + for(x = MFS_PE ; x ; x /= 10) + { + if(a == page) + { + kfree(page); + return(NULL); + } + *(--a) = x % 10 + '0'; + } + if(a != page) + { + for(b = page ; *a ; ) + *b++ = *a++; + *b = '\0'; + a = page; + } + return(a); +} + +int +mfs_low_express_lookup(mfs_handle_t *handlep, char *name, unsigned int flags, + int link_count, int total_link_count, struct subinode *iinfo, + char **newname, struct mfs_completion *complete) +{ + struct inode *ip; + struct nameidata nd; + int error; + + iinfo->si_count = 0; + *newname = NULL; + if((error = mfs_to_local(*handlep, &nd))) + { + mfs_low_complete_error(complete, error); + return(error); + } +#if 0 +ip = nd.dentry->d_inode; +printk("mfs_low_express_lookup, original ino=%x/%d, d_count=%d\n", ip->i_dev, ip->i_ino, atomic_read(&nd.dentry->d_count)); +#endif + nd.flags = (flags | LOOKUP_MFS) & ~LOOKUP_MFS_MFS; + nd.last_type = LAST_ROOT; + nd.name_left = NULL; + nd.complete_flags = 0; + dget(nd.dentry); + mntget(nd.mnt); + mfs_lput(nd.dentry, nd.mnt); + current->link_count = link_count; + current->total_link_count = total_link_count; + error = path_walk(name, &nd); + if(nd.name_left) /* "/.." 
*/ + { + iinfo->si_count = (nd.name_left - name) + 1; +#if 0 +printk("mfs_low_express_lookup: name_left %x<%s>, orig_name=%x<%s>, skip_factor=%d\n", nd.name_left, nd.name_left, name, name, iinfo->si_count); +#endif + return(AN_IMPOSSIBLE_ERROR); + } + if(error) + { + mfs_low_complete_error(complete, error); + return(error); + } + if((*handlep = mfs_low_complete(complete, &nd))) + { + /* need information back */ + if((ip = nd.dentry->d_inode)) + { + iinfo->si_ino = global_inum(ip); + mfs_ip_to_subip(iinfo, ip); + } + else + iinfo->si_ino = 0; + *newname = mfs_full_name(nd.dentry, nd.mnt); +#if 0 +printk("mfs_low_express_lookup: dev=%x, ip=%d, name=<%s>, newname=%x", ip ? ip->i_dev : 0, ip ? ip->i_ino : 0, name, *newname); +if(*newname)printk("<%s>", *newname);printk("\n"); +#endif + } + else + { +#if 0 +printk("mfs_low_express_lookup(%s): all resolved, err=%d", name, complete->error); +if(nd.dentry->d_inode)printk(", dev=%x, ino=%d\n", nd.dentry->d_inode->i_dev, nd.dentry->d_inode->i_ino); +printk("\n"); +#endif + *handlep = 0; + } +#if 0 +ip = nd.dentry->d_inode; printk("before path release, "); +if(ip)printk("ino=%x/%d, ", ip->i_dev, ip->i_ino); +printk("d_count=%d\n", atomic_read(&nd.dentry->d_count)); +#endif + if(!IS_ERR((const void *)complete->error)) + path_release(&nd); + return(complete->error); +} + +void +mfs_check_path(struct dentry *dp) +{ + int where; + mfs_handle_t handle; + char *name; + int len; + + if(dp == mfs_super->s_root) + return; + if(dp->d_parent != mfs_super->s_root) + { +#ifdef CONFIG_MOSIX_DIAG + printk("mfs_check_path: not son of root\n"); +#endif /* CONFIG_MOSIX_DIAG */ + return; + } + mfs_get_handling(dp, &where, &handle); + if(where != MFS_PE || mfs_loopback_test) + name = mfs_client_check_path(where, handle); + else + name = mfs_low_check_path(handle); + mfs_count_check_path(where, name ? 
strlen(name)+1 : 0); + if(!name) + return; + if(!strcmp(name, dp->d_name.name)) + { + /* name is unchanged: the buffer still belongs to us and has to + * be released here, just as the paths below either free it or + * hand it over to the dentry */ + kfree(name); + return; + } + + len = strlen(name); + if(len <= dp->d_name.len) + { + /* new name fits in the existing storage: copy and drop ours */ + dp->d_name.len = len; + strcpy((char *)dp->d_name.name, name); + kfree(name); + } + else + { + /* new name is longer: the dentry takes over our buffer */ + if(dname_external(dp)) + kfree(dp->d_name.name); + dp->d_name.name = name; + dp->d_name.len = len; + } +} + +/* + * Local side of mfs_check_path(): resolve "handle" with mfs_to_local() + * and return the name produced by mfs_full_name() for that dentry/mount. + * The reference taken by mfs_to_local() is dropped with mfs_lput() + * before returning. + * Returns NULL if the handle cannot be resolved or mfs_full_name() + * fails; otherwise the caller owns the returned buffer. + */ +char * +mfs_low_check_path(mfs_handle_t handle) +{ + struct nameidata nd; + char *ret; + + if(mfs_to_local(handle, &nd)) + return(NULL); + ret = mfs_full_name(nd.dentry, nd.mnt); + mfs_lput(nd.dentry, nd.mnt); + return(ret); +} + +struct inode_operations mfs_file_inode_operations = { + revalidate: mfs_revalidate, + setattr: mfs_setattr, + check_path: mfs_check_path, +}; + +struct inode_operations mfs_symlink_inode_operations = { + revalidate: mfs_revalidate, + setattr: mfs_setattr, + check_path: mfs_check_path, +}; + +struct inode_operations mfs_dir_inode_operations = { + revalidate: mfs_revalidate, + setattr: mfs_setattr, + express_lookup: mfs_express_lookup, + check_path: mfs_check_path, +}; + +int +mfs_root_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + int first, last; + + if(filp->f_pos == 0) + { + if(filldir(dirent, ".", 1, 0, + filp->f_dentry->d_inode->i_ino, DT_DIR) < 0) + return(0); + filp->f_pos = 1; + } + if(filp->f_pos == 1) + { + if(filldir(dirent, "..", 2, 1, + filp->f_dentry->d_inode->i_ino, DT_DIR) < 0) + return(0); + filp->f_pos = 2; + } + if(filp->f_pos == 2) + { + if(filldir(dirent, "here", 4, 2, + mfs_root_name_to_ino("here", 4), DT_DIR)) + return(0); + filp->f_pos = 3; + } + if(filp->f_pos == 3) + { + if(filldir(dirent, "home", 4, 3, + mfs_root_name_to_ino("home", 4), DT_DIR)) + return(0); + filp->f_pos = 4; + } + if(filp->f_pos == 4) + { + if(filldir(dirent, "lastexec", 8, 4, + mfs_root_name_to_ino("lastexec", 8), DT_DIR)) + return(0); + filp->f_pos = 5; + } + if(filp->f_pos == 5) + { + if(filldir(dirent, "magic", 5, 5, + mfs_root_name_to_ino("magic", 5), DT_DIR)) + return(0); + filp->f_pos =
6; + } + if(filp->f_pos == 6) + { + if(filldir(dirent, "selected", 8, 6, + mfs_root_name_to_ino("selected", 8), DT_DIR)) + return(0); + filp->f_pos = 7; + } + if(filp->f_pos < 7 || filp->f_pos >= 7 + MOSIX_MAX) + return(0); + while(scan_mosix_nodes(filp->f_pos - 6, &first, &last)) + while(first <= last) + { + char node[6]; + int n = first; + char *c; + + c = &node[5]; + *c = '\0'; + while(n) + { + c--; + *c = n % 10 + '0'; + n /= 10; + } + if(filldir(dirent, c, sizeof(node) - 1 - (c - node), + first + 6, first, DT_DIR)) + return(0); + filp->f_pos = ++first + 6; + } + return(0); +} + +static struct file_operations mfs_root_file_operations = +{ + readdir: mfs_root_readdir, +}; + +int +mfs_root_revalidate(struct dentry *dp) +{ + struct inode *ip = dp->d_inode; + + ip->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + ip->i_nlink = 1; + ip->i_uid = 0; + ip->i_gid = 0; + ip->i_rdev = 0; + ip->i_size = 0; + ip->i_atime = 0; + ip->i_ctime = 0; + ip->i_mtime = 0; + ip->i_blksize = 1024; + ip->i_blocks = 0; + return(0); +} + +struct dentry * +mfs_root_lookup(struct inode *dir, struct dentry *dentry) +{ + struct mfs_dinfo *info; + struct inode *inode = NULL; + int node; + + if(!(node = mfs_root_name_to_ino((char *)dentry->d_name.name, + dentry->d_name.len))) + { +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DREMOTE) + return(ERR_PTR(-EDOITATHOME)); +#endif /* CONFIG_MOSIX_DFSA */ + return(ERR_PTR(-EACCES)); + } + if(!(info = (struct mfs_dinfo *)kmalloc(sizeof(struct mfs_dinfo), + GFP_KERNEL))) + return(ERR_PTR(-ENOMEM)); + info->pe = node; + info->handle = node; + info->latest = jiffies; + dentry->d_flags |= DCACHE_NO_CACHE; + dentry->d_op = &mfs_dentry_operations; + dentry->d_fsdata = info; + inode = mfs_iget(node, node, node); + if(!inode) + return(ERR_PTR(-ENOMEM)); + d_add(dentry, inode); + return(NULL); +} + +/* note: most of the following functions are actually required, even though + * not really permitted on the MFS root itself, because the MFS root is + * technically 
the parent of all MFS nodes. + */ +struct inode_operations mfs_root_inode_operations = { + revalidate: mfs_root_revalidate, + setattr: mfs_setattr, + express_lookup: mfs_express_lookup, +}; + +void +mfs_read_inode(struct inode * inode) +{ + unsigned long ino = inode->i_ino; + struct subinode iinfo; + int err; + + if(ino == MFS_ROOT_INO) + { + inode->i_mode = S_IFDIR | S_IXUGO | S_IRUGO | S_IWUGO; + inode->i_op = &mfs_root_inode_operations; + inode->i_fop = &mfs_root_file_operations; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_blocks = 0; + inode->i_blksize = 1024; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_nlink = 1; + inode->i_size = 0; + inode->i_generation = 0; + inode->i_version = 0; + return; + } + inode->u.mfs_i.pe = current->mosix.iget_mfs_pe; + inode->u.mfs_i.handle = current->mosix.iget_mfs_handle; + if(ino < MFS_ROOT_INO) + { + mfs_count_revalidate(ino); + if(ino != MFS_PE || mfs_loopback_test) + err = mfs_client_revalidate(ino, ino, &iinfo); + else + err = mfs_low_revalidate(MFS_PE, &iinfo); + if(err) /* just guess */ + { + inode->i_mode = S_IFDIR | S_IXUGO | S_IRUGO; + inode->i_op = &mfs_dir_inode_operations; + inode->i_mtime = inode->i_atime = inode->i_ctime = + CURRENT_TIME; + inode->i_blocks = 0; + inode->i_blksize = 1024; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_nlink = 1; + inode->i_size = 0; + } + else + mfs_subip_to_ip(inode, &iinfo); + } +} + +int +mfs_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = MFS_SUPER_MAGIC; + buf->f_bsize = PAGE_SIZE/sizeof(long); + buf->f_namelen = NAME_MAX; + /* all the rest are 0 */ + return(0); +} + +void +mfs_put_super(struct super_block *sb) +{ + mfs_is_mounted = 0; + mfs_super = NULL; +} + +#ifdef CONFIG_MOSIX_DFSA +struct mfs_ident /* used in mfs_identify/mfs_reconstruct */ +{ + int pe; + mfs_handle_t handle; + ino_t ino; + mode_t mode; +}; + +int +mfs_identify(struct dentry *dentry, void *into) +{ + struct mfs_ident *i = into; + struct 
mfs_dinfo *p = (struct mfs_dinfo *)dentry->d_fsdata; + + if(sizeof(struct mfs_ident) > MAX_IDENT_RECORD_LEN) + panic("mfs_identify: insufficient\n"); + if(p) + { + i->pe = p->pe; + i->handle = p->handle; + } + else + { + i->pe = 0; + i->handle = 0; + } + i->ino = dentry->d_inode->i_ino; + i->mode = dentry->d_inode->i_mode; + return(sizeof(struct mfs_ident)); +} + +struct dentry * +mfs_reconstruct(struct vfsmount *mnt, void *info, int infolen) +{ + struct mfs_ident *i = (struct mfs_ident *)info; + struct dentry *dp; + struct inode *ip; + struct qstr qname; + + if(mnt->mnt_sb != mfs_super || infolen != sizeof(struct mfs_ident)) + { + printk("mfs_reconstruct: improper data\n"); + return(NULL); + } + if(!i->pe) + return(i->handle ? NULL : dget(mfs_super->s_root)); + qname.name = "yet_unknown"; + qname.len = 11; + if(!(dp = d_alloc(mfs_super->s_root, &qname))) + return(NULL); + if(!(dp->d_fsdata = kmalloc(sizeof(struct mfs_dinfo), GFP_KERNEL))) + { + dput(dp); + return(NULL); + } + dp->d_flags |= DCACHE_NO_CACHE; + dp->d_op = &mfs_dentry_operations; + ((struct mfs_dinfo *)dp->d_fsdata)->pe = i->pe; + ((struct mfs_dinfo *)dp->d_fsdata)->handle = i->handle; + ((struct mfs_dinfo *)dp->d_fsdata)->latest = jiffies; + ip = mfs_iget(i->ino, i->pe, i->handle); + if(!ip) + { + kfree(dp->d_fsdata); + dp->d_fsdata = NULL; + dput(dp); + return(NULL); + } + ip->i_mode = i->mode; + switch(i->mode & S_IFMT) + { + case S_IFREG: + ip->i_op = &mfs_file_inode_operations; + ip->i_fop = &mfs_file_file_operations; + break; + case S_IFLNK: + ip->i_op = &mfs_symlink_inode_operations; + ip->i_fop = &mfs_symlink_file_operations; + break; + case S_IFDIR: + ip->i_op = &mfs_dir_inode_operations; + ip->i_fop = &mfs_dir_file_operations; + break; + default: + printk("mfs_reconstruct: inum %d of bad mode (0%o)\n", + (int)ip->i_ino, ip->i_mode); + make_bad_inode(ip); + break; + } + d_add(dp, ip); + if(mfs_attach_handle(i->pe, i->handle)) + { + kfree(dp->d_fsdata); + dp->d_fsdata = NULL; + dput(dp); + 
return(NULL); + } + return(dp); +} + +void +mfs_dfsa_changed(struct vfsmount *mnt, int new) +{ + mfs_is_dfsa = (new != 0); +} +#endif /* CONFIG_MOSIX_DFSA */ + +struct super_operations mfs_sops = +{ + read_inode: mfs_read_inode, + put_inode: force_delete, + put_super: mfs_put_super, + statfs: mfs_statfs, +#ifdef CONFIG_MOSIX_DFSA + identify: mfs_identify, + reconstruct: mfs_reconstruct, + dfsa_changed: mfs_dfsa_changed, +#endif /* CONFIG_MOSIX_DFSA */ +}; + +int +mfs_dentry_compare(struct dentry *d, struct qstr *a, struct qstr *b) +{ + /* since MFS uses no hashing/caching, we should never reach here! */ + printk("mfs_dentry_compare! (<%s>, <%s>)\n", a->name, b->name); + return(1); +} + +void +mfs_dentry_release(struct dentry *dp) +{ + int where; + mfs_handle_t handle; + + if(dp->d_fsdata) + { + where = ((struct mfs_dinfo *)dp->d_fsdata)->pe; + handle = ((struct mfs_dinfo *)dp->d_fsdata)->handle; + kfree(dp->d_fsdata); + dp->d_fsdata = NULL; + mfs_dispose_handle(where, handle); + } +} + +struct dentry_operations mfs_dentry_operations = +{ + d_compare: mfs_dentry_compare, + d_release: mfs_dentry_release, +}; + +struct super_block * +mfs_read_super(struct super_block *s, void *data, int silent) +{ + struct inode * root_inode; + struct super_block *ret = s; + + if(mfs_is_mounted) + { + printk("MFS already mounted!\n"); + return(NULL); + } + if(!PE) + printk("WARNING: MFS mounting is ineffective until MOSIX is configured.\n"); + mfs_is_mounted = 1; + mfs_is_ro = (s->s_flags & MS_RDONLY); + mfs_super = s; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = MFS_SUPER_MAGIC; + s->s_op = &mfs_sops; + root_inode = iget(s, MFS_ROOT_INO); + if (!root_inode || !(s->s_root = d_alloc_root(root_inode))) + { + printk("Failed initializing MOSIX file-system.\n"); + iput(root_inode); + mfs_is_mounted = 0; + ret = NULL; + } + else + s->s_root->d_op = &mfs_dentry_operations; + s->s_maxbytes = ~0LL; + return(ret); +} + +void +mfs_change_pe(void) +{ + lock_mosix(); + 
if(MFS_PE == ((PE && !mosadmin_mode_nomfs) ? PE : MFS_UNKNOWN_PE)) + { + unlock_mosix(); + return; + } + unlock_mosix(); + if(mfs_try_to_cleanup(0)) + { + printk("WARNING: Due to the MOSIX configuration-change, all MFS files and\n"); + printk("directories previously opened on this node will become inaccessible!\n"); + mfs_try_to_cleanup(2); + } + lock_mosix(); + MFS_PE = (PE && !mosadmin_mode_nomfs) ? PE : MFS_UNKNOWN_PE; + unlock_mosix(); +} + +DECLARE_FSTYPE(mfs_fs_type, "mfs", mfs_read_super, 0); + +int +init_mfs(void) +{ + int err; + + MFS_PE = (PE && !mosadmin_mode_nomfs) ? PE : MFS_UNKNOWN_PE; + if((err = register_filesystem(&mfs_fs_type))) + return(err); + mfs_conversion_init(); + mfs_make_servers(); + return(0); +} + +int +mfs_walk_init_root(struct nameidata *nd) +{ + int ret; + + lock_kernel(); + if(mfs_root) + { + nd->dentry = dget(mfs_root); + nd->mnt = mntget(mfs_mnt); + ret = 1; + } + else + ret = 0; + unlock_kernel(); + return(ret); +} + +int +is_mfs_root(struct nameidata *nd) +{ + return(nd->dentry == mfs_root && nd->mnt == mfs_mnt); +} + +void +mfs_report_used(void) +{ + struct dentry *root; + struct dentry *dp; + struct list_head *next; + unsigned long timo = jiffies + 10 * HZ; + struct mfs_dinfo *i; + + loop: + if(!mfs_super) + return; + root = mfs_super->s_root; + dget(root); + for(next = root->d_subdirs.next ; next != &root->d_subdirs && + time_before(jiffies, timo) ; next = next->next) + { + dp = list_entry(next, struct dentry, d_child); + if((i = dp->d_fsdata) && + time_before(i->latest + MFS_REPORT_TIME, jiffies)) + { + dget(dp); + i->latest = jiffies; + if(mfs_touch_handle(i->pe, i->handle)) + i->pe = MFS_BAD_PE; + dput(dp); + /* the scan restarts at "loop:", which takes a new reference on + * the root: drop the one we hold so the counts stay balanced */ + dput(root); + goto loop; + } + } + dput(root); +} + +int +mfs_client_daemon(void *nothing) +{ + struct task_struct *p = current; + + lock_kernel(); + common_daemon_setup("mfs_gc", 0); + while(1) + { + mfs_report_used(); + mfs_monitor_client_contacts(); + p->state = TASK_INTERRUPTIBLE; + schedule_timeout(20 * HZ); + flush_signals(p);
+ } +} +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/scontact.c linux_umopenmosix/fs/mfs/scontact.c --- linux-2.4.17/fs/mfs/scontact.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/scontact.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,517 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include +#include +#include +#include + +#define MFS_MINSERVERS 2 +#define MFS_MAXDEDICATED 6 +#define MFS_MAXSERVERS 14 +#define MFS_CONTACT_KEEPALIVE (60*HZ) +#define MFS_MAXQ 100 /* do not increase - leave space under 128 */ + +spinlock_t mfs_servq_lock = SPIN_LOCK_UNLOCKED; + +struct task_struct *mfs_main_server_task; +int n_mfs_servers; +int mfs_total_services; + +struct mfs_server_contact +{ + struct mfs_server_contact *next; + struct socket *sock; + int caller; + int serial; + unsigned long last_use; +}; + +/* service queue: */ +struct mfs_server_contact *queued_services; +struct mfs_server_contact *last_queued_service; +int mfs_qlen; + +/* past connections that are still kept open: */ +struct mfs_server_contact *mfs_idle_connections; + +/* servers waiting: */ +DECLARE_WAIT_QUEUE_HEAD(wait_to_serve_mfs); +atomic_t waiting_to_serve_mfs = ATOMIC_INIT(0); +int mfs_idle_servers; + +/* main server waiting: */ +DECLARE_WAIT_QUEUE_HEAD(wait_for_mfs_calls); +volatile int waiting_for_mfs_calls; +volatile int mfs_has_new_calls; + +int +mfs_server_send(struct mfs_server_contact *s, int type, void *header, int hdln, + void *dat1, int len1, void *dat2, int len2) +{ + return(mfs_send(s->sock, 0, 
s->serial, type, + header, hdln, dat1, len1, dat2, len2)); +} + +int +mfs_server_receive(struct mfs_server_contact *s, + void **header, void **dat1, void **dat2) +{ + return(mfs_receive(s->sock, MFS_MUST_MATCH, &s->serial, NULL, + header, dat1, dat2)); +} + +void +mfs_serve1(struct mfs_server_contact *s) +{ + int type, more, changed; + void *header; + void *dat1; + void *dat2; + static int n_dedicated; + + mfs_set_main_ownership(s->sock, 0); + again: + type = mfs_receive(s->sock, MFS_FIRST_RECEIVE, &s->serial, NULL, + &header, &dat1, &dat2); + if(type == MFS_CLOSE || type < MFS_NONE) + { + mfs_close_socket(s->sock); +if(mfs_debug > 1) +printk("mfs_serve1: closing socket %x by client request!\n", (int)s->sock); + s->sock = NULL; + } + else + mfs_serve(s, type, header, dat1, dat2); + if(header) + kfree(header); + if(!s->sock) + return; + spin_lock(&mfs_servq_lock); + more = 1 + (n_dedicated < MFS_MAXDEDICATED); + if(more == 2) + n_dedicated++; + spin_unlock(&mfs_servq_lock); + changed = mfs_set_main_ownership(s->sock, more); + if(more == 2) + { + spin_lock(&mfs_servq_lock); + n_dedicated--; + spin_unlock(&mfs_servq_lock); + } + if(!changed) + { + if(mfs_debug) + printk("mfs_serv1: serving in chain!\n"); + goto again; + } +} + +/* trim_idle_connections must be called with mfs_servq_lock! 
*/ +void +trim_idle_connections(void) +{ +#define HOGS_CONSIDERED 10 + struct mfs_server_contact *s, *prev, *next, *dispose = NULL; + int nhogs = 0; + int hogpe[HOGS_CONSIDERED]; + int hogs[HOGS_CONSIDERED]; + int i, caller, least, leastval, penalty, bar; + int disposed = 0; + + /* compile the hog-table */ + for(prev = NULL, s = mfs_idle_connections ; s ; prev = s , s = s->next) + { + caller = s->caller; + for(i = 0 ; i < nhogs ; i++) + if(hogpe[i] == caller) + { + hogs[i]++; + goto next_connection; + } + if(i < HOGS_CONSIDERED) + { + hogpe[i] = caller; + hogs[i] = 1; + nhogs++; + goto next_connection; + } + leastval = 10000000; + least = 0; + for(i = 0 ; i < HOGS_CONSIDERED ; i++) + if(hogs[i] < leastval) + { + least = i; + leastval = hogs[i]; + } + hogpe[least] = caller; + hogs[least] = 1; + next_connection:; + } + /* calculate penalties, store in the "serial" field (unused for idle) */ + /* penalty increases for old sockets and for hogs */ + for(s = mfs_idle_connections ; s ; s = s->next) + { + penalty = abs((long)jiffies - (long)(s->last_use)); + if(penalty > 2*MFS_CONTACT_KEEPALIVE) /* avoid overflow */ + penalty = 2*MFS_CONTACT_KEEPALIVE; + penalty = mfs_total_services * penalty / MFS_CONTACT_KEEPALIVE; + caller = s->caller; + for(i = 0 ; i < nhogs ; i++) + if(hogpe[i] == caller) + { + penalty *= hogs[i]; + break; + } + s->serial = penalty; + } + /* collect old sockets, more aggressively as services are exhausted */ + for(bar = MFS_MAXQ/2 ; + mfs_idle_connections && mfs_total_services - disposed > MFS_MAXQ + ; bar = bar * 2 / 3) + for(prev = NULL, s = mfs_idle_connections ; s ; ) + if(s->serial >= bar) + { + next = s->next; + if(prev) + prev->next = next; + else + mfs_idle_connections = next; +if(mfs_debug) +printk("trim_idle_connections: penalty=%d, closing socket %x\n", s->serial, +(int)s->sock); + s->next = dispose; + dispose = s; + disposed++; + s = next; + } + else + { + prev = s; + s = s->next; + } + /* dispose of those sockets */ + mfs_total_services 
-= disposed; + spin_unlock(&mfs_servq_lock); + for(; dispose ; dispose = next) + { + next = dispose->next; + mfs_close_socket(dispose->sock); + kfree(dispose); + } + spin_lock(&mfs_servq_lock); +} + +void +mfs_recycle_service(struct mfs_server_contact *s) +{ + spin_lock(&mfs_servq_lock); + if(!s->sock) + { + mfs_total_services--; + spin_unlock(&mfs_servq_lock); + kfree(s); + return; + } + if(mfs_total_services >= MFS_MAXQ) + trim_idle_connections(); + if(mfs_total_services < MFS_MAXQ) + { + s->last_use = jiffies; + s->next = mfs_idle_connections; + mfs_idle_connections = s; + spin_unlock(&mfs_servq_lock); + return; + } +if(mfs_debug) +printk("mfs_recycle_service: closing socket %x (too many connections already)\n", (int)s->sock); + mfs_total_services--; + spin_unlock(&mfs_servq_lock); + mfs_close_socket(s->sock); + kfree(s); +} + +int +mfs_server(void *nothing) +{ + struct mfs_server_contact *s; + struct task_struct *p = current; + extern struct task_struct *child_reaper; + + strcpy(p->comm, "mfs_server"); + p->exit_signal = SIGCHLD; + p->p_opptr = child_reaper; + write_lock_irq(&tasklist_lock); + REMOVE_LINKS(p); + p->p_pptr = p->p_opptr; + SET_LINKS(p); + p->fs->umask = 0; + write_unlock_irq(&tasklist_lock); + set_fs(KERNEL_DS); /* all server I/O always to kernel */ + spin_lock(&mfs_servq_lock); + mfs_idle_servers--; + while(1) + { + while((s = queued_services)) + { + mfs_qlen--; + if(!(queued_services = s->next)) + last_queued_service = NULL; + spin_unlock(&mfs_servq_lock); + mfs_serve1(s); + mfs_recycle_service(s); + spin_lock(&mfs_servq_lock); + } + if(n_mfs_servers > MFS_MINSERVERS || MFS_PE == MFS_UNKNOWN_PE) + { + n_mfs_servers--; + spin_unlock(&mfs_servq_lock); + return(0); /* and exit */ + } + mfs_idle_servers++; + atomic_inc(&waiting_to_serve_mfs); + spin_unlock(&mfs_servq_lock); + interruptible_sleep_on(&wait_to_serve_mfs); + if(p->sigpending) + { + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + } + 
spin_lock(&mfs_servq_lock); + atomic_dec(&waiting_to_serve_mfs); + mfs_idle_servers--; + } +} + +void +mfs_new_request_arrived(void) +{ +if(mfs_debug > 1) +printk("mfs_new_request_arrived!\n"); + mfs_has_new_calls = 1; + if(waiting_for_mfs_calls) + wake_up_interruptible(&wait_for_mfs_calls); +} + +/* mfs_add_to_queue must be called with mfs_servq_lock! */ +static inline void +mfs_add_to_queue(struct mfs_server_contact *s) +{ + s->next = NULL; + mfs_qlen++; + if(last_queued_service) + last_queued_service->next = s; + else + queued_services = s; + last_queued_service = s; + if(atomic_read(&waiting_to_serve_mfs)) + wake_up_interruptible(&wait_to_serve_mfs); +} + +int +mfs_main_server(void *nothing) +{ + register struct task_struct *p = current; + struct socket *mainsock = NULL, *sock; + struct mfs_server_contact *s, *prev, *next, *dispose = NULL; + int caller; + unsigned long last_trim = 0; + int client_pid = 0; + int success; + DECLARE_WAITQUEUE(wait, p); + + mfs_main_server_task = p; + common_daemon_setup("mfs_main_server", 1); + while(client_pid <= 0 || !mainsock) + { + if(client_pid <= 0) + client_pid = kernel_thread(mfs_client_daemon, NULL, 0); + if(!mainsock) + mainsock = mfs_open_main_socket(); + if(client_pid > 0 && mainsock) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(MFS_CONTACT_KEEPALIVE / 3); + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + } + mainloop: + spin_lock(&mfs_servq_lock); + if(MFS_PE != MFS_UNKNOWN_PE) + while(n_mfs_servers < MFS_MINSERVERS) + { + n_mfs_servers++; + mfs_idle_servers++; + spin_unlock(&mfs_servq_lock); + success = (kernel_thread(mfs_server, NULL, 0) >= 0); + spin_lock(&mfs_servq_lock); + if(!success) + { + n_mfs_servers--; + mfs_idle_servers--; + break; + } + } + if(mfs_has_new_calls) + { + spin_unlock(&mfs_servq_lock); + mfs_has_new_calls = 0; + while((sock = mfs_new_connection(mainsock, &caller))) + if(MFS_PE == MFS_UNKNOWN_PE) + mfs_close_socket(sock); + 
else + { +if(mfs_debug) +printk("mfs_main_server: new connection=%x (mainsock=%x)\n", (int)sock, (int)mainsock); + if((s = (struct mfs_server_contact *)kmalloc( + sizeof(struct mfs_server_contact), GFP_KERNEL))) + { + spin_lock(&mfs_servq_lock); + mfs_total_services++; + s->sock = sock; + s->caller = caller; + mfs_add_to_queue(s); + spin_unlock(&mfs_servq_lock); + } + else + mfs_close_socket(sock); + } + spin_lock(&mfs_servq_lock); + for(prev = NULL, s = mfs_idle_connections ; s ; ) + if(MFS_PE == MFS_UNKNOWN_PE || mfs_socket_has_data(s->sock)) + { + next = s->next; + if(prev) + prev->next = next; + else + mfs_idle_connections = next; + if(MFS_PE == MFS_UNKNOWN_PE) + { + s->next = dispose; + dispose = s; + mfs_total_services--; + } + else + { +if(mfs_debug) +printk("mfs_main_server: new request on old sock (%x)\n", (int)s->sock); + mfs_add_to_queue(s); + } + s = next; + } + else + { + prev = s; + s = s->next; + } + while(mfs_qlen > mfs_idle_servers && + n_mfs_servers < MFS_MAXSERVERS && + MFS_PE != MFS_UNKNOWN_PE) + { +if(mfs_debug > 1)printk("mfs_main_server adding a server\n"); + n_mfs_servers++; + mfs_idle_servers++; + spin_unlock(&mfs_servq_lock); + success = (kernel_thread(mfs_server, NULL, 0) >= 0); + spin_lock(&mfs_servq_lock); + if(!success) + { + n_mfs_servers--; + mfs_idle_servers--; + if(!n_mfs_servers) + { + /* too bad - just shut all down */ + while((s = queued_services)) + { + /* unlink under the lock and keep the tail pointer valid: + * "s" is freed below and mfs_add_to_queue() appends + * through last_queued_service */ + if(!(queued_services = s->next)) + last_queued_service = NULL; + mfs_qlen--; + mfs_total_services--; + spin_unlock(&mfs_servq_lock); + mfs_close_socket(s->sock); + kfree(s); + spin_lock(&mfs_servq_lock); + } + } + break; + } + } + } + if(abs((long)jiffies - (long)last_trim) > HZ) + { + for(prev = NULL, s = mfs_idle_connections ; s ; ) + if(abs((long)jiffies - (long)(s->last_use)) >= + MFS_CONTACT_KEEPALIVE) + { + next = s->next; + if(prev) + prev->next = next; + else + mfs_idle_connections = next; + if(mfs_socket_has_data(s->sock)) + mfs_add_to_queue(s); + else + { +if(mfs_debug > 1) +printk("mfs_main_server:
closing old socket %x\n", (int)s->sock); + s->next = dispose; + dispose = s; + mfs_total_services--; + } + s = next; + } + else + { + prev = s; + s = s->next; + } + } + spin_unlock(&mfs_servq_lock); + last_trim = jiffies; + for(; dispose ; dispose = next) + { + next = dispose->next; + mfs_close_socket(dispose->sock); + kfree(dispose); + } + if(!mfs_has_new_calls) + { + add_wait_queue(&wait_for_mfs_calls, &wait); + set_current_state(TASK_INTERRUPTIBLE); + waiting_for_mfs_calls = 1; + if(!mfs_has_new_calls) + schedule_timeout(MFS_CONTACT_KEEPALIVE / 3); + waiting_for_mfs_calls = 0; + remove_wait_queue(&wait_for_mfs_calls, &wait); + set_current_state(TASK_RUNNING); + } + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + mfs_throw_garbage(); + goto mainloop; +} + +void +mfs_make_servers(void) +{ + kernel_thread(mfs_main_server, NULL, 0); +} + +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/server.c linux_umopenmosix/fs/mfs/server.c --- linux-2.4.17/fs/mfs/server.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/server.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,471 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include + +void +unpack_credits(struct mfs_cred *cred) +{ + register struct task_struct *p = current; + register int i; + + p->uid = cred->uid; + p->euid = cred->euid; + p->suid = cred->suid; /* saved uid completes the uid group; sgid is set with the gids below */ + p->fsuid = cred->fsuid; + p->gid = cred->gid; + p->egid = cred->egid; + p->sgid = cred->sgid; + p->fsgid = cred->fsgid; + p->ngroups = cred->ngroups; + for(i = cred->ngroups-1 ; i >= 0 ; i--) + p->groups[i] = cred->groups[i]; + p->cap_effective = (p->cap_effective & ~REMOTE_CAPS) | + (cred->caps & REMOTE_CAPS); + p->mosix.whereami = cred->whereami; + p->mosix.deppe = cred->deppe; +} + +void +mfs_serve_attach(struct mfs_server_contact *con, + struct mfs_request_attach_handle *s) +{ + struct mfs_reply_attach_handle r; + + r.result = mfs_low_attach_handle(s->handle); + mfs_server_send(con, MFS_REPLY_ATTACH_HANDLE, &r, sizeof(r), NULL, 0, + NULL, 0); +} + +void +mfs_serve_touch(struct mfs_server_contact *con, + struct mfs_request_touch_handle *s) +{ + struct mfs_reply_touch_handle r; + + r.result = mfs_low_touch_handle(s->handle); + mfs_server_send(con, MFS_REPLY_TOUCH_HANDLE, &r, sizeof(r), NULL, 0, + NULL, 0); +} + +void +mfs_serve_llseek(struct mfs_server_contact *con, struct mfs_request_llseek *s) +{ + struct mfs_reply_llseek r; + + unpack_credits(&s->cred); + r.result = mfs_low_llseek(s->handle, &s->file, s->offset, s->origin); + r.file = s->file; + mfs_server_send(con, MFS_REPLY_LLSEEK, &r, sizeof(r), NULL, 0, NULL, 0); +} + +void +mfs_serve_read(struct mfs_server_contact *con, struct mfs_request_read *s) +{ + struct mfs_reply_read r; + int n; + size_t count = s->count; + size_t cnt, sent = 0; + char *buf = NULL; + int ret = 0; + size_t max; + char smallbuf[4]; + struct mfs_interim_data d; + + for(max = s->count > 16384 ?
16384 : s->count ; max > sizeof(smallbuf) ; + max >>= 1) + if((buf = (char *)kmalloc(max, GFP_KERNEL))) + break; + if(max <= sizeof(smallbuf)) + { + buf = smallbuf; + max = sizeof(smallbuf); + } + unpack_credits(&s->cred); + while(count > 0) + { + cnt = count; + if(cnt > max) + cnt = max; + n = mfs_low_read(s->handle, &s->file, buf, cnt, &s->pos); + /* n is a signed int and cnt is a size_t: test for an error first, + * otherwise a negative n converts to a huge unsigned value and + * the failed chunk would be sent on as interim data */ + if(n < 0 || (size_t)n < cnt || cnt == count) + { + if(n < 0) + ret = n; + else + ret += n; + break; + } + ret += cnt; + d.count = cnt; + if(mfs_server_send(con, MFS_INTERIM_DATA, (void *)&d, sizeof(d), + (void *)buf, cnt, NULL, 0)) + goto free_buf; + sent += cnt; + count -= cnt; + } + r.file = s->file; + r.ppos = s->pos; + r.result = ret; + r.datalen = (ret > 0 && ret > sent) ? ret - sent : 0; + mfs_server_send(con, MFS_REPLY_READ, &r, sizeof(r), + r.datalen ? buf : NULL, r.datalen, NULL, 0); + free_buf: + if(max != sizeof(smallbuf)) + kfree(buf); +} + +void +mfs_serve_write(struct mfs_server_contact *con, struct mfs_request_write *s, + void *buf) +{ + struct mfs_reply_write r; + struct mfs_interim_data *i = NULL; + ssize_t n; + size_t count = s->count; + size_t cnt; + struct task_struct *p = current; + + unpack_credits(&s->cred); + if(signal_pending(p)) + { + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + } + cnt = s->datalen; + if(cnt < count) + mfs_server_send(con, MFS_CONTINUE, NULL, 0, NULL, 0, NULL, 0); + r.result = mfs_low_write(s->handle, &s->file, buf, cnt,&s->pos,s->flim); + /* in Linux, an append-"write", once started is not broken even if + * another process interfered */ + s->file.f_flags &= ~O_APPEND; + if(r.result == cnt) + while(r.result < count) + { + if(i) + kfree(i); + if((n = mfs_server_receive(con, (void **)&i, &buf, NULL)) < 0) + return; + if(n == MFS_STOP) + { + r.result = -EFAULT; + break; + } + if(n != MFS_INTERIM_DATA) + { + printk("mfs_serve_write: unexpected type: %d\n", n); + if(i) + kfree(i); + return; + } + n = mfs_low_write(s->handle, &s->file, buf, i->count,
&s->pos, + s->flim); + r.result += n; + if(n < i->count) + { + if(n < 0) + r.result = n; + break; + } + } + if(i) + kfree(i); + r.file = s->file; + r.ppos = s->pos; + if(sigismember(&p->pending.signal, SIGXFSZ)) + { + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + r.hadsigxfsz = 1; + } + else + r.hadsigxfsz = 0; + current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + mfs_server_send(con, MFS_REPLY_WRITE, &r, sizeof(r), NULL, 0, NULL, 0); +} + +struct mfs_serve_readdir_callback +{ + struct mfs_server_contact *con; + void *buf, *bufp; + int buflen; + int policy; + int maxbytes; +}; + +int +mfs_serve_flush_dirs(struct mfs_serve_readdir_callback *cb) +{ + struct mfs_interim_data i; + struct mfs_interim_readdir *ri; + int t, err; + + i.count = cb->bufp - cb->buf; + if(i.count == 0) + return(0); + if(mfs_server_send(cb->con, MFS_INTERIM_DATA, (void *)&i, sizeof(i), + cb->buf, i.count, NULL, 0)) + return(-EINVAL); + t = mfs_server_receive(cb->con, (void **)&ri, NULL, NULL); + if(t == MFS_INTERIM_READDIR) + err = ri->error; + else + err = -EINVAL; + if(ri) + kfree(ri); + if(err) + return(err); + cb->bufp = cb->buf; + return(0); +} + +int +mfs_serve_filldir(void *__cb, const char *name, int namelen, loff_t offset, + ino_t ino, unsigned int type) +{ + struct mfs_serve_readdir_callback *cb = __cb; + int reclen; + int err; + + switch(cb->policy) + { + case MFS_READDIR_JUSTONE: + if(!cb->maxbytes) + return(-EINVAL); + cb->maxbytes = 0; + break; + case MFS_READDIR_GETDENTS: + reclen = (offsetof(struct dirent, d_name[0]) + namelen + + 1 + sizeof(long)-1) & ~(sizeof(long)-1); + if(reclen > cb->maxbytes) + return(-EINVAL); + cb->maxbytes -= reclen; + break; + case MFS_READDIR_GETDENTS64: + reclen = (offsetof(struct dirent64, d_name[0]) + + namelen + 1 + sizeof(u64)-1) & ~(sizeof(u64)-1); + if(reclen > cb->maxbytes) + return(-EINVAL); + cb->maxbytes -= reclen; + break; + } + reclen = sizeof(ino_t) + sizeof(loff_t) + sizeof(unsigned int) 
+ + sizeof(short) + namelen; + if(reclen > cb->buflen - (cb->bufp - cb->buf)) + { + if(cb->bufp == cb->buf || (cb->policy != MFS_READDIR_GETDENTS + && cb->policy != MFS_READDIR_GETDENTS64)) + { + /* should not happen - we allocated at least dirent64 */ + printk("mfs_serve_filldir: buffer not sufficient\n"); + return(-EINVAL); + } + if((err = mfs_serve_flush_dirs(cb))) + return(err); + } + *((ino_t *)cb->bufp)++ = ino; + *((loff_t *)cb->bufp)++ = offset; + *((unsigned int *)cb->bufp)++ = type; + *((short *)cb->bufp)++ = namelen; + memcpy(cb->bufp, (void *)name, namelen); + cb->bufp += namelen; + if(cb->policy == MFS_READDIR_UNKNOWN) + return(mfs_serve_flush_dirs(cb)); + else + return(0); +} + +void +mfs_serve_readdir(struct mfs_server_contact *con, struct mfs_request_readdir *s) +{ + struct mfs_reply_readdir r; + int ret = 0; + struct mfs_serve_readdir_callback cb; + int multi = (s->policy == MFS_READDIR_GETDENTS || + s->policy == MFS_READDIR_GETDENTS64); + int duma, dumb; + + for(cb.buflen = multi ? 16384 : sizeof(struct dirent64) ; + cb.buflen >= sizeof(struct dirent64) ; cb.buflen >>= 1) + if((cb.buf = (char *)kmalloc(cb.buflen, GFP_KERNEL))) + break; + cb.bufp = cb.buf; + cb.policy = s->policy; + cb.maxbytes = multi ? s->maxbytes : 1; + cb.con = con; + unpack_credits(&s->cred); + if(cb.buflen < sizeof(struct dirent64)) + ret = -ENOMEM; + else + ret = mfs_low_readdir(s->handle, &s->file, (void *)&cb, + mfs_serve_filldir, &duma, &dumb); + r.file = s->file; + r.result = ret; + r.datalen = cb.bufp - cb.buf; + mfs_server_send(con, MFS_REPLY_READDIR, &r, sizeof(r), + r.datalen ? 
cb.buf : NULL, r.datalen, NULL, 0); + if(cb.buflen >= sizeof(struct dirent64)) + kfree(cb.buf); +} + +void +mfs_serve_ioctl(struct mfs_server_contact *con, struct mfs_request_ioctl *s) +{ + struct mfs_reply_ioctl r; + + unpack_credits(&s->cred); + r.result = mfs_low_ioctl(s->handle, &s->file, s->cmd, s->arg, s->size, + &s->data); + r.file = s->file; + r.data = s->data; + mfs_server_send(con, MFS_REPLY_IOCTL, &r, sizeof(r), NULL, 0, NULL, 0); +} + +void +mfs_serve_fsync(struct mfs_server_contact *con, struct mfs_request_fsync *s) +{ + struct mfs_reply_fsync r; + + unpack_credits(&s->cred); + r.result = mfs_low_fsync(s->handle, &s->file, s->datasync); + r.file = s->file; + mfs_server_send(con, MFS_REPLY_FSYNC, &r, sizeof(r), NULL, 0, NULL, 0); +} + +void +mfs_serve_revalidate(struct mfs_server_contact *con, + struct mfs_request_revalidate *s) +{ + struct mfs_reply_revalidate r; + + unpack_credits(&s->cred); + r.result = mfs_low_revalidate(s->handle, &r.iinfo); + mfs_server_send(con, MFS_REPLY_REVALIDATE, &r, sizeof(r), NULL, 0, NULL, 0); +} + +void +mfs_serve_express(struct mfs_server_contact *con, struct mfs_request_express *s, + char *name, void *more_data) +{ + struct mfs_reply_express r; + char *newname; + + unpack_credits(&s->cred); + r.handle = s->handle; + r.complete = s->complete; + if(r.complete.more_data) + r.complete.more_data = more_data; + r.result = mfs_low_express_lookup(&r.handle, name, s->follow, + s->link_count, s->total_link_count, &r.iinfo, + &newname, &r.complete); + r.namelen = (r.result || !newname) ? 0 : strlen(newname) + 1; + mfs_server_send(con, MFS_REPLY_EXPRESS, &r, sizeof(r), + r.namelen ? 
(void *)newname : NULL, r.namelen, + r.complete.more_data, r.complete.more_data_len); + if(r.namelen) + kfree(newname); + if(r.complete.more_data) + kfree(r.complete.more_data); +} + +void +mfs_serve_checkpath(struct mfs_server_contact *con, + struct mfs_request_checkpath *s) +{ + struct mfs_reply_checkpath r; + char *path; + + unpack_credits(&s->cred); + path = mfs_low_check_path(s->handle); + r.namelen = path ? strlen(path)+1 : 0; + mfs_server_send(con, MFS_REPLY_CHECKPATH, &r, sizeof(r), + r.namelen ? (void *)path : NULL, r.namelen, NULL, 0); + if(path) + kfree(path); +} + +void +mfs_serve_setattr(struct mfs_server_contact *con, struct mfs_request_setattr *s) +{ + struct mfs_reply_setattr r; + + unpack_credits(&s->cred); + r.result = mfs_low_setattr(s->handle, &s->attr); + mfs_server_send(con, MFS_REPLY_SETATTR, &r, sizeof(r), NULL, 0, + NULL, 0); +} + +void +mfs_serve(struct mfs_server_contact *con, int type, void *header, void *dat1, + void *dat2) +{ + switch(type) + { + case MFS_REQUEST_ATTACH_HANDLE: + mfs_serve_attach(con, + (struct mfs_request_attach_handle *)header); + break; + case MFS_REQUEST_TOUCH_HANDLE: + mfs_serve_touch(con,(struct mfs_request_touch_handle *)header); + break; + case MFS_REQUEST_DISPOSE_HANDLE: + mfs_low_dispose_handle( + ((struct mfs_request_dispose_handle *)header)->handle); + mfs_server_send(con, MFS_CONTINUE, NULL, 0, NULL, 0, NULL,0); + break; + case MFS_REQUEST_LLSEEK: + mfs_serve_llseek(con, (struct mfs_request_llseek *)header); + break; + case MFS_REQUEST_READ: + mfs_serve_read(con, (struct mfs_request_read *)header); + break; + case MFS_REQUEST_WRITE: + mfs_serve_write(con, (struct mfs_request_write *)header, dat1); + break; + case MFS_REQUEST_READDIR: + mfs_serve_readdir(con, (struct mfs_request_readdir *)header); + break; + case MFS_REQUEST_IOCTL: + mfs_serve_ioctl(con, (struct mfs_request_ioctl *)header); + break; + case MFS_REQUEST_FSYNC: + mfs_serve_fsync(con, (struct mfs_request_fsync *)header); + break; + case 
MFS_REQUEST_REVALIDATE: + mfs_serve_revalidate(con, + (struct mfs_request_revalidate *)header); + break; + case MFS_REQUEST_EXPRESS: + mfs_serve_express(con, (struct mfs_request_express *)header, + (char *)dat1, dat2); + break; + case MFS_REQUEST_CHECKPATH: + mfs_serve_checkpath(con, + (struct mfs_request_checkpath *)header); + break; + case MFS_REQUEST_SETATTR: + mfs_serve_setattr(con, (struct mfs_request_setattr *)header); + break; + default: + printk("mfs_serve: type %d unimplemented\n", type); + } +} + +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/mfs/socket.c linux_umopenmosix/fs/mfs/socket.c --- linux-2.4.17/fs/mfs/socket.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/fs/mfs/socket.c Wed Jun 26 23:45:17 2002 @@ -0,0 +1,534 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + * used code by Oren Laadan. 
+ */ + +#include +#ifdef CONFIG_MOSIX_FS +#include +#include +#include +#include +#include +#include +#include +#include + +/* meanwhile, we assume MOSIX uses only IPv4 addresses */ +/* when MOSIX can use IPv6, this file must also be upgraded */ + +#ifdef CONFIG_MOSIX_UDB +#define MFS_CONNECT_TIMO (100*HZ) +#define MFS_RECEIVE_TIMO (100*HZ) +#else +#define MFS_CONNECT_TIMO (10*HZ) +#define MFS_RECEIVE_TIMO (10*HZ) +#endif /* CONFIG_MOSIX_UDB */ + +int +mfs_common_socket_setup(struct socket *sock) +{ + mm_segment_t oldfs = get_fs(); + int error = 0; + int val; + + set_fs(KERNEL_DS); + val = 32768+1024; + sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&val, sizeof(val)); + sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&val, sizeof(val)); + val = MOSIX_CONNECTION_KEEPALIVE_INTERVAL; + sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, + (char *) &val, sizeof(val)); + val = MOSIX_CONNECTION_KEEPALIVE_MAXTRIES; + sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, + (char *) &val, sizeof(val)); + val = MOSIX_CONNECTION_KEEPALIVE_TOTAL; + sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, + (char *) &val, sizeof(val)); + val = 1; + sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + (char *)&val, sizeof(val)); + error = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&val, sizeof(val)); + set_fs(oldfs); + set_bit(SOCK_INTER_MOSIX, &sock->flags); + return(error); +} + +struct socket * +mfs_open_main_socket(void) +{ + struct socket *sock = NULL; /* the "failed:" path tests sock, also when sock_create() itself fails */ + struct sockaddr_in addr; + + deeper_sleep(); + + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + addr.sin_port = MFS_MAIN_PORT; + if(sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) + goto failed; + + if(mfs_common_socket_setup(sock)) + { +#ifdef CONFIG_MOSIX_DEBUG + printk("setup failed on MFS main socket\n"); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + + if(sock->ops->bind(sock, (struct sockaddr *)&addr,
sizeof(addr))) + { +#ifdef CONFIG_MOSIX_DEBUG + printk("bind failed on MFS main socket\n"); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + if(sock->ops->listen(sock, SOMAXCONN)) + { +#ifdef CONFIG_MOSIX_DEBUG + printk("listen failed on MFS main socket\n"); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + + lighter_sleep(); + spin_lock_irq(&skown_lock); + sock->sk->owner = mfs_main_server_task; + spin_unlock_irq(&skown_lock); + return(sock); + +failed: + lighter_sleep(); + if (sock) + sock_release(sock); + return(NULL); +} + +struct socket * +mfs_new_connection(struct socket *parent, int *caller) +{ + struct socket *sock; + struct mosix_addr mosaddr; + int val = sizeof(mosaddr.saddr); + + if(!(parent->ops->poll(NULL, parent, NULL) & POLLIN)) + return(NULL); + if (!(sock = sock_alloc())) + return(NULL); + sock->type = parent->type; + sock->ops = parent->ops; + if(sock->ops->accept(parent, sock, O_NONBLOCK)) + goto failed; + if (sock->ops->getname(sock, &mosaddr.saddr, &val, 1)) + goto failed; + if(!(*caller = net_to_mos(&mosaddr))) + { + comm_report_violation("MFS", &mosaddr.saddr); + goto failed; + } + if(mfs_common_socket_setup(sock)) + goto failed; + spin_lock_irq(&skown_lock); + sock->sk->owner = mfs_main_server_task; + spin_unlock_irq(&skown_lock); + return(sock); + failed: + sock_release(sock); + return(NULL); +} + +struct socket * +mfs_connect_to(int to) +{ + struct task_struct *p = current; + int error; + struct socket *sock = NULL; + struct sockaddr addr; + unsigned long timo = MFS_CONNECT_TIMO; + DECLARE_WAITQUEUE(wait, p); + + deeper_sleep(); + if(!mos_to_net(to, &addr)) + { + error = -EREMOTE; + goto failed; + } + ((struct sockaddr_in *)&addr)->sin_port = MFS_MAIN_PORT; + if((error = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock))) + goto failed; + if((error = mfs_common_socket_setup(sock))) + goto failed; + sock->ops->connect(sock, &addr, sizeof(addr), O_NONBLOCK); + add_wait_queue(sock->sk->sleep, &wait); + while (sock->state != 
SS_CONNECTED) + { + set_current_state(TASK_INTERRUPTIBLE); + error = sock->ops->connect(sock, &addr, sizeof(addr), + O_NONBLOCK); + if (error != -EALREADY || (error = sock_error(sock->sk))) + break; + timo = schedule_timeout(timo); + if (timo <= 0) + { + error = -EAGAIN; + break; + } + } + remove_wait_queue(sock->sk->sleep, &wait); + set_current_state(TASK_RUNNING); + + if (error) + goto failed; + if (sock->sk->err) + { + error = sock_error(sock->sk); /* cleans error.. */ + goto failed; + } + lighter_sleep(); +if(mfs_debug) +printk("%s-mfs_connect_to %d: success\n", desc_mostask(NULL), to); + return (sock); + +failed: + lighter_sleep(); + if (sock) + sock_release(sock); + return (ERR_PTR(error)); +} + +inline int +mfs_socket_has_data(struct socket *sock) +{ + return((sock->ops->poll(NULL, sock, 0) & + (POLLRDNORM|POLLIN|POLLHUP|POLLERR)) != 0); +} + +/* + * "mfs_socket_is_closed" is only called by the client on inactive sockets: + * we never expect server-initiated data, so POLLIN implies a closed socket. 
+ */ +int +mfs_socket_is_closed(struct socket *sock) +{ +if(mfs_debug > 1)printk("socket_is_closed(%x): poll=0x%x\n", (int)sock, sock->ops->poll(NULL, sock, NULL)); + return((sock->ops->poll(NULL, sock, NULL) & + (POLLHUP|POLLERR|POLLIN)) != 0); +} + +int +mfs_send(struct socket *sock, int first, int serial, int type, + void *header, int hdsz, void *dat1, int dat1sz, void *dat2, int dat2sz) +{ + struct mfs_message_header hd; + struct iovec iov[5]; + struct msghdr msg = { NULL, 0, iov, 0, NULL, 0, MSG_NOSIGNAL }; + mm_segment_t oldfs; + int niov = 1; + int size = 0; + int error = -ENOMEM; + + hd.serial = serial; + hd.first = first; + hd.version = MFS_VERSION; + hd.caller = PE; + hd.type = type; + hd.hdsz = hdsz; + hd.dat1sz = dat1sz; + hd.dat2sz = dat2sz; + iov[0].iov_base = &hd; + iov[0].iov_len = sizeof(hd); + if(hdsz) + { + iov[1].iov_base = header; + iov[1].iov_len = hdsz; + niov++; + } + if(dat1sz) + { + iov[niov].iov_base = dat1; + iov[niov++].iov_len = dat1sz; + } + if(dat2sz) + { + iov[niov].iov_base = dat2; + iov[niov++].iov_len = dat2sz; + } + size = sizeof(hd) + hdsz + dat1sz + dat2sz; + msg.msg_iovlen = niov; + + deeper_sleep(); + oldfs = get_fs(); + set_fs(KERNEL_DS); + error = sock->ops->sendmsg(sock, &msg, size, 0); +if(mfs_debug) +printk("%s-mfs_send, type=%d, serial=%d, size=%d/%d/%d, err=%d\n", desc_mostask(NULL), type, serial, hdsz, dat1sz, dat2sz, error); + set_fs(oldfs); + lighter_sleep(); + + return (error < 0 ? 
error : 0); +} + +int +mfs_do_receive(struct socket *sock, void *base, int len) +{ + struct task_struct *p = current; + int n = 0; + struct iovec iov; + struct msghdr msg = { NULL, 0, &iov, 1, NULL, 0, + MSG_NOSIGNAL | MSG_DONTWAIT}; + int done = 0; + unsigned long timo = MFS_RECEIVE_TIMO; + DECLARE_WAITQUEUE(wait, p); + + add_wait_queue(sock->sk->sleep, &wait); + while(done < len) + { + set_current_state(TASK_INTERRUPTIBLE); + p->mosix.ignoreoldsigs = 1; + p->sigpending = 0; + if(!mfs_socket_has_data(sock)) + { + if(timo <= 0) + { + len = -EPIPE; + break; + } +if(mfs_debug)printk("mfs_do_receive: timo before=%ld\n", timo); + timo = schedule_timeout(timo); +if(mfs_debug)printk("mfs_do_receive: timo after=%ld\n", timo); + continue; + } + iov.iov_base = (void *)((char *)base + done); + iov.iov_len = len - done; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; +if(mfs_debug > 2)printk("%s-mfs_do_receive: looking for %d-%d\n", desc_mostask(NULL), len, done); + n = sock->ops->recvmsg(sock, &msg, len-done, msg.msg_flags, 0); +if(mfs_debug > 2)printk("%s-mfs_do_receive: got %d of %d-%d\n", desc_mostask(NULL), n, len, done); + if (n == -EAGAIN || n == -ERESTARTSYS || n == -EINTR) + continue; + if(n <= 0) + { + len = n < 0 ? 
n : done; /* peer closed early: return the short count (always < len) so callers see a mismatch; the former positive ECONNRESET could equal a requested length */ + break; + } + done += n; + } + remove_wait_queue(sock->sk->sleep, &wait); + set_current_state(TASK_RUNNING); + recalc_sigpending(p); + return (len); +} + +int +mfs_receive(struct socket *sock, int policy, int *serial, + struct mfs_message_header *preread_header, + void **header, void **dat1, void **dat2) /* receive one message according to policy; returns the message type on success, a negative errno otherwise */ +{ + struct mfs_message_header hd; + void *contents = NULL; + int tlen; + mm_segment_t oldfs; + int error; + int need_another; + +if(mfs_debug > 1) +printk("%s-mfs_receive, policy=%d\n", desc_mostask(NULL), policy); + deeper_sleep(); + oldfs = get_fs(); + set_fs(KERNEL_DS); + + next_message: + need_another = 0; + + if(preread_header && policy != MFS_JUST_HEADER) + { + hd = *preread_header; + preread_header = NULL; + error = 0; + } + else if ((error = mfs_do_receive(sock, &hd, sizeof(hd))) != sizeof(hd)) + { +if(mfs_debug) +printk("%s-mfs_receive: asked for header(%d), got %d\n", desc_mostask(NULL), sizeof(hd), error); + if (error >= 0) + error = -EPIPE; + goto failed; + } +if(mfs_debug) +printk("%s-mfs_do_receive: asked and got header: ", desc_mostask(NULL)); +if(mfs_debug) +printk("version=%d, first=%d, caller=%d, serial=%d, type=%d, size=%d/%d/%d\n", hd.version,hd.first, hd.caller, hd.serial, hd.type, hd.hdsz, hd.dat1sz, hd.dat2sz); + if(hd.version != MFS_VERSION) + { + static int limit; + + if(limit++ < 5) + { + printk("MFS received request from wrong version:\n"); + printk("Mine is %d, theirs (%d) is %d\n", MFS_VERSION, + hd.caller, hd.version); + } + error = -EINVAL; /* reject every mismatch; limit only throttles the log messages */ + goto failed; + } + error = 0; + switch(policy) + { + case MFS_JUST_HEADER: + if(hd.serial != *serial) + { + need_another = 1; + break; + } + *preread_header = hd; + goto done; + case MFS_SKIP_OTHERS: + if(hd.serial != *serial) + need_another = 1; + break; + case MFS_MUST_MATCH: + if(hd.first || hd.serial != *serial) + { + error = -EPIPE; + goto failed; + } + break; + case MFS_FIRST_RECEIVE: + if(hd.first) + *serial = hd.serial; + else + { + error = -EPIPE; + goto failed; + } +
+ break; + } + tlen = hd.hdsz + hd.dat1sz + hd.dat2sz; + + if (tlen) + { + if(!(contents = kmalloc(tlen, GFP_KERNEL))) + { + error = -ENOMEM; + goto failed; + } + if ((error = mfs_do_receive(sock, contents, tlen)) != tlen) { +if(mfs_debug) +printk("%s-mfs_receive: asked for body(%d), got %d\n", desc_mostask(NULL), tlen, error); + if (error >= 0) + error = -EFAULT; + goto failed; + } +if(mfs_debug>1) +printk("%s-mfs_receive: asked and got body(%d)\n", desc_mostask(NULL), tlen); + } + error = 0; + +done: +failed: + lighter_sleep(); + set_fs(oldfs); + if(error) + { + if(contents) + kfree(contents); + if(header) + *header = NULL; + if(dat1) + *dat1 = NULL; + if(dat2) + *dat2 = NULL; +if(mfs_debug) +printk("%s-mfs_receive: error=%d\n", desc_mostask(NULL), error); + return(error); + } + else if(need_another) + { + deeper_sleep(); set_fs(KERNEL_DS); /* re-establish the entry state undone just above before receiving again; next_message itself clears need_another. NOTE(review): assumes deeper_sleep()/lighter_sleep() must stay paired - confirm */ +if(mfs_debug) +printk("%s-mfs_receive: need another!\n", desc_mostask(NULL)); + if(contents) + { + kfree(contents); + contents = NULL; + } + goto next_message; + } + if(header) + *header = contents; /* header, dat1 and dat2 all point into this one kmalloc'ed buffer */ + if(dat1) + *dat1 = (void *)(((char *)contents) + hd.hdsz); + if(dat2) + *dat2 = (void *)(((char *)contents) + hd.hdsz + hd.dat1sz); + return(hd.type); +} + +int +mfs_set_main_ownership(struct socket *sock, int main) +{ + int timo = HZ; + + spin_lock_irq(&skown_lock); + if(!main) + { + sock->sk->owner = NULL; + spin_unlock_irq(&skown_lock); + return(1); + } + sock->sk->owner = current; + set_bit(SOCK_WAIT_MFSDATA, &sock->flags); + spin_unlock_irq(&skown_lock); + while(1) + { + current->state = TASK_INTERRUPTIBLE; + if(sock->ops->poll(NULL, sock, 0) & POLLIN) + { + spin_lock_irq(&skown_lock); + clear_bit(SOCK_WAIT_MFSDATA, &sock->flags); + sock->sk->owner = NULL; + spin_unlock_irq(&skown_lock); + current->state = TASK_RUNNING; + return(0); + } + if(main == 1 || timo <= 0) + { + spin_lock_irq(&skown_lock); + clear_bit(SOCK_WAIT_MFSDATA, &sock->flags); + sock->sk->owner = NULL; + current->state = TASK_RUNNING; + spin_unlock_irq(&skown_lock); +
sock->sk->owner = mfs_main_server_task; + mfs_new_request_arrived(); + return(1); + } + timo = schedule_timeout(timo); + } +} + +void +mfs_close_socket(struct socket *sock) +{ + spin_lock_irq(&skown_lock); + if(sock->sk) + sock->sk->owner = NULL; + clear_bit(SOCK_INTER_MOSIX, &sock->flags); + spin_unlock_irq(&skown_lock); + if(sock->ops) + sock->ops->shutdown(sock, 2); + sock_release(sock); +} + +#endif /* CONFIG_MOSIX_FS */ diff -urN linux-2.4.17/fs/namei.c linux_umopenmosix/fs/namei.c --- linux-2.4.17/fs/namei.c Wed Oct 17 23:46:29 2001 +++ linux_umopenmosix/fs/namei.c Wed Jun 26 23:45:17 2002 @@ -26,6 +26,15 @@ #include #include +#ifdef CONFIG_MOSIX_FS +#include +#include +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA +#include +#define replaced_name name_left +#endif /* CONFIG_MOSIX_DFSA */ + #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* [Feb-1997 T. Schoebel-Theuer] @@ -237,6 +246,7 @@ spin_unlock(&arbitration_lock); return 0; } + int deny_write_access(struct file * file) { spin_lock(&arbitration_lock); @@ -324,6 +334,25 @@ return result; } +#ifdef CONFIG_MOSIX_DFSA +static inline int +dfsa_crossed(struct nameidata *nd, struct vfsmount *prevmnt) +{ + if(nd->flags & LOOKUP_STAYFS) + { + mntput(prevmnt); + if(nd->mnt != prevmnt) + { + if(current->mosix.dflags & DREMOTE) + return(-EDOITATHOME); + nd->flags &= ~LOOKUP_STAYFS; + current->mosix.dflags &= ~DSTATSDOWN; + } + } + return(0); +} +#endif /* CONFIG_MOSIX_DFSA */ + /* * This limits recursive symlink follows to 8, while * limiting consecutive symlinks to 40. @@ -436,6 +465,15 @@ } } +#ifdef CONFIG_MOSIX_FS +static inline int +unacceptable_mfs_fs(unsigned long magic) +{ + return(magic == PROC_SUPER_MAGIC || magic == 0x42494e4d); + /* (BINMisc) */ +} +#endif /* CONFIG_MOSIX_FS */ + /* * Name resolution. 
* @@ -450,7 +488,29 @@ struct inode *inode; int err; unsigned int lookup_flags = nd->flags; - +#ifdef CONFIG_MOSIX_DFSA + struct vfsmount *prevmnt = NULL; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + char *orig_part = NULL, *orig_ref = NULL, *tmpname; + int flattings = 0; + char *free_me = NULL; +#endif /* CONFIG_MOSIX_FS */ + +#ifdef CONFIG_MOSIX_DFSA + /* an early error in path_init? (callers did not error-check there!) */ + if(nd->flags & LOOKUP_DOITATHOME) + { + path_release(nd); + return(-EDOITATHOME); + } + if(nd->flags & LOOKUP_REPLACENAME) + { + nd->flags &= ~LOOKUP_REPLACENAME; + name = nd->replaced_name; + nd->replaced_name = NULL; + } +#endif /* CONFIG_MOSIX_DFSA */ while (*name=='/') name++; if (!*name) @@ -458,7 +518,12 @@ inode = nd->dentry->d_inode; if (current->link_count) +#ifdef CONFIG_MOSIX_FS + lookup_flags = LOOKUP_FOLLOW | + (nd->flags & (LOOKUP_MFS|LOOKUP_MFS_MFS)); +#else lookup_flags = LOOKUP_FOLLOW; +#endif /* CONFIG_MOSIX_FS */ /* At this point we know we have a real path component. */ for(;;) { @@ -466,6 +531,47 @@ struct qstr this; unsigned int c; +#ifdef CONFIG_MOSIX_FS + /* have we consumed the whole link and can continue + * with the original name? 
+ */ + if(orig_part && name >= orig_part) + { + name = orig_ref + (name - orig_part); + orig_part = orig_ref = NULL; + if(free_me) + { + kfree(free_me); + free_me = NULL; + } + flattings = 0; /* real progress made */ + nd->flags &= ~LOOKUP_MFS_MFS; + } + if(inode->i_op->express_lookup) + { + if(current->link_count) + /* flatten the path by passing it back to top-level, + * so all is included in the same express_lookup */ + { + if(!(nd->name_left = + kmalloc(strlen(name)+1, GFP_KERNEL))) + { + err = -ENOMEM; + break; + } + strcpy(nd->name_left, name); + return(0); + } + err = inode->i_op->express_lookup(nd, (char **)&name); + if(IS_ERR((const void *)err)) + break; + if(!name) + goto return_err; /*trick: err can be positive*/ + inode = nd->dentry->d_inode; + continue; + } +#endif /* CONFIG_MOSIX_FS */ + err = permission(inode, MAY_EXEC); dentry = ERR_PTR(err); if (err) @@ -501,7 +607,25 @@ case 2: if (this.name[1] != '.') break; +#ifdef CONFIG_MOSIX_FS + if((nd->flags & LOOKUP_MFS) && + !current->link_count && !orig_ref && + is_mfs_root(nd)) + { + nd->name_left = (char *)name; + path_release(nd); + goto return_base; + } +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + prevmnt = mntget(nd->mnt); +#endif /* CONFIG_MOSIX_DFSA */ follow_dotdot(nd); +#ifdef CONFIG_MOSIX_DFSA + if((err = dfsa_crossed(nd, prevmnt))) + break; +#endif /* CONFIG_MOSIX_DFSA */ inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -525,8 +649,16 @@ break; } /* Check mountpoints.. 
*/ +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + prevmnt = mntget(nd->mnt); +#endif /* CONFIG_MOSIX_DFSA */ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) ; +#ifdef CONFIG_MOSIX_DFSA + if((err = dfsa_crossed(nd, prevmnt))) + break; +#endif /* CONFIG_MOSIX_DFSA */ err = -ENOENT; inode = dentry->d_inode; @@ -543,6 +675,43 @@ goto return_err; err = -ENOENT; inode = nd->dentry->d_inode; +#ifdef CONFIG_MOSIX_FS + if(nd->name_left) + { + int l = strlen(nd->name_left); + + err = -ELOOP; + if(flattings++ > 8) + { + kfree(nd->name_left); + nd->name_left = NULL; + break; + } + err = -ENOMEM; + if(!(tmpname = kmalloc(l + strlen(name) + 2, + GFP_KERNEL))) + { + kfree(nd->name_left); + nd->name_left = NULL; + break; + } + sprintf(tmpname, "%s/%s", nd->name_left, name); + kfree(nd->name_left); + if(current->link_count) + { + nd->name_left = tmpname; + return(0); + } + if(free_me) + kfree(free_me); + free_me = tmpname; + nd->name_left = NULL; + orig_part = tmpname + l + 1; + orig_ref = (char *)name; + name = tmpname; + continue; + } +#endif /* CONFIG_MOSIX_FS */ if (!inode) break; err = -ENOTDIR; @@ -553,8 +722,19 @@ nd->dentry = dentry; } err = -ENOTDIR; +#ifdef CONFIG_MOSIX_FS + if (!inode->i_op->express_lookup) +#endif /* CONFIG_MOSIX_FS */ if (!inode->i_op->lookup) break; +#ifdef CONFIG_MOSIX_FS + if(lookup_flags & LOOKUP_MFS) + { + err = -EACCES; + if(unacceptable_mfs_fs(nd->mnt->mnt_sb->s_magic)) + break; + } +#endif /* CONFIG_MOSIX_FS */ continue; /* here ends the main loop */ @@ -569,7 +749,25 @@ case 2: if (this.name[1] != '.') break; +#ifdef CONFIG_MOSIX_FS + if((nd->flags & LOOKUP_MFS) && + !current->link_count && !orig_ref && + is_mfs_root(nd)) + { + nd->name_left = (char *)name; + path_release(nd); + goto return_base; + } +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + prevmnt = mntget(nd->mnt); +#endif /* CONFIG_MOSIX_DFSA */ follow_dotdot(nd); +#ifdef CONFIG_MOSIX_DFSA + if((err = 
dfsa_crossed(nd, prevmnt))) + break; +#endif /* CONFIG_MOSIX_DFSA */ inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -587,8 +785,16 @@ if (IS_ERR(dentry)) break; } +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + prevmnt = mntget(nd->mnt); +#endif /* CONFIG_MOSIX_DFSA */ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) ; +#ifdef CONFIG_MOSIX_DFSA + if((err = dfsa_crossed(nd, prevmnt))) + break; +#endif /* CONFIG_MOSIX_DFSA */ inode = dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { @@ -597,6 +803,29 @@ if (err) goto return_err; inode = nd->dentry->d_inode; +#ifdef CONFIG_MOSIX_FS + if(nd->name_left) + { + err = -ELOOP; + if(flattings++ > 8) + { + kfree(nd->name_left); + nd->name_left = NULL; + break; + } + err = -ENOMEM; + if(free_me) + kfree(free_me); + if(current->link_count) + return(0); + orig_ref = (char *)name; + name = free_me = nd->name_left; + nd->name_left = NULL; + lookup_flags = nd->flags; + orig_part = NULL; + continue; + } +#endif /* CONFIG_MOSIX_FS */ } else { dput(nd->dentry); nd->dentry = dentry; @@ -606,9 +835,26 @@ goto no_inode; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; +#ifdef CONFIG_MOSIX_FS + if (!inode->i_op || (!inode->i_op->lookup && + !inode->i_op->express_lookup)) +#else if (!inode->i_op || !inode->i_op->lookup) +#endif /* CONFIG_MOSIX_FS */ break; } +#ifdef CONFIG_MOSIX_FS + if(lookup_flags & LOOKUP_MFS) + { + umode_t mod = inode->i_mode & S_IFMT; + + err = -EACCES; + if(mod != S_IFREG && mod != S_IFDIR && mod != S_IFLNK) + break; + if(unacceptable_mfs_fs(nd->mnt->mnt_sb->s_magic)) + break; + } +#endif /* CONFIG_MOSIX_FS */ goto return_base; no_inode: err = -ENOENT; @@ -625,6 +871,10 @@ else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; return_base: +#ifdef CONFIG_MOSIX_FS + if(free_me) + kfree(free_me); +#endif /* CONFIG_MOSIX_FS */ return 0; out_dput: dput(dentry); @@ -632,6 +882,10 @@ } 
path_release(nd); return_err: +#ifdef CONFIG_MOSIX_FS + if(free_me) + kfree(free_me); +#endif /* CONFIG_MOSIX_FS */ return err; } @@ -701,12 +955,53 @@ dput(olddentry); mntput(oldmnt); } +#ifdef CONFIG_MOSIX_DFSA + tell_process(current, DREQ_NOTUPTODATE); +#endif /* CONFIG_MOSIX_DFSA */ } /* SMP-safe */ static inline int walk_init_root(const char *name, struct nameidata *nd) { +#ifdef CONFIG_MOSIX_DFSA + if(nd->flags & LOOKUP_STAYFS) + { + struct task_struct *p = current; + + if(current->fs->altroot) + { + p->mosix.dflags &= ~DSTATSDOWN; + nd->flags &= ~LOOKUP_STAYFS; + } + else if(p->mosix.dflags & DDEPUTY) + nd->flags &= ~LOOKUP_STAYFS; + else if(((p->mosix.dflags & DREMOTE) || CAN_DFSA(p)) && + name_starts_in_dfsa((char **)&name, nd)) + { + if(!(p->mosix.dflags & DREMOTE) && !p->link_count) + p->mosix.dflags |= DSTATSDOWN; + if(!*name) + return(0); + nd->flags |= LOOKUP_REPLACENAME; + nd->replaced_name = (char *)name; + return(1); + } + else if(p->mosix.dflags & DREMOTE) + { + nd->flags |= LOOKUP_DOITATHOME; + nd->mnt = NULL; + nd->dentry = NULL; + return(1); + } + else + p->mosix.dflags &= ~DSTATSDOWN; + } +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + if(nd->flags & LOOKUP_MFS) + return(mfs_walk_init_root(nd)); +#endif /* CONFIG_MOSIX_FS */ read_lock(&current->fs->lock); if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { nd->mnt = mntget(current->fs->altrootmnt); @@ -727,12 +1022,48 @@ { nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; +#ifdef CONFIG_MOSIX_FS + nd->name_left = NULL; + nd->complete_flags = 0; +#ifdef CONFIG_MOSIX_DIAG + if(flags & LOOKUP_MFS) + panic("MFS should have own path_init!"); +#endif /* CONFIG_MOSIX_DIAG */ +#endif /* CONFIG_MOSIX_FS */ if (*name=='/') return walk_init_root(name,nd); read_lock(&current->fs->lock); nd->mnt = mntget(current->fs->pwdmnt); nd->dentry = dget(current->fs->pwd); read_unlock(&current->fs->lock); +#ifdef CONFIG_MOSIX_DFSA + if(!nd->dentry) /* possible on REMOTE!
*/ + { + nd->flags |= LOOKUP_DOITATHOME; + return(1); + } + + if(nd->flags & LOOKUP_STAYFS) + { + struct task_struct *p = current; + + if(p->mosix.dflags & DDEPUTY) + nd->flags &= ~LOOKUP_STAYFS; + else if(((p->mosix.dflags & DREMOTE) || CAN_DFSA(p)) && + within_dfsa(nd)) + { + if(!(p->mosix.dflags & DREMOTE)) + p->mosix.dflags |= DSTATSDOWN; + } + else + { + if(p->mosix.dflags & DREMOTE) + nd->flags |= LOOKUP_DOITATHOME; + else + p->mosix.dflags &= ~DSTATSDOWN; + } + } +#endif /* CONFIG_MOSIX_DFSA */ return 1; } @@ -832,6 +1163,10 @@ err = path_walk(tmp, nd); putname(tmp); } +#ifdef CONFIG_MOSIX_FS + else + nd->complete_flags = 0; +#endif /* CONFIG_MOSIX_FS */ return err; } @@ -898,7 +1233,11 @@ * 3. We should have write and exec permissions on dir * 4. We can't do it if dir is immutable (done in permission()) */ +#ifdef CONFIG_MOSIX_FS +inline int may_create(struct inode *dir, struct dentry *child) { +#else static inline int may_create(struct inode *dir, struct dentry *child) { +#endif /* CONFIG_MOSIX_FS */ if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) @@ -976,17 +1315,46 @@ struct dentry *dentry; struct dentry *dir; int count = 0; +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + unsigned int more_flags = 0; +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ acc_mode = ACC_MODE(flag); +#ifdef CONFIG_MOSIX_FS + if(!pathname) /* special argument to skip path-search */ + { + if(flag & O_CREAT) + goto low_create; + else + goto low_nocreate; + } + nd->express_function = EF_OPEN; + nd->complete_args.ints.i1 = flag; + nd->complete_args.ints.i2 = mode & ~current->fs->umask; + more_flags |= LOOKUP_COMPLETE; +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dirty_bits & FILP_OPEN_SYSCALL) + more_flags |= LOOKUP_STAYFS; +#endif /* CONFIG_MOSIX_DFSA */ /* * The simplest case - just a plain lookup. 
*/ if (!(flag & O_CREAT)) { +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + if (path_init(pathname, lookup_flags(flag)|more_flags, nd)) +#else if (path_init(pathname, lookup_flags(flag), nd)) +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ error = path_walk(pathname, nd); if (error) return error; +#ifdef CONFIG_MOSIX_FS + if(nd->complete_flags & COMPLETE_DONE) + return(0); + low_nocreate: +#endif /* CONFIG_MOSIX_FS */ dentry = nd->dentry; goto ok; } @@ -994,10 +1362,19 @@ /* * Create - we need to know the parent. */ +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + if (path_init(pathname, LOOKUP_PARENT|more_flags, nd)) +#else if (path_init(pathname, LOOKUP_PARENT, nd)) +#endif /* CONFIG_MOSIX_FS */ error = path_walk(pathname, nd); if (error) return error; +#ifdef CONFIG_MOSIX_FS + if(nd->complete_flags & COMPLETE_DONE) + return(0); + low_create: +#endif /* CONFIG_MOSIX_FS */ /* * We have the parent and last component. First of all, check @@ -1186,7 +1563,11 @@ } /* SMP-safe */ +#ifdef CONFIG_MOSIX_FS +struct dentry *lookup_create(struct nameidata *nd, int is_dir) +#else static struct dentry *lookup_create(struct nameidata *nd, int is_dir) +#endif /* CONFIG_MOSIX_FS */ { struct dentry *dentry; @@ -1247,8 +1628,18 @@ if (IS_ERR(tmp)) return PTR_ERR(tmp); +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_MKNOD; + nd.complete_args.ints.i1 = mode & ~current->fs->umask; + if (path_init(tmp, LOOKUP_COMPLETE|LOOKUP_PARENT, &nd)) +#else if (path_init(tmp, LOOKUP_PARENT, &nd)) +#endif /* CONFIG_MOSIX_FS */ error = path_walk(tmp, &nd); +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto out; +#endif /* CONFIG_MOSIX_FS */ if (error) goto out; dentry = lookup_create(&nd, 0); @@ -1316,10 +1707,25 @@ struct dentry *dentry; struct nameidata nd; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_MKDIR; + nd.complete_args.ints.i1 = mode & ~current->fs->umask; + if (path_init(tmp, LOOKUP_COMPLETE|LOOKUP_PARENT|LOOKUP_STAYFS, + &nd)) 
+#else +#ifdef CONFIG_MOSIX_DFSA + if (path_init(tmp, LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else if (path_init(tmp, LOOKUP_PARENT, &nd)) +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ error = path_walk(tmp, &nd); if (error) goto out; +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto out; +#endif /* CONFIG_MOSIX_FS */ dentry = lookup_create(&nd, 1); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -1411,10 +1817,23 @@ if(IS_ERR(name)) return PTR_ERR(name); +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_RMDIR; + if (path_init(name, LOOKUP_COMPLETE|LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else +#ifdef CONFIG_MOSIX_DFSA + if (path_init(name, LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else if (path_init(name, LOOKUP_PARENT, &nd)) +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ error = path_walk(name, &nd); if (error) goto exit; +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto exit; +#endif /* CONFIG_MOSIX_FS */ switch(nd.last_type) { case LAST_DOTDOT: @@ -1480,10 +1899,23 @@ if(IS_ERR(name)) return PTR_ERR(name); +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_UNLINK; + if (path_init(name, LOOKUP_COMPLETE|LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else +#ifdef CONFIG_MOSIX_DFSA + if (path_init(name, LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else if (path_init(name, LOOKUP_PARENT, &nd)) +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ error = path_walk(name, &nd); if (error) goto exit; +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto exit; +#endif /* CONFIG_MOSIX_FS */ error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; @@ -1552,10 +1984,25 @@ struct dentry *dentry; struct nameidata nd; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_SYMLINK; + nd.complete_parg = from; + if (path_init(to, LOOKUP_COMPLETE|LOOKUP_PARENT|LOOKUP_STAYFS, + &nd)) +#else +#ifdef CONFIG_MOSIX_DFSA + if (path_init(to, LOOKUP_PARENT|LOOKUP_STAYFS, &nd)) +#else if (path_init(to, LOOKUP_PARENT, 
&nd)) +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ error = path_walk(to, &nd); if (error) goto out; +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto out; +#endif /* CONFIG_MOSIX_FS */ dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -1634,16 +2081,40 @@ if (!IS_ERR(to)) { struct dentry *new_dentry; struct nameidata nd, old_nd; +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + unsigned int more_flags = 0; +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ error = 0; +#ifdef CONFIG_MOSIX_DFSA + if (path_init(from, LOOKUP_POSITIVE|LOOKUP_STAYFS, &old_nd)) +#else if (path_init(from, LOOKUP_POSITIVE, &old_nd)) +#endif /* CONFIG_MOSIX_DFSA */ error = path_walk(from, &old_nd); if (error) goto exit; +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & (DREMOTE|DSTATSDOWN)) + more_flags |= LOOKUP_STAYFS; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_LINK; + nd.complete_args.oldnd = &old_nd; + more_flags |= LOOKUP_COMPLETE; +#endif /* CONFIG_MOSIX_FS */ +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + if (path_init(to, LOOKUP_PARENT|more_flags, &nd)) +#else if (path_init(to, LOOKUP_PARENT, &nd)) +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ error = path_walk(to, &nd); if (error) goto out; +#ifdef CONFIG_MOSIX_FS + if(nd.complete_flags & COMPLETE_DONE) + goto out; +#endif /* CONFIG_MOSIX_FS */ error = -EXDEV; if (old_nd.mnt != nd.mnt) goto out_release; @@ -1840,15 +2311,39 @@ struct dentry * old_dir, * new_dir; struct dentry * old_dentry, *new_dentry; struct nameidata oldnd, newnd; - +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + unsigned int more_flags = 0; +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ + +#ifdef CONFIG_MOSIX_DFSA + if (path_init(oldname, LOOKUP_PARENT|LOOKUP_STAYFS, &oldnd)) +#else if (path_init(oldname, LOOKUP_PARENT, &oldnd)) +#endif /* CONFIG_MOSIX_DFSA */ error = path_walk(oldname, &oldnd); if 
(error) goto exit; +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & (DREMOTE|DSTATSDOWN)) + more_flags |= LOOKUP_STAYFS; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + newnd.express_function = EF_RENAME; + newnd.complete_args.oldnd = &oldnd; + more_flags |= LOOKUP_COMPLETE; +#endif /* CONFIG_MOSIX_FS */ +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + if (path_init(newname, LOOKUP_PARENT|more_flags, &newnd)) +#else if (path_init(newname, LOOKUP_PARENT, &newnd)) +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ error = path_walk(newname, &newnd); +#ifdef CONFIG_MOSIX_FS + if(newnd.complete_flags & COMPLETE_DONE) + goto exit1; +#endif /* CONFIG_MOSIX_FS */ if (error) goto exit1; @@ -1955,8 +2450,16 @@ if (!walk_init_root(link, nd)) /* weird __emul_prefix() stuff did it */ goto out; +#ifdef CONFIG_MOSIX_FS + if(nd->flags & LOOKUP_MFS) + nd->flags |= LOOKUP_MFS_MFS; +#endif /* CONFIG_MOSIX_FS */ } res = link_path_walk(link, nd); +#ifdef CONFIG_MOSIX_FS + if(!nd->name_left) + nd->flags &= ~LOOKUP_MFS_MFS; +#endif /* CONFIG_MOSIX_FS */ out: if (current->link_count || res || nd->last_type!=LAST_NORM) return res; diff -urN linux-2.4.17/fs/namespace.c linux_umopenmosix/fs/namespace.c --- linux-2.4.17/fs/namespace.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/namespace.c Wed Jun 26 23:45:17 2002 @@ -21,13 +21,24 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA +#include +#include +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ + struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); int do_remount_sb(struct super_block *sb, int flags, void * data); void kill_super(struct super_block *sb); static struct list_head *mount_hashtable; static int hash_mask, hash_bits; -static kmem_cache_t *mnt_cache; +static kmem_cache_t *mnt_cache; static LIST_HEAD(vfsmntlist); static DECLARE_MUTEX(mount_sem); @@ -45,7 +56,7 @@ struct vfsmount 
*alloc_vfsmnt(void) { - struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); + struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); if (mnt) { memset(mnt, 0, sizeof(struct vfsmount)); atomic_set(&mnt->mnt_count,1); @@ -240,7 +251,12 @@ } if (mnt->mnt_sb->s_op->show_options) err = mnt->mnt_sb->s_op->show_options(m, mnt); - seq_puts(m, " 0 0\n"); + seq_puts(m, " 0 0"); +#ifdef CONFIG_MOSIX_DFSA + if (mnt->mnt_dfsa) + seq_printf(m, ",dfsa=%d", mnt->mnt_dfsa); +#endif /* CONFIG_MOSIX_DFSA */ + seq_puts(m, "\n"); return err; } @@ -333,7 +349,16 @@ return retval; } +#ifdef CONFIG_MOSIX_DFSA + down_half_interruptible(&mount_sem); + if((retval = dfsa_setmnt(mnt, 0, 0))) + { + up(&mount_sem); + return(retval); + } +#else down(&mount_sem); +#endif /* CONFIG_MOSIX_DFSA */ spin_lock(&dcache_lock); if (atomic_read(&sb->s_active) == 1) { @@ -398,9 +423,9 @@ } /* - * The 2.0 compatible umount. No flags. + * The 2.0 compatible umount. No flags. */ - + asmlinkage long sys_oldumount(char * name) { return sys_umount(name,0); @@ -510,7 +535,11 @@ if (err) return err; +#ifdef CONFIG_MOSIX + down_half_interruptible(&mount_sem); +#else down(&mount_sem); +#endif /* CONFIG_MOSIX */ err = -EINVAL; if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) { err = -ENOMEM; @@ -545,6 +574,9 @@ { int err; struct super_block * sb = nd->mnt->mnt_sb; +#ifdef CONFIG_MOSIX_DFSA + int odfsa = 0, new_dfsa, cng_dfsa; +#endif /* CONFIG_MOSIX_DFSA */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -555,11 +587,33 @@ if (nd->dentry != nd->mnt->mnt_root) return -EINVAL; +#ifdef CONFIG_MOSIX_DFSA + if((cng_dfsa = dfsa_option(data, &new_dfsa))) + { + down_half_interruptible(&mount_sem); + odfsa = nd->mnt->mnt_dfsa; + nd->mnt->mnt_dfsa = new_dfsa; + err = dfsa_setmnt(nd->mnt, new_dfsa, flags); + up(&mount_sem); + if(err) + return(err); + } +#endif /* CONFIG_MOSIX_DFSA */ down_write(&sb->s_umount); err = do_remount_sb(sb, flags, data); if (!err) nd->mnt->mnt_flags=mnt_flags; 
up_write(&sb->s_umount); +#ifdef CONFIG_MOSIX_DFSA + if(err && cng_dfsa) + { + /* undo - unless someone else played with mount meanwhile */ + down_half_interruptible(&mount_sem); + if(nd->mnt->mnt_dfsa == new_dfsa) + dfsa_setmnt(nd->mnt, odfsa, nd->mnt->mnt_sb->s_flags); + up(&mount_sem); + } +#endif /* CONFIG_MOSIX_DFSA */ return err; } @@ -572,7 +626,11 @@ if (IS_ERR(mnt)) goto out; +#ifdef CONFIG_MOSIX_DFSA + down_half_interruptible(&mount_sem); +#else down(&mount_sem); +#endif /* CONFIG_MOSIX_DFSA */ /* Something was mounted here while we slept */ while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) ; @@ -599,7 +657,7 @@ int i; unsigned long page; unsigned long size; - + *where = 0; if (!data) return 0; @@ -618,7 +676,7 @@ i = size - copy_from_user((void *)page, data, size); if (!i) { - free_page(page); + free_page(page); return -EFAULT; } if (i != PAGE_SIZE) @@ -647,6 +705,9 @@ struct nameidata nd; int retval = 0; int mnt_flags = 0; +#ifdef CONFIG_MOSIX_DFSA + int dfsa_opt; +#endif /* CONFIG_MOSIX_DFSA */ /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -682,6 +743,22 @@ else retval = do_add_mount(&nd, type_page, flags, mnt_flags, dev_name, data_page); +#ifdef CONFIG_MOSIX_DFSA + down_half_interruptible(&mount_sem); + if(!retval && dfsa_option((char *)data_page, &dfsa_opt) && + d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry) + && (nd.mnt->mnt_dfsa = dfsa_opt) && + dfsa_setmnt(nd.mnt, dfsa_opt, flags)) + { + /* too late to undo the mount - all we can do is complain: */ + nd.mnt->mnt_dfsa = 0; + printk("Mount succeeded, but DFSA option failed: " + "if you can fix the problem, run:\n"); + printk("'mount -o remount,dfsa=%d %s %s'\n", dfsa_opt, + dev_name, dir_name); + } + up(&mount_sem); +#endif /* CONFIG_MOSIX_DFSA */ path_release(&nd); return retval; } @@ -733,7 +810,12 @@ struct fs_struct *fs; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + /* DFSA is disabled, so no REMOTE roots (and no cwd's away) */ + 
for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ task_lock(p); fs = p->fs; if (fs) { @@ -750,6 +832,32 @@ read_unlock(&tasklist_lock); } +#ifdef CONFIG_MOSIX_DFSA +void +restore_dfsa_mounts(void) /* re-apply each mount's dfsa setting; on failure warn once and clear it */ +{ + struct list_head *p; + int told = 0; + + for (p = vfsmntlist.next; p != &vfsmntlist; p = p->next) + { + struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list); + + if(tmp->mnt_dfsa && + dfsa_setmnt(tmp, tmp->mnt_dfsa, tmp->mnt_sb->s_flags)) + { + if(!told++) + printk("One or more partitions failed to restore DFSA" + " status after pivot_root.\n" + "If you can fix the problem, please remount " + "the affected partition(s) with the appropriate" + " dfsa= option\n"); + tmp->mnt_dfsa = 0; + } + } +} +#endif /* CONFIG_MOSIX_DFSA */ + /* * Moves the current root to put_root, and sets root/cwd of all processes * which had them on the old root to new_root. @@ -773,6 +881,10 @@ if (!capable(CAP_SYS_ADMIN)) return -EPERM; +#ifdef CONFIG_MOSIX_DFSA + if((error = disable_dfsa())) + return(error); +#endif /* CONFIG_MOSIX_DFSA */ lock_kernel(); name = getname(new_root); @@ -804,7 +916,11 @@ user_nd.mnt = mntget(current->fs->rootmnt); user_nd.dentry = dget(current->fs->root); read_unlock(&current->fs->lock); +#ifdef CONFIG_MOSIX_DFSA + down_half_interruptible(&mount_sem); +#else down(&mount_sem); +#endif /* CONFIG_MOSIX_DFSA */ down(&old_nd.dentry->d_inode->i_zombie); error = -EINVAL; if (!check_mnt(user_nd.mnt)) @@ -816,6 +932,15 @@ goto out2; if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry)) goto out2; +#ifdef CONFIG_MOSIX_FS + if(new_nd.dentry->d_inode->i_sb->s_magic == MFS_SUPER_MAGIC || + old_nd.dentry->d_inode->i_sb->s_magic == MFS_SUPER_MAGIC) + { + /* MFS does not support mounts (which are otherwise ignored) */ + error = -EINVAL; + goto out2; + } +#endif /* CONFIG_MOSIX_FS */ error = -EBUSY; if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt) goto out2; /* loop */ @@ -843,6 +968,9 @@ attach_mnt(user_nd.mnt, &old_nd); 
attach_mnt(new_nd.mnt, &root_parent); spin_unlock(&dcache_lock); +#ifdef CONFIG_MOSIX_FS + mfs_change_root(user_nd.dentry, user_nd.mnt, new_nd.dentry, new_nd.mnt); +#endif /* CONFIG_MOSIX_FS */ chroot_fs_refs(&user_nd, &new_nd); error = 0; path_release(&root_parent); @@ -856,6 +984,10 @@ path_release(&new_nd); out0: unlock_kernel(); +#ifdef CONFIG_MOSIX_DFSA + enable_dfsa(); + restore_dfsa_mounts(); +#endif /* CONFIG_MOSIX_DFSA */ return error; out3: spin_unlock(&dcache_lock); @@ -1001,7 +1133,7 @@ mount_root(); #if 1 shrink_dcache(); - printk("change_root: old root has d_count=%d\n", + printk("change_root: old root has d_count=%d\n", atomic_read(&old_rootmnt->mnt_root->d_count)); #endif mount_devfs_fs (); @@ -1033,7 +1165,7 @@ } else { printk("okay\n"); error = 0; - } + } kfree(new_devname); return error; } diff -urN linux-2.4.17/fs/nfsd/auth.c linux_umopenmosix/fs/nfsd/auth.c --- linux-2.4.17/fs/nfsd/auth.c Mon Jul 24 09:04:10 2000 +++ linux_umopenmosix/fs/nfsd/auth.c Wed Jun 26 23:45:17 2002 @@ -9,6 +9,9 @@ #include #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) void @@ -20,6 +23,9 @@ if (rqstp->rq_userset) return; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if (exp->ex_flags & NFSEXP_ALLSQUASH) { cred->cr_uid = exp->ex_anon_uid; cred->cr_gid = exp->ex_anon_gid; diff -urN linux-2.4.17/fs/nfsd/export.c linux_umopenmosix/fs/nfsd/export.c --- linux-2.4.17/fs/nfsd/export.c Thu Oct 4 07:57:36 2001 +++ linux_umopenmosix/fs/nfsd/export.c Wed Jun 26 23:45:17 2002 @@ -463,6 +463,9 @@ return 0; } +#ifdef CONFIG_MOSIX + current->mosix.ignoreoldsigs = 1; +#endif /* CONFIG_MOSIX */ current->sigpending = 0; want_lock++; while (hash_count || hash_lock) { diff -urN linux-2.4.17/fs/nfsd/vfs.c linux_umopenmosix/fs/nfsd/vfs.c --- linux-2.4.17/fs/nfsd/vfs.c Fri Dec 21 19:41:55 2001 +++ linux_umopenmosix/fs/nfsd/vfs.c Wed 
Jun 26 23:45:17 2002 @@ -45,6 +45,10 @@ #include +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ + #define NFSDDBG_FACILITY NFSDDBG_FILEOP #define NFSD_PARANOIA @@ -108,6 +112,13 @@ dparent = fhp->fh_dentry; exp = fhp->fh_export; +#ifdef CONFIG_MOSIX_FS + if(dparent->d_sb->s_magic == MFS_SUPER_MAGIC) + { + err = -EPERM; + goto out; + } +#endif /* CONFIG_MOSIX_FS */ err = nfserr_acces; /* Lookup the name, but don't follow links */ @@ -510,6 +521,9 @@ filp->f_op->release(inode, filp); fops_put(filp->f_op); if (filp->f_mode & FMODE_WRITE) +#ifdef CONFIG_MOSIX + if (!(filp->f_flags & O_NOWRITEACCESS)) +#endif /* CONFIG_MOSIX */ put_write_access(inode); } diff -urN linux-2.4.17/fs/open.c linux_umopenmosix/fs/open.c --- linux-2.4.17/fs/open.c Fri Oct 12 22:48:42 2001 +++ linux_umopenmosix/fs/open.c Wed Jun 26 23:45:17 2002 @@ -20,6 +20,12 @@ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) +#ifdef CONFIG_MOSIX_DFSA +#include +#include +#include +#endif /* CONFIG_MOSIX_DFSA */ + int vfs_statfs(struct super_block *sb, struct statfs *buf) { int retval = -ENODEV; @@ -99,7 +105,21 @@ if (length < 0) /* sorry, but loff_t says... 
*/ goto out; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_TRUNCATE; + nd.complete_args.len = length; + error = __user_walk(path, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS| + LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(path, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(path, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (error) goto out; inode = nd.dentry->d_inode; @@ -165,6 +185,9 @@ error = -EINVAL; if (length < 0) goto out; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ error = -EBADF; file = fget(fd); if (!file) @@ -236,7 +259,21 @@ struct inode * inode; struct iattr newattrs; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_UTIME; + nd.complete_parg = times; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (error) goto out; inode = nd.dentry->d_inode; @@ -280,7 +317,21 @@ struct inode * inode; struct iattr newattrs; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_UTIMES; + nd.complete_parg = utimes; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS| LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS , &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (error) goto out; @@ -339,7 +390,22 @@ else current->cap_effective = current->cap_permitted; +#ifdef CONFIG_MOSIX_FS + nd.express_function = 
EF_ACCESS; + nd.complete_args.ints.i1 = mode; + res = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_COMPLETE|LOOKUP_STAYFS, + &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(res); +#else +#ifdef CONFIG_MOSIX_DFSA + res = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else res = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!res) { res = permission(nd.dentry->d_inode, mode); /* SuS v2 requires we report a read only fs too */ @@ -352,6 +418,11 @@ current->fsuid = old_fsuid; current->fsgid = old_fsgid; current->cap_effective = old_cap; +#ifdef CONFIG_MOSIX_DFSA + /* a bit paranoid, for the rare case when we slept interruptibly + * during "permission" and the temporary fs[ug]id passed across: */ + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ return res; } @@ -368,7 +439,12 @@ goto out; error = 0; +#ifdef CONFIG_MOSIX_DFSA + if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY| + LOOKUP_STAYFS, &nd)) +#else if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd)) +#endif /* CONFIG_MOSIX_DFSA */ error = path_walk(name, &nd); putname(name); if (error) @@ -394,6 +470,9 @@ struct vfsmount *mnt; int error; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ error = -EBADF; file = fget(fd); if (!file) @@ -459,6 +538,9 @@ int err = -EBADF; struct iattr newattrs; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ file = fget(fd); if (!file) goto out; @@ -491,7 +573,21 @@ int error; struct iattr newattrs; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_CHMOD; + nd.complete_args.mode = mode; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + 
LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (error) goto out; inode = nd.dentry->d_inode; @@ -516,7 +612,11 @@ return error; } +#ifdef CONFIG_MOSIX_FS +int chown_common(struct dentry * dentry, uid_t user, gid_t group) +#else static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +#endif /* CONFIG_MOSIX_FS */ { struct inode * inode; int error; @@ -581,7 +681,22 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_CHOWN; + nd.complete_args.ids.uid = user; + nd.complete_args.ids.gid = group; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = chown_common(nd.dentry, user, group); path_release(&nd); @@ -594,7 +709,21 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_CHOWN; + nd.complete_args.ids.uid = user; + nd.complete_args.ids.gid = group; + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS, &nd); +#else error = user_path_walk_link(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = chown_common(nd.dentry, user, group); path_release(&nd); @@ -608,6 +737,9 @@ struct file * file; int error = -EBADF; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ file = fget(fd); if (file) { error = chown_common(file->f_dentry, user, group); @@ -634,6 +766,9 @@ { 
int namei_flags, error; struct nameidata nd; +#ifdef CONFIG_MOSIX_FS + int old_magic = current->mosix.lastmagic; +#endif /* CONFIG_MOSIX_FS */ namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) @@ -641,9 +776,31 @@ if (namei_flags & O_TRUNC) namei_flags |= 2; +#ifdef CONFIG_MOSIX_FS + if(flags & O_CREAT) + current->mosix.lastmagic = current->mosix.whereami ? : PE; +#endif /* CONFIG_MOSIX_FS */ error = open_namei(filename, namei_flags, mode, &nd); +#ifdef CONFIG_MOSIX_FS + if(!(nd.complete_flags & COMPLETE_MAGICAL)) + current->mosix.lastmagic = old_magic; +#endif /* CONFIG_MOSIX_FS */ if (!error) +#ifdef CONFIG_MOSIX_FS + { + struct file *newf = dentry_open(nd.dentry, nd.mnt, flags); + + if(IS_ERR(newf)) + current->mosix.lastmagic = old_magic; +#ifdef CONFIG_MOSIX_DFSA + else if(current->mosix.lastmagic != old_magic) + current->mosix.dupdates |= DFSA_UPDSEL; +#endif /* CONFIG_MOSIX_DFSA */ + return(newf); + } +#else return dentry_open(nd.dentry, nd.mnt, flags); +#endif /* CONFIG_MOSIX_FS */ return ERR_PTR(error); } @@ -735,6 +892,11 @@ /* Do we need to expand the fdset array? */ if (fd >= files->max_fdset) { +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DREMOTE) + error = -EDOITATHOME; + else +#endif /* CONFIG_MOSIX_DFSA */ error = expand_fdset(files, fd); if (!error) { error = -EMFILE; @@ -747,6 +909,11 @@ * Check whether we need to expand the fd array. 
*/ if (fd >= files->max_fds) { +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DREMOTE) + error = -EDOITATHOME; + else +#endif /* CONFIG_MOSIX_DFSA */ error = expand_fd_array(files, fd); if (!error) { error = -EMFILE; @@ -785,10 +952,46 @@ if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { +#ifdef CONFIG_MOSIX_DFSA + struct file * f; + mode_t imode; + + current->mosix.dirty_bits |= FILP_OPEN_SYSCALL; + f = filp_open(tmp, flags, mode); + current->mosix.dirty_bits &= ~FILP_OPEN_SYSCALL; +#else struct file *f = filp_open(tmp, flags, mode); +#endif /* CONFIG_MOSIX_DFSA */ error = PTR_ERR(f); if (IS_ERR(f)) goto out_error; +#ifdef CONFIG_MOSIX_DFSA + imode = f->f_dentry->d_inode->i_mode; + if(!S_ISREG(imode) && !S_ISDIR(imode)) + { + if(current->mosix.dflags & DREMOTE) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%d(%s)-File <%s> Did not open, " + "mode=%d\n", current->pid, + current->comm, tmp, imode); +#endif /* CONFIG_MOSIX_DEBUG */ + filp_close(f, NULL); + error = -EDOITATHOME; + goto out_error; + } + current->mosix.dflags &= ~DSTATSDOWN; + } +#ifdef CONFIG_MOSIX_DEBUG + if((current->mosix.dflags & DREMOTE) && + (ds_debug & DSDEB_DFSA)) + printk("%d(%s)-Opened <%s> Remotely, fd=%d, " + "ino=%ld\n", current->pid, + current->comm, tmp, (int)fd, + f->f_dentry->d_inode->i_ino); +#endif /* CONFIG_MOSIX_DEBUG */ +#endif /* CONFIG_MOSIX_DFSA */ fd_install(fd, f); } out: @@ -799,6 +1002,10 @@ out_error: put_unused_fd(fd); fd = error; +#ifdef CONFIG_MOSIX_DFSA + if(error == -ETXTBSY && (current->mosix.dflags & DREMOTE)) + fd = -EDOITATHOME; /* fd was already loaded from error above, so the override must go into fd */ +#endif /* CONFIG_MOSIX_DFSA */ goto out; } @@ -859,6 +1066,9 @@ FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); write_unlock(&files->file_lock); +#ifdef CONFIG_MOSIX_DFSA + dfsa_close_file(fd); +#endif /* CONFIG_MOSIX_DFSA */ return filp_close(filp, files); out_unlock: @@ -866,6 +1076,35 @@ return -EBADF; } +#ifdef CONFIG_MOSIX_DFSA +/* + * almost like "sys_close", except for raising 
DSTATSDOWN in the right + * circumstances, which we cannot do in "sys_close" itself only + * because it can be called from other system-calls. + * [wish they all called "do_close" instead!] + */ +asmlinkage long sys_close_syscall(unsigned int fd) +{ + register struct task_struct *p = current; + + if((p->mosix.dflags & (DDEPUTY|DREMOTE)) || !CAN_DFSA(p)) + goto call_sys; + + /* no locking of "&p->files->file_lock" is needed because CAN_DFSA + * included checking that no clones are sharing our files! */ + if (fd >= p->files->max_fds) + /* if we were to run remotely, we could also "close" a bad file there!*/ + { + p->mosix.dflags |= DSTATSDOWN; + return(-EBADF); + } + if(can_dfsa_file(fd)) /* and we already know CAN_DFSA(p) is true */ + p->mosix.dflags |= DSTATSDOWN; + call_sys: + return(sys_close(fd)); +} +#endif /* CONFIG_MOSIX_DFSA */ + /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. diff -urN linux-2.4.17/fs/pipe.c linux_umopenmosix/fs/pipe.c --- linux-2.4.17/fs/pipe.c Sat Sep 29 03:03:48 2001 +++ linux_umopenmosix/fs/pipe.c Wed Jun 26 23:45:17 2002 @@ -69,8 +69,24 @@ goto out; for (;;) { +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + unsigned long bytes, prev; +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ PIPE_WAITING_READERS(*inode)++; +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + bytes = count; + if(bytes > PIPE_BUF) + bytes = PIPE_BUF; + prev = PIPE_READING_BYTES(*inode); + PIPE_READING_BYTES(*inode) += bytes; + if (!prev && (PIPE_EXCEPTIONS(*inode) & + PIPE_EXCEPTION_INPUT)) + wake_up_interruptible(PIPE_WAIT(*inode)); +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ pipe_wait(inode); +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + PIPE_READING_BYTES(*inode) -= bytes; +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ PIPE_WAITING_READERS(*inode)--; ret = -ERESTARTSYS; if (signal_pending(current)) @@ -264,6 +280,22 @@ return -EBADF; } +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS +static inline int +set_pipe_exceptions(struct inode *ip, 
unsigned long val) +{ + int oval = PIPE_EXCEPTIONS(*ip); + + if(val & ~(PIPE_EXCEPTION_INPUT|PIPE_EXCEPTION_NOINPUT)) /* reject unknown bits before touching the pipe */ + return(-EINVAL); + PIPE_EXCEPTIONS(*ip) = val; + if(((val & PIPE_EXCEPTION_INPUT & ~oval) && PIPE_READING_BYTES(*ip)) + || ((val & PIPE_EXCEPTION_NOINPUT & ~oval) && !PIPE_READERS(*ip))) + wake_up_interruptible(PIPE_WAIT(*ip)); + return(0); +} +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ + static int pipe_ioctl(struct inode *pino, struct file *filp, unsigned int cmd, unsigned long arg) @@ -271,6 +303,12 @@ switch (cmd) { case FIONREAD: return put_user(PIPE_LEN(*pino), (int *)arg); +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + case TCSBRK: + return(set_pipe_exceptions(pino, arg)); + case TIOCGWINSZ: + return(PIPE_READING_BYTES(*pino)); +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ default: return -EINVAL; } @@ -293,6 +331,13 @@ mask |= POLLHUP; if (!PIPE_READERS(*inode)) mask |= POLLERR; +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + if (((PIPE_EXCEPTIONS(*inode) & PIPE_EXCEPTION_INPUT) && + PIPE_READING_BYTES(*inode)) || + ((PIPE_EXCEPTIONS(*inode) & PIPE_EXCEPTION_NOINPUT) && + !PIPE_READERS(*inode))) + mask |= POLLPRI; +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ return mask; } @@ -458,6 +503,9 @@ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0; PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1; +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + PIPE_READING_BYTES(*inode) = PIPE_EXCEPTIONS(*inode) = 0; +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ return inode; fail_page: diff -urN linux-2.4.17/fs/proc/array.c linux_umopenmosix/fs/proc/array.c --- linux-2.4.17/fs/proc/array.c Thu Oct 11 18:00:01 2001 +++ linux_umopenmosix/fs/proc/array.c Wed Jun 26 23:45:17 2002 @@ -76,6 +76,13 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#include +#include +#include +#endif /* CONFIG_MOSIX */ + /* Gcc optimizes away "strlen(x)" for constant x */ #define ADDBUF(buffer, string) \ do { memcpy(buffer, string, strlen(string)); \ 
@@ -130,7 +137,11 @@ static inline const char * get_task_state(struct task_struct *tsk) { +#ifdef CONFIG_MOSIX + unsigned int state = LOGICAL_STATE(tsk) & (TASK_RUNNING | +#else unsigned int state = tsk->state & (TASK_RUNNING | +#endif /* CONFIG_MOSIX */ TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_ZOMBIE | @@ -176,7 +187,11 @@ return buffer; } +#ifdef CONFIG_MOSIX +inline char * task_mem(struct mm_struct *mm, char *buffer) +#else static inline char * task_mem(struct mm_struct *mm, char *buffer) +#endif /* CONFIG_MOSIX */ { struct vm_area_struct * vma; unsigned long data = 0, stack = 0; @@ -274,10 +289,18 @@ { char * orig = buffer; struct mm_struct *mm; +#ifdef CONFIG_MOSIX + int should_ask = 0; +#endif /* CONFIG_MOSIX */ buffer = task_name(task, buffer); buffer = task_state(task, buffer); task_lock(task); +#ifdef CONFIG_MOSIX + if((should_ask = SHOULD_ASK_PROCESS(task))) + mm = NULL; + else +#endif /* CONFIG_MOSIX */ mm = task->mm; if(mm) atomic_inc(&mm->mm_users); @@ -286,6 +309,10 @@ buffer = task_mem(mm, buffer); mmput(mm); } +#ifdef CONFIG_MOSIX + else if(should_ask) + buffer = request_process_to_buf(task,buffer,PR_PROCFS_TASK_MEM); +#endif /* CONFIG_MOSIX */ buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); #if defined(CONFIG_ARCH_S390) @@ -294,10 +321,126 @@ return buffer - orig; } +#ifdef CONFIG_MOSIX + +int proc_pid_where(struct task_struct *task, char *buffer) +{ + task_lock(task); + sprintf(buffer, "%d\n", (task->mosix.stay & DSTAY_ITS_DAEMON) ? 0 : + task->mosix.whereami); + task_unlock(task); + return (strlen(buffer)); +} + +int proc_pid_lock(struct task_struct *task, char *buffer) +{ + sprintf(buffer, "%d\n", (task->mosix.stay & DNOMIGRATE) ? 
1 : 0); + return(strlen(buffer)); /* "0\n" or "1\n": 2 bytes, excluding the NUL */ +} + +int proc_pid_disclosure(struct task_struct *task, char *buffer) +{ + sprintf(buffer, "%d\n", task->mosix.disclosure); + return(strlen(buffer)); +} + +char *mosix_stay_string[] = { + "monkey", "mmap_dev", "VM86_mode", "daemon", + "priv_inst", "mem_lock", "clone_vm", "rt_sched", + "direct_io", "init_proc", "kiobuf", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "extern_1", "extern_2", "extern_3", "extern_4", + NULL, NULL, NULL, "user_lock" +}; + +int proc_pid_cantmove(struct task_struct *task, char *buffer) +{ + typeof (task->mosix.stay) i; + int j; + + for (buffer[0] = '\0' , j = 0 , i = 1 ; i ; i <<= 1 , j++) + if (task->mosix.stay & DSTAY & i) + { + if(buffer[0]) + strcat(buffer, ", "); + strcat(buffer, mosix_stay_string[j] ? : "unrecognized"); + } + if(buffer[0]) + strcat(buffer, "\n"); + return(strlen(buffer)); +} + +int proc_pid_nmigs(struct task_struct *task, char *buffer) +{ + task_lock(task); + sprintf(buffer, "%d\n", task->mosix.nmigs); + task_unlock(task); + return(strlen(buffer)); +} + +int proc_pid_sigmig(struct task_struct *task, char * buffer) +{ + sprintf(buffer, "%d\n", task->mosix.sigmig); + return(strlen(buffer)); +} + +#ifdef CONFIG_MOSIX_FS +int proc_pid_selected(struct task_struct *task, char *buffer) +{ + sprintf(buffer, "%d\n", task->mosix.selected); + return(strlen(buffer)); +} +#endif /* CONFIG_MOSIX_FS */ + +void +proc_get_stat_parts(struct task_struct *task, struct mm_struct *mm, + struct proc_remote_stat_parts *t) +{ + if (mm) + { + struct vm_area_struct *vma; + + t->vsize = 0; + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + t->vsize += vma->vm_end - vma->vm_start; + vma = vma->vm_next; + } + t->rss = mm->rss; + t->start_code = mm->start_code; + t->end_code = mm->end_code; + t->start_stack = mm->start_stack; + up_read(&mm->mmap_sem); + if(task == current && (task->mosix.dflags & DREMOTE)) + /* just in case EIP and/or ESP are held by 
DEPUTY: */ + mosix_obtain_registers( + BIT_OF_REGISTER(eip)|BIT_OF_REGISTER(esp)); + t->eip = KSTK_EIP(task); + t->esp = KSTK_ESP(task); + } + else + memset(t, 0, sizeof(*t)); + t->priority = task->counter; + t->priority = 20 - (t->priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER; + t->processor = task->processor; + if(task->mosix.dflags & DREMOTE) + t->processor += 32*PE; +} +#endif /* CONFIG_MOSIX */ + int proc_pid_stat(struct task_struct *task, char * buffer) { +#ifdef CONFIG_MOSIX + unsigned long wchan; + long nice; + struct proc_remote_stat_parts rinfo; +#else unsigned long vsize, eip, esp, wchan; long priority, nice; +#endif /* CONFIG_MOSIX */ int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; @@ -306,8 +449,36 @@ struct mm_struct *mm; state = *get_task_state(task); +#ifndef CONFIG_MOSIX vsize = eip = esp = 0; +#endif /* CONFIG_MOSIX */ task_lock(task); +#ifdef CONFIG_MOSIX + if (task->tty) { + tty_pgrp = task->tty->pgrp; + tty_nr = kdev_t_to_nr(task->tty->device); + } + if(SHOULD_ASK_PROCESS(task)) + { + task_unlock(task); + if(request_process(task, &rinfo, PR_PROCFS_GET_STAT_PARTS, 0)<0) + memset((caddr_t)&rinfo, 0, sizeof(rinfo)); + mm = NULL; + } + else + { + mm = task->mm; + if(mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + proc_get_stat_parts(task, mm, &rinfo); + } + /* the following test has nothing to do with MOSIX - just security! 
*/ + if ((current->fsuid != task->euid || + !cap_issubset(task->cap_permitted, current->cap_permitted)) && + !capable(CAP_DAC_OVERRIDE)) + rinfo.eip = rinfo.esp = 0; +#else mm = task->mm; if(mm) atomic_inc(&mm->mm_users); @@ -328,6 +499,7 @@ esp = KSTK_ESP(task); up_read(&mm->mmap_sem); } +#endif /* CONFIG_MOSIX */ wchan = get_wchan(task); @@ -335,8 +507,10 @@ /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ +#ifndef CONFIG_MOSIX priority = task->counter; priority = 20 - (priority * 10 + DEF_COUNTER / 2) / DEF_COUNTER; +#endif /* CONFIG_MOSIX */ nice = task->nice; read_lock(&tasklist_lock); @@ -362,19 +536,36 @@ task->times.tms_stime, task->times.tms_cutime, task->times.tms_cstime, +#ifdef CONFIG_MOSIX + rinfo.priority, +#else priority, +#endif /* CONFIG_MOSIX */ nice, 0UL /* removed */, task->it_real_value, task->start_time, +#ifdef CONFIG_MOSIX + rinfo.vsize, + rinfo.rss, +#else vsize, mm ? mm->rss : 0, /* you might want to shift this left 3 */ +#endif /* CONFIG_MOSIX */ task->rlim[RLIMIT_RSS].rlim_cur, +#ifdef CONFIG_MOSIX + rinfo.start_code, + rinfo.end_code, + rinfo.start_stack, + rinfo.esp, + rinfo.eip, +#else mm ? mm->start_code : 0, mm ? mm->end_code : 0, mm ? mm->start_stack : 0, esp, eip, +#endif /* CONFIG_MOSIX */ /* The signal information here is obsolete. * It must be decimal for Linux 2.0 compatibility. * Use /proc/#/status for real-time signals. 
@@ -387,7 +578,11 @@ task->nswap, task->cnswap, task->exit_signal, +#ifdef CONFIG_MOSIX + (int)rinfo.processor); +#else task->processor); +#endif /* CONFIG_MOSIX */ if(mm) mmput(mm); return res; @@ -472,12 +667,24 @@ { struct mm_struct *mm; int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0; +#ifdef CONFIG_MOSIX + int should_ask = 0; +#endif /* CONFIG_MOSIX */ task_lock(task); +#ifdef CONFIG_MOSIX + if((should_ask = SHOULD_ASK_PROCESS(task))) + mm = NULL; + else +#endif /* CONFIG_MOSIX */ mm = task->mm; if(mm) atomic_inc(&mm->mm_users); task_unlock(task); +#ifdef CONFIG_MOSIX + if(should_ask) + return(request_process(task, buffer, PR_PROCFS_GET_STATM, 0)); +#endif /* CONFIG_MOSIX */ if (mm) { struct vm_area_struct * vma; down_read(&mm->mmap_sem); @@ -538,6 +745,46 @@ #define MAPS_LINE_FORMAT (sizeof(void*) == 4 ? MAPS_LINE_FORMAT4 : MAPS_LINE_FORMAT8) #define MAPS_LINE_MAX (sizeof(void*) == 4 ? MAPS_LINE_MAX4 : MAPS_LINE_MAX8) +#ifdef CONFIG_MOSIX +int +proc_list_maps(struct vmamaps *maps, int start) +{ + struct mm_struct *mm; + struct vm_area_struct *map; + int i; + + if(!(mm = current->mm)) + return(0); + down_read(&mm->mmap_sem); + map = mm->mmap; + while(map && start-- > 0) + map = map->vm_next; + for(i = 0 ; map && i < PAGE_SIZE / sizeof(struct vmamaps) ; i++) + { + maps->vmstart = map->vm_start; + maps->vmend = map->vm_end; + maps->vmflags = map->vm_flags; + maps->vmpgoff = map->vm_pgoff; + maps->fp = map->vm_file ? 
home_file(map->vm_file) : 0; + maps++; + map = map->vm_next; + } + up_read(&mm->mmap_sem); + return (i * sizeof(struct vmamaps)); +} + +void +ref_mapped_files(struct vmamaps *maps, int len) +{ + int i; + struct file *fp; + + for(i = 0 ; i < len / sizeof(struct vmamaps) ; i++) + if((fp = maps[i].fp)) + get_file(fp); +} +#endif /* CONFIG_MOSIX */ + static int proc_pid_maps_get_line (char *buf, struct vm_area_struct *map) { /* produce the next line */ @@ -595,6 +842,12 @@ char *tmp, *kbuf; long retval; int off, lineno, loff; +#ifdef CONFIG_MOSIX + int should_ask = 0; + int nrmaps = 0, ind = 0; + struct vmamaps *rmaps = NULL; + struct vm_area_struct rvma; +#endif /* CONFIG_MOSIX */ /* reject calls with out of range parameters immediately */ retval = 0; @@ -616,16 +869,42 @@ goto out_free1; task_lock(task); +#ifdef CONFIG_MOSIX + if((should_ask = SHOULD_ASK_PROCESS(task))) + mm = NULL; + else +#endif /* CONFIG_MOSIX */ mm = task->mm; if (mm) atomic_inc(&mm->mm_users); task_unlock(task); +#ifdef CONFIG_MOSIX + if(should_ask) + { + retval = -ENOMEM; + if(!(rmaps = (struct vmamaps *)__get_free_page(GFP_KERNEL))) + goto out_free2; + ind = off / PAGE_SIZE; + retval = request_process(task, rmaps, PR_PROCFS_MAP_INFO, ind); + nrmaps = 0; + if(retval <= 0) + goto out_free2; + ind = -ind; + nrmaps = retval / sizeof(struct vmamaps); + retval = 0; + map = &rvma; + goto begin; + } +#endif /* CONFIG_MOSIX */ retval = 0; if (!mm) goto out_free2; down_read(&mm->mmap_sem); map = mm->mmap; +#ifdef CONFIG_MOSIX + begin: +#endif /* CONFIG_MOSIX */ lineno = 0; loff = 0; if (count > PAGE_SIZE) @@ -636,6 +915,16 @@ off -= PAGE_SIZE; goto next; } +#ifdef CONFIG_MOSIX + if(should_ask) + { + rvma.vm_start = rmaps[ind].vmstart; + rvma.vm_end = rmaps[ind].vmend; + rvma.vm_flags = rmaps[ind].vmflags; + rvma.vm_pgoff = rmaps[ind].vmpgoff; + rvma.vm_file = rmaps[ind].fp; + } +#endif /* CONFIG_MOSIX */ len = proc_pid_maps_get_line(tmp, map); len -= off; if (len > 0) { @@ -656,10 +945,25 @@ if (retval 
>= count) break; if (loff) BUG(); +#ifdef CONFIG_MOSIX + if(should_ask) + { + if(++ind >= nrmaps) + map = NULL; + } + else +#endif /* CONFIG_MOSIX */ map = map->vm_next; } +#ifdef CONFIG_MOSIX + if(mm) + { +#endif /* CONFIG_MOSIX */ up_read(&mm->mmap_sem); mmput(mm); +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ if (retval > count) BUG(); if (copy_to_user(buf, kbuf, retval)) @@ -672,6 +976,15 @@ out_free1: free_page((unsigned long)kbuf); out: +#ifdef CONFIG_MOSIX + if(rmaps) + { + for(ind = 0 ; ind < nrmaps ; ind++) + if(rmaps[ind].fp) + fput(rmaps[ind].fp); + free_page((unsigned long)rmaps); + } +#endif /* CONFIG_MOSIX */ return retval; } @@ -680,6 +993,10 @@ { int i, len; +#ifdef CONFIG_MOSIX + if(SHOULD_ASK_PROCESS(task)) + request_process(task, buffer, PR_PROCFS_UPDATE_TIMES, 0); +#endif /* CONFIG_MOSIX */ len = sprintf(buffer, "cpu %lu %lu\n", task->times.tms_utime, @@ -690,6 +1007,10 @@ i, task->per_cpu_utime[cpu_logical_map(i)], task->per_cpu_stime[cpu_logical_map(i)]); +#ifdef CONFIG_MOSIX + if(task->mosix.nmigs) + len += sprintf(buffer + len, "TIMES ON REMOTE MOSIX NODES NOT LISTED\n"); /* append after the cpu lines already in buffer; writing at buffer[0] would overwrite them while len still grew */ +#endif /* CONFIG_MOSIX */ return len; } diff -urN linux-2.4.17/fs/proc/base.c linux_umopenmosix/fs/proc/base.c --- linux-2.4.17/fs/proc/base.c Thu Oct 11 08:42:47 2001 +++ linux_umopenmosix/fs/proc/base.c Wed Jun 26 23:45:17 2002 @@ -24,6 +24,11 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#include +#endif /* CONFIG_MOSIX */ + /* * For hysterical raisins we keep the same inumbers as in the old procfs.
* Feel free to change the macro below - just keep the range distinct from @@ -39,6 +44,17 @@ int proc_pid_status(struct task_struct*,char*); int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); +#ifdef CONFIG_MOSIX +int proc_pid_where(struct task_struct*,char*); +int proc_pid_nmigs(struct task_struct*,char*); +int proc_pid_cantmove(struct task_struct*,char*); +int proc_pid_lock(struct task_struct*,char*); +int proc_pid_sigmig(struct task_struct*,char*); +int proc_pid_disclosure(struct task_struct*,char*); +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_FS +int proc_pid_selected(struct task_struct*,char*); +#endif /* CONFIG_MOSIX_FS */ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { @@ -50,6 +66,32 @@ return -ENOENT; } +#ifdef CONFIG_MOSIX +struct file * +first_executable(void) +{ + struct mm_struct *mm = current->mm; + register struct vm_area_struct *vma; + struct file *f = NULL; + + if(!mm) + return(NULL); + down_read(&mm->mmap_sem); + for(vma = current->mm->mmap ; vma ; vma = vma->vm_next) + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + { + f = vma->vm_file; + if(current->mosix.dflags & DREMOTE) + f = home_file(f); + else + get_file(f); + break; + } + up_read(&mm->mmap_sem); + return(f); +} +#endif /* CONFIG_MOSIX */ + static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { struct mm_struct * mm; @@ -58,6 +100,24 @@ struct task_struct *task = inode->u.proc_i.task; task_lock(task); +#ifdef CONFIG_MOSIX + if(SHOULD_ASK_PROCESS(task)) + { + struct file *f; + + task_unlock(task); + f = (struct file *)request_process(task, &f, + PR_PROCFS_GET_EXE, 0); + if(f && !IS_ERR(f)) + { + *mnt = mntget(f->f_vfsmnt); + *dentry = dget(f->f_dentry); + result = 0; + fput(f); + } + goto out; + } +#endif /* CONFIG_MOSIX */ mm = task->mm; if (mm) atomic_inc(&mm->mm_users); @@ -86,6 +146,12 @@ { struct fs_struct *fs; int result = -ENOENT; +#ifdef 
CONFIG_MOSIX_DFSA + struct task_struct *p = inode->u.proc_i.task; + + if(SHOULD_ASK_PROCESS(p)) + request_process(p, NULL, PR_DFSA_SYNCHRONIZE, 0); +#endif /* CONFIG_MOSIX_DFSA */ task_lock(inode->u.proc_i.task); fs = inode->u.proc_i.task->fs; if(fs) @@ -122,11 +188,22 @@ return result; } +#ifdef CONFIG_MOSIX +int proc_pid_environ(struct task_struct *task, char * buffer) +#else static int proc_pid_environ(struct task_struct *task, char * buffer) +#endif /* CONFIG_MOSIX */ { struct mm_struct *mm; int res = 0; task_lock(task); +#ifdef CONFIG_MOSIX + if(SHOULD_ASK_PROCESS(task)) + { + task_unlock(task); + return(request_process(task, buffer, PR_PROCFS_GET_ENV, 0)); + } +#endif /* CONFIG_MOSIX */ mm = task->mm; if (mm) atomic_inc(&mm->mm_users); @@ -141,11 +218,22 @@ return res; } +#ifdef CONFIG_MOSIX +int proc_pid_cmdline(struct task_struct *task, char * buffer) +#else static int proc_pid_cmdline(struct task_struct *task, char * buffer) +#endif /* CONFIG_MOSIX */ { struct mm_struct *mm; int res = 0; task_lock(task); +#ifdef CONFIG_MOSIX + if(SHOULD_ASK_PROCESS(task)) + { + task_unlock(task); + return(request_process(task, buffer, PR_PROCFS_GET_ARG, 0)); + } +#endif /* CONFIG_MOSIX */ mm = task->mm; if (mm) atomic_inc(&mm->mm_users); @@ -196,6 +284,10 @@ our_vfsmnt = mntget(current->fs->rootmnt); base = dget(current->fs->root); read_unlock(&current->fs->lock); +#ifdef CONFIG_MOSIX + if(!root) /* hope it will be fixed by 2.4.0 */ + return(-ENOENT); +#endif /* CONFIG_MOSIX */ spin_lock(&dcache_lock); de = root; @@ -286,8 +378,14 @@ read: proc_info_read, }; +#ifdef CONFIG_MOSIX +#define MAY_PTRACE(p) \ +(p==current||(p->p_pptr==current&&(p->ptrace & PT_PTRACED) && \ + LOGICAL_STATE(p) == TASK_STOPPED)) +#else #define MAY_PTRACE(p) \ (p==current||(p->p_pptr==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED)) +#endif /* CONFIG_MOSIX */ static int mem_open(struct inode* inode, struct file* file) @@ -318,10 +416,15 @@ if (mm) atomic_inc(&mm->mm_users); task_unlock(task);
+#ifndef CONFIG_MOSIX if (!mm) return 0; +#endif /* CONFIG_MOSIX */ if (file->private_data != (void*)((long)current->self_exec_id) ) { +#ifdef CONFIG_MOSIX + if(mm) +#endif /* CONFIG_MOSIX */ mmput(mm); return -EIO; } @@ -347,6 +450,9 @@ count -= retval; } *ppos = src; +#ifdef CONFIG_MOSIX + if(mm) +#endif /* CONFIG_MOSIX */ mmput(mm); free_page((unsigned long) page); return copied; @@ -497,6 +603,20 @@ PROC_PID_STATM, PROC_PID_MAPS, PROC_PID_CPU, +#ifdef CONFIG_MOSIX + PROC_PID_WHERE, + PROC_PID_MIGRATE, + PROC_PID_GOTO, + PROC_PID_LOCK, + PROC_PID_NMIGS, + PROC_PID_CANTMOVE, + PROC_PID_SIGMIG, + PROC_PID_DISCLOSURE, +#ifdef CONFIG_MOSIX_FS + PROC_PID_SELECTED, +#endif /* CONFIG_MOSIX_FS */ + PROC_PID_MOSIX_START = PROC_MOSIX_USE_START, +#endif /* CONFIG_MOSIX */ PROC_PID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -516,6 +636,19 @@ E(PROC_PID_CWD, "cwd", S_IFLNK|S_IRWXUGO), E(PROC_PID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_PID_EXE, "exe", S_IFLNK|S_IRWXUGO), +#ifdef CONFIG_MOSIX + E(PROC_PID_WHERE, "where", S_IFREG|S_IRUGO), + E(PROC_PID_MIGRATE, "migrate", S_IFREG|S_IWUSR), + E(PROC_PID_GOTO, "goto", S_IFREG|S_IWUGO), + E(PROC_PID_LOCK, "lock", S_IFREG|S_IRUGO|S_IWUSR), + E(PROC_PID_NMIGS, "nmigs", S_IFREG|S_IRUGO), + E(PROC_PID_CANTMOVE, "cantmove", S_IFREG|S_IRUGO), + E(PROC_PID_SIGMIG, "sigmig", S_IFREG|S_IRUGO|S_IWUSR), + E(PROC_PID_DISCLOSURE,"disclosure", S_IFREG|S_IRUGO|S_IWUSR), +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_FS + E(PROC_PID_SELECTED, "selected", S_IFREG|S_IRUGO|S_IWUSR), +#endif /* CONFIG_MOSIX_FS */ {0,0,NULL,0} }; #undef E @@ -546,6 +679,10 @@ goto out; filp->f_pos++; default: +#ifdef CONFIG_MOSIX_DFSA + if(SHOULD_ASK_PROCESS(p)) + request_process(p, NULL, PR_DFSA_SYNCHRONIZE,0); +#endif /* CONFIG_MOSIX_DFSA */ task_lock(p); files = p->files; if (files) @@ -622,7 +759,11 @@ /* building an inode */ +#ifdef CONFIG_MOSIX +int task_dumpable(struct task_struct *task) +#else static int task_dumpable(struct task_struct *task) +#endif /* 
CONFIG_MOSIX */ { int dumpable = 0; struct mm_struct *mm; @@ -631,6 +772,11 @@ mm = task->mm; if (mm) dumpable = mm->dumpable; +#ifdef CONFIG_MOSIX + else if((current->mosix.dflags & (DDEPUTY|DDUMPABLE)) == + (DDEPUTY|DDUMPABLE)) + dumpable = 1; +#endif /* CONFIG_MOSIX */ task_unlock(task); return dumpable; } @@ -747,6 +893,10 @@ inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); if (!inode) goto out; +#ifdef CONFIG_MOSIX_DFSA + if(SHOULD_ASK_PROCESS(task)) + request_process(task, NULL, PR_DFSA_SYNCHRONIZE,0); +#endif /* CONFIG_MOSIX_DFSA */ task_lock(task); files = task->files; if (files) @@ -795,6 +945,52 @@ permission: proc_permission, }; +#ifdef CONFIG_MOSIX +static int +proc_pid_writeonly(struct task_struct *doesnt_matter, char *buf) +{ + return(-EPERM); +} + +static ssize_t +proc_mosix_write(struct file *file, const char *buf, size_t cnt, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + ssize_t result = -EPERM; + struct task_struct *task = inode->u.proc_i.task; + + switch(inode->i_ino & 0xffff) + { + case PROC_PID_MIGRATE: + result = proc_mosix_pid_set_migrate(file, task, buf, cnt); + break; + case PROC_PID_GOTO: + result = proc_mosix_pid_set_goto(file, task, buf, cnt); + break; + case PROC_PID_LOCK: + result = proc_mosix_pid_set_lock(file, task, buf, cnt); + break; + case PROC_PID_SIGMIG: + result = proc_mosix_pid_set_sigmig(file, task, buf, cnt); + break; + case PROC_PID_DISCLOSURE: + result = proc_mosix_pid_set_disclosure(file, task, buf, cnt); + break; +#ifdef CONFIG_MOSIX_FS + case PROC_PID_SELECTED: + result = proc_mosix_pid_set_selected(file, task, buf, cnt); + break; +#endif /* CONFIG_MOSIX_FS */ + } + return(result); +} + +static struct file_operations proc_mosix_file_operations = { + read: proc_info_read, + write: proc_mosix_write, +}; +#endif /* CONFIG_MOSIX */ + static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) { struct inode *inode; @@ -875,6 +1071,46 @@ inode->i_op = 
&proc_mem_inode_operations; inode->i_fop = &proc_mem_operations; break; +#ifdef CONFIG_MOSIX + case PROC_PID_WHERE: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_where; + break; + case PROC_PID_NMIGS: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_nmigs; + break; + case PROC_PID_CANTMOVE: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_cantmove; + break; + case PROC_PID_MIGRATE: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_writeonly; + break; + case PROC_PID_GOTO: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_writeonly; + break; + case PROC_PID_LOCK: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_lock; + break; + case PROC_PID_SIGMIG: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_sigmig; + break; + case PROC_PID_DISCLOSURE: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_disclosure; + break; +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_FS + case PROC_PID_SELECTED: + inode->i_fop = &proc_mosix_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_selected; + break; +#endif /* CONFIG_MOSIX_FS */ default: printk("procfs: impossible type (%d)",p->type); iput(inode); @@ -989,6 +1225,14 @@ { if (inode->u.proc_i.file) fput(inode->u.proc_i.file); +#ifdef CONFIG_MOSIX + { + int ino = inode->i_ino & 0xffff; + + if(ino >= PROC_PID_MOSIX_START && ino < PROC_PID_FD_DIR) + return; + } +#endif /* CONFIG_MOSIX */ if (inode->u.proc_i.task) free_task_struct(inode->u.proc_i.task); } @@ -1008,7 +1252,11 @@ index--; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ int pid = p->pid; if (!pid) continue; diff -urN linux-2.4.17/fs/proc/generic.c linux_umopenmosix/fs/proc/generic.c 
--- linux-2.4.17/fs/proc/generic.c Fri Sep 7 20:53:59 2001 +++ linux_umopenmosix/fs/proc/generic.c Wed Jun 26 23:45:17 2002 @@ -356,7 +356,11 @@ lookup: proc_lookup, }; +#ifdef CONFIG_MOSIX +int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +#else static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +#endif /* CONFIG_MOSIX */ { int i; diff -urN linux-2.4.17/fs/proc/inode.c linux_umopenmosix/fs/proc/inode.c --- linux-2.4.17/fs/proc/inode.c Sat Nov 17 21:24:32 2001 +++ linux_umopenmosix/fs/proc/inode.c Wed Jun 26 23:45:17 2002 @@ -194,7 +194,11 @@ * Fixup the root inode's nlink value */ read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) if (p->pid) root_inode->i_nlink++; +#else for_each_task(p) if (p->pid) root_inode->i_nlink++; +#endif /* CONFIG_MOSIX */ read_unlock(&tasklist_lock); s->s_root = d_alloc_root(root_inode); if (!s->s_root) diff -urN linux-2.4.17/fs/proc/root.c linux_umopenmosix/fs/proc/root.c --- linux-2.4.17/fs/proc/root.c Sun Oct 21 04:14:42 2001 +++ linux_umopenmosix/fs/proc/root.c Wed Jun 26 23:45:17 2002 @@ -17,7 +17,14 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver; +#ifdef CONFIG_MOSIX +struct proc_dir_entry *proc_mosix; +#endif /* CONFIG_MOSIX */ #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; @@ -48,6 +55,10 @@ proc_mkdir("sys/fs", 0); proc_mkdir("sys/fs/binfmt_misc", 0); #endif +#ifdef CONFIG_MOSIX + proc_mosix = proc_mkdir("hpc", 0); + mosix_proc_init(); +#endif /* CONFIG_MOSIX */ proc_root_fs = proc_mkdir("fs", 0); proc_root_driver = proc_mkdir("driver", 0); #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) @@ -70,6 +81,9 @@ int nlink = proc_root.nlink; nlink += nr_threads; +#ifdef CONFIG_MOSIX + nlink -= count_guests(); +#endif /* CONFIG_MOSIX */ dir->i_nlink = nlink; } diff -urN linux-2.4.17/fs/read_write.c 
linux_umopenmosix/fs/read_write.c --- linux-2.4.17/fs/read_write.c Sun Aug 5 23:12:41 2001 +++ linux_umopenmosix/fs/read_write.c Wed Jun 26 23:45:17 2002 @@ -14,6 +14,10 @@ #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + struct file_operations generic_ro_fops = { llseek: generic_file_llseek, read: generic_file_read, @@ -76,7 +80,11 @@ return retval; } +#ifdef CONFIG_MOSIX_FS +inline loff_t llseek(struct file *file, loff_t offset, int origin) +#else static inline loff_t llseek(struct file *file, loff_t offset, int origin) +#endif /* CONFIG_MOSIX_FS */ { loff_t (*fn)(struct file *, loff_t, int); loff_t retval; @@ -95,6 +103,9 @@ off_t retval; struct file * file; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ retval = -EBADF; file = fget(fd); if (!file) @@ -120,6 +131,9 @@ struct file * file; loff_t offset; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ retval = -EBADF; file = fget(fd); if (!file) @@ -149,6 +163,9 @@ ssize_t ret; struct file * file; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ ret = -EBADF; file = fget(fd); if (file) { @@ -175,6 +192,9 @@ ssize_t ret; struct file * file; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ ret = -EBADF; file = fget(fd); if (file) { @@ -307,6 +327,9 @@ ssize_t ret; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ ret = -EBADF; file = fget(fd); if (!file) @@ -327,6 +350,9 @@ ssize_t ret; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ ret = -EBADF; file = fget(fd); if (!file) diff -urN linux-2.4.17/fs/readdir.c linux_umopenmosix/fs/readdir.c --- linux-2.4.17/fs/readdir.c Mon Aug 13 00:59:08 2001 +++ linux_umopenmosix/fs/readdir.c Wed Jun 26 23:45:17 2002 @@ -5,6 +5,10 @@ */ #include +#ifdef CONFIG_MOSIX +#include +#include +#endif /* CONFIG_MOSIX */ 
#include #include #include @@ -13,6 +17,10 @@ #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + int vfs_readdir(struct file *file, filldir_t filler, void *buf) { struct inode *inode = file->f_dentry->d_inode; @@ -132,11 +140,30 @@ return -EINVAL; buf->count++; dirent = buf->dirent; +#ifdef CONFIG_MOSIX + { + union + { + struct old_linux_dirent o; + struct dirent d; + } u; + + u.o.d_ino = ino; + u.o.d_offset = offset; + u.o.d_namlen = namlen; + memcpy(u.d.d_name, name, namlen); + u.d.d_name[namlen] = '\0'; + copy_to_user(dirent, &u, + offsetof(struct old_linux_dirent, d_name[0]) + + namlen + 1); + } +#else put_user(ino, &dirent->d_ino); put_user(offset, &dirent->d_offset); put_user(namlen, &dirent->d_namlen); copy_to_user(dirent->d_name, name, namlen); put_user(0, dirent->d_name + namlen); +#endif /* CONFIG_MOSIX */ return 0; } @@ -146,6 +173,9 @@ struct file * file; struct readdir_callback buf; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ error = -EBADF; file = fget(fd); if (!file) @@ -154,7 +184,13 @@ buf.count = 0; buf.dirent = dirent; +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits |= MFSARG_OLDREADDIR; +#endif /* CONFIG_MOSIX_FS */ error = vfs_readdir(file, fillonedir, &buf); +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits &= ~MFSARG_OLDREADDIR; +#endif /* CONFIG_MOSIX_FS */ if (error >= 0) error = buf.count; @@ -189,6 +225,9 @@ struct linux_dirent * dirent; struct getdents_callback * buf = (struct getdents_callback *) __buf; int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 1); +#ifdef CONFIG_MOSIX + struct dirent d; +#endif /* CONFIG_MOSIX */ buf->error = -EINVAL; /* only used if we fail.. 
*/ if (reclen > buf->count) @@ -199,9 +238,24 @@ dirent = buf->current_dir; buf->previous = dirent; put_user(ino, &dirent->d_ino); +#ifdef CONFIG_MOSIX +/* access to remote-data is much faster in one block */ + d.d_reclen = reclen; + memcpy(d.d_name, name, namlen); + d.d_name[namlen] = '\0'; +/* NOTICE: +We take great liberty to fill the potential round-up byte(s) +that are not filled by standard Linux. +SORRY - performance would be horrible otherwise with all the fragmentation. +*/ + + copy_to_user(&dirent->d_reclen, &d.d_reclen, + reclen - offsetof(struct dirent, d_reclen)); +#else put_user(reclen, &dirent->d_reclen); copy_to_user(dirent->d_name, name, namlen); put_user(0, dirent->d_name + namlen); +#endif /* CONFIG_MOSIX */ ((char *) dirent) += reclen; buf->current_dir = dirent; buf->count -= reclen; @@ -215,6 +269,9 @@ struct getdents_callback buf; int error; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ error = -EBADF; file = fget(fd); if (!file) @@ -225,7 +282,14 @@ buf.count = count; buf.error = 0; +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits |= MFSARG_GETDENTS; + current->mosix.dirty_arg = count; +#endif /* CONFIG_MOSIX_FS */ error = vfs_readdir(file, filldir, &buf); +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits &= ~MFSARG_GETDENTS; +#endif /* CONFIG_MOSIX_FS */ if (error < 0) goto out_putf; error = buf.error; @@ -298,6 +362,9 @@ struct getdents_callback64 buf; int error; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 1); +#endif /* CONFIG_MOSIX_DFSA */ error = -EBADF; file = fget(fd); if (!file) @@ -308,7 +375,14 @@ buf.count = count; buf.error = 0; +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits |= MFSARG_GETDENTS64; + current->mosix.dirty_arg = count; +#endif /* CONFIG_MOSIX_FS */ error = vfs_readdir(file, filldir64, &buf); +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits &= ~MFSARG_GETDENTS64; +#endif /* CONFIG_MOSIX_FS */ if (error < 0) goto out_putf; error = buf.error; diff -urN 
linux-2.4.17/fs/stat.c linux_umopenmosix/fs/stat.c --- linux-2.4.17/fs/stat.c Fri Sep 14 02:04:43 2001 +++ linux_umopenmosix/fs/stat.c Wed Jun 26 23:45:17 2002 @@ -13,6 +13,14 @@ #include +#ifdef CONFIG_MOSIX_FS +#include +#include +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + /* * Revalidate the inode. This is required for proper NFS attribute caching. */ @@ -123,6 +131,14 @@ tmp.st_blocks = inode->i_blocks; tmp.st_blksize = inode->i_blksize; } +#ifdef CONFIG_MOSIX_FS + if(inode->i_sb && inode->i_sb->s_magic == MFS_SUPER_MAGIC) + { + tmp.__unused1 = inode->u.mfs_i.pe; + tmp.__unused2 = inode->i_generation; + tmp.__unused3 = inode->i_version; + } +#endif /* CONFIG_MOSIX_FS */ return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } @@ -137,7 +153,28 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_old_stat((struct inode *)nd.complete_parg, + statbuf); + kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW| LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -153,7 +190,28 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_new_stat((struct inode *)nd.complete_parg, + statbuf); + kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + 
LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -174,7 +232,27 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS| + LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_old_stat((struct inode *)nd.complete_parg, + statbuf); + kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS, &nd); +#else error = user_path_walk_link(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -191,7 +269,27 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS| + LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_new_stat((struct inode *)nd.complete_parg, + statbuf); + kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS, &nd); +#else error = user_path_walk_link(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -212,6 +310,9 @@ struct file * f; int err = -EBADF; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ f = fget(fd); if (f) { struct dentry * dentry = f->f_dentry; @@ -231,6 +332,9 @@ struct file * f; int err = -EBADF; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ f = fget(fd); if (f) { struct dentry * dentry = f->f_dentry; @@ -251,7 +355,21 @@ if (bufsiz <= 0) 
return -EINVAL; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_READLINK; + nd.complete_args.buffer.buf = buf; + nd.complete_args.buffer.bufsiz = bufsiz; + error = __user_walk(path, LOOKUP_POSITIVE|LOOKUP_STAYFS|LOOKUP_COMPLETE, + &nd); + if(nd.complete_flags & COMPLETE_DONE) + return(error); +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(path, LOOKUP_POSITIVE|LOOKUP_STAYFS, &nd); +#else error = user_path_walk_link(path, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { struct inode * inode = nd.dentry->d_inode; @@ -326,6 +444,14 @@ tmp.st_blocks = inode->i_blocks; tmp.st_blksize = inode->i_blksize; } +#ifdef CONFIG_MOSIX_FS + if(inode->i_sb && inode->i_sb->s_magic == MFS_SUPER_MAGIC) + { + STAT64_NODE(tmp) = inode->u.mfs_i.pe; + STAT64_ORIGDEV(tmp) = inode->i_generation; + STAT64_ORIGINO(tmp) = inode->i_version; + } +#endif /* CONFIG_MOSIX_FS */ return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } @@ -334,7 +460,28 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_FOLLOW| + LOOKUP_STAYFS|LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_new_stat64((struct inode *)nd.complete_parg, + statbuf); + kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, + LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_STAYFS, &nd); +#else error = user_path_walk(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -349,7 +496,27 @@ struct nameidata nd; int error; +#ifdef CONFIG_MOSIX_FS + nd.express_function = EF_STAT; + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS| + LOOKUP_COMPLETE, &nd); + if(nd.complete_flags & COMPLETE_DONE) + { + if(nd.complete_parg) + { + error = cp_new_stat64((struct inode *)nd.complete_parg, + statbuf); + 
kfree(nd.complete_parg); + } + return(error); + } +#else +#ifdef CONFIG_MOSIX_DFSA + error = __user_walk(filename, LOOKUP_POSITIVE|LOOKUP_STAYFS, &nd); +#else error = user_path_walk_link(filename, &nd); +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX_FS */ if (!error) { error = do_revalidate(nd.dentry); if (!error) @@ -364,6 +531,9 @@ struct file * f; int err = -EBADF; +#ifdef CONFIG_MOSIX_DFSA + dfsa_syscall_on_file(fd, 0); +#endif /* CONFIG_MOSIX_DFSA */ f = fget(fd); if (f) { struct dentry * dentry = f->f_dentry; diff -urN linux-2.4.17/fs/super.c linux_umopenmosix/fs/super.c --- linux-2.4.17/fs/super.c Fri Dec 21 19:42:03 2001 +++ linux_umopenmosix/fs/super.c Wed Jun 26 23:45:17 2002 @@ -495,7 +495,11 @@ { int i; +#ifdef CONFIG_MOSIX_FS + for (i = 1; i < 255; i++) { +#else for (i = 1; i < 256; i++) { +#endif /* CONFIG_MOSIX_FS */ if (!test_and_set_bit(i,unnamed_dev_in_use)) return MKDEV(UNNAMED_MAJOR, i); } @@ -506,6 +510,10 @@ { if (!dev || MAJOR(dev) != UNNAMED_MAJOR) return; +#ifdef CONFIG_MOSIX_FS + if(MINOR(dev) == 255) + return; +#endif /* CONFIG_MOSIX_FS */ if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use)) return; printk("VFS: put_unnamed_dev: freeing unused device %s\n", @@ -647,6 +655,12 @@ { kdev_t dev; int error = -EMFILE; + +#ifdef CONFIG_MOSIX_FS + if(!strcmp(fs_type->name, "mfs")) + dev = MKDEV(UNNAMED_MAJOR, 255); + else +#endif /* CONFIG_MOSIX_FS */ dev = get_unnamed_dev(); if (dev) { struct super_block * sb; diff -urN linux-2.4.17/fs/umsdos/ioctl.c linux_umopenmosix/fs/umsdos/ioctl.c --- linux-2.4.17/fs/umsdos/ioctl.c Tue Aug 28 18:16:07 2001 +++ linux_umopenmosix/fs/umsdos/ioctl.c Wed Jun 26 23:45:17 2002 @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + struct UMSDOS_DIR_ONCE { struct dirent *ent; int count; @@ -209,6 +213,20 @@ } /* update the original f_pos */ filp->f_pos = pos; +#ifdef CONFIG_MOSIX_DFSA + { + struct files_struct *files = current->files; + register int i; 
+ + if(atomic_read(&files->count) == 1) + for(i = 0 ; i < files->max_fds ; i++) + if(files->fd[i] == filp) + { + dfsa_touch_file(i); + break; + } + } +#endif /* CONFIG_MOSIX_DFSA */ read_dput: d_drop(demd); dput(demd); diff -urN linux-2.4.17/howto_compile.txt linux_umopenmosix/howto_compile.txt --- linux-2.4.17/howto_compile.txt Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/howto_compile.txt Wed Jun 26 23:45:17 2002 @@ -0,0 +1,15 @@ +make mrproper ARCH=um +make clean ARCH=um +cp node1_uml_config.openmosix .config +make menuconfig ARCH=um +make dep ARCH=um +make linux ARCH=um +make modules ARCH=um +mount root_fs mnt -o loop +make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um +umount mnt + + + + + diff -urN linux-2.4.17/include/asm-generic/smplock.h linux_umopenmosix/include/asm-generic/smplock.h --- linux-2.4.17/include/asm-generic/smplock.h Thu Mar 23 22:50:09 2000 +++ linux_umopenmosix/include/asm-generic/smplock.h Wed Jun 26 23:45:17 2002 @@ -49,3 +49,24 @@ if (--current->lock_depth < 0) spin_unlock(&kernel_flag); } + +#ifdef CONFIG_MOSIX + +/* the big MOSIX lock resembles the big kernel lock in the way it is + * allowed to be locked multiple times. It is not allowed, however, + * to remain locked when entering "schedule". 
+ */ +extern spinlock_t mosix_flag; + +extern __inline__ void lock_mosix(void) +{ + if (!++current->mosix.lock_depth) + spin_lock(&mosix_flag); +} + +extern __inline__ void unlock_mosix(void) +{ + if (--current->mosix.lock_depth < 0) + spin_unlock(&mosix_flag); +} +#endif /* CONFIG_MOSIX */ diff -urN linux-2.4.17/include/asm-i386/a.out.h linux_umopenmosix/include/asm-i386/a.out.h --- linux-2.4.17/include/asm-i386/a.out.h Fri Jun 16 21:33:06 1995 +++ linux_umopenmosix/include/asm-i386/a.out.h Sat Jun 29 16:49:28 2002 @@ -21,6 +21,9 @@ #define STACK_TOP TASK_SIZE +#ifdef CONFIG_MOSIX +void aout_remote_init_mm(struct exec *); +#endif /* CONFIG_MOSIX */ #endif #endif /* __A_OUT_GNU_H__ */ diff -urN linux-2.4.17/include/asm-i386/checksum.h linux_umopenmosix/include/asm-i386/checksum.h --- linux-2.4.17/include/asm-i386/checksum.h Thu Jul 26 23:41:22 2001 +++ linux_umopenmosix/include/asm-i386/checksum.h Sat Jun 29 16:49:41 2002 @@ -1,6 +1,13 @@ #ifndef _I386_CHECKSUM_H #define _I386_CHECKSUM_H +#ifndef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ /* * computes the checksum of a memory block at buff, length len, @@ -38,13 +45,29 @@ unsigned int csum_partial_copy_nocheck ( const char *src, char *dst, int len, int sum) { +#ifdef CONFIG_MOSIX + if(!segment_eq(get_fs(), KERNEL_DS) && + (((unsigned long)src) < PAGE_OFFSET || + ((unsigned long)dst) < PAGE_OFFSET)) + panic("csum_partial_copy_nocheck, src=%x,dst=%x",(int)src,(int)dst); +#endif /* CONFIG_MOSIX */ return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL); } +#ifdef CONFIG_MOSIX +extern unsigned int deputy_csum_partial_copy_from_user(const char *, char *, int, int, int *); +extern unsigned int deputy_csum_partial_copy_to_user(const char *, char *, int, int, int *); +#endif /* CONFIG_MOSIX */ + static __inline__ unsigned int csum_partial_copy_from_user ( const char *src, char *dst, int len, int sum, int *err_ptr) { +#ifdef CONFIG_MOSIX + 
if(memory_not_here(len, FROM_USER)) + return(deputy_csum_partial_copy_from_user(src, dst, len, + sum, err_ptr)); +#endif /* CONFIG_MOSIX */ return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, NULL); } @@ -185,6 +208,11 @@ static __inline__ unsigned int csum_and_copy_to_user (const char *src, char *dst, int len, int sum, int *err_ptr) { +#ifdef CONFIG_MOSIX + if(memory_not_here(len, TO_USER)) /* destination is user memory: account as a copy-out */ + return(deputy_csum_partial_copy_to_user(src, dst, len, + sum, err_ptr)); +#endif /* CONFIG_MOSIX */ if (access_ok(VERIFY_WRITE, dst, len)) return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr); diff -urN linux-2.4.17/include/asm-i386/cpufeature.h linux_umopenmosix/include/asm-i386/cpufeature.h --- linux-2.4.17/include/asm-i386/cpufeature.h Fri Dec 21 19:42:03 2001 +++ linux_umopenmosix/include/asm-i386/cpufeature.h Wed Jun 26 23:45:17 2002 @@ -64,6 +64,22 @@ #define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ +#ifdef CONFIG_MOSIX +#define FBIT(feature) (1 << ((feature) % 32)) +#define USER_MODE_FEATURES0 (FBIT(X86_FEATURE_TSC)|FBIT(X86_FEATURE_CX8)|\ + FBIT(X86_FEATURE_SEP)|FBIT(X86_FEATURE_CMOV)|\ + FBIT(X86_FEATURE_MMX)|FBIT(X86_FEATURE_CLFLSH)|\ + FBIT(X86_FEATURE_FXSR)|FBIT(X86_FEATURE_XMM)|\ + FBIT(X86_FEATURE_XMM2)) +#define USER_MODE_FEATURES1 (FBIT(X86_FEATURE_SYSCALL)|\ + FBIT(X86_FEATURE_MMXEXT)|FBIT(X86_FEATURE_LM)|\ + FBIT(X86_FEATURE_3DNOWEXT)|\ + FBIT(X86_FEATURE_3DNOW)) +#define USER_MODE_FEATURES2 0 +#define USER_MODE_FEATURES3 (FBIT(X86_FEATURE_CXMMX)) +#define USER_MODE_FEATURES { USER_MODE_FEATURES0, USER_MODE_FEATURES1, \ + USER_MODE_FEATURES2, USER_MODE_FEATURES3 } +#endif /* CONFIG_MOSIX */ #endif /* __ASM_I386_CPUFEATURE_H */ /* diff -urN linux-2.4.17/include/asm-i386/desc.h linux_umopenmosix/include/asm-i386/desc.h --- linux-2.4.17/include/asm-i386/desc.h Thu Jul 26 23:40:32 2001 +++ linux_umopenmosix/include/asm-i386/desc.h Wed Jun 26 
23:45:17 2002 @@ -45,14 +45,48 @@ unsigned long a,b; }; -extern struct desc_struct gdt_table[]; -extern struct desc_struct *idt, *gdt; +struct desc_struct gdt_table[]; +struct desc_struct *idt, *gdt; struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); }; + + + + + +struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 } }; + + + +#define _set_tssldt_desc(n,addr,limit,type) \ +__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ + "movw %%ax,2(%2)\n\t" \ + "rorl $16,%%eax\n\t" \ + "movb %%al,4(%2)\n\t" \ + "movb %4,5(%2)\n\t" \ + "movb $0,6(%2)\n\t" \ + "movb %%ah,7(%2)\n\t" \ + "rorl $16,%%eax" \ + : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) + + +void set_ldt_desc(unsigned int n, void *addr, unsigned int size) +{ + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +} + + + + + + + + #define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) #define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) diff -urN linux-2.4.17/include/asm-i386/elf.h linux_umopenmosix/include/asm-i386/elf.h --- linux-2.4.17/include/asm-i386/elf.h Thu Nov 22 21:48:29 2001 +++ linux_umopenmosix/include/asm-i386/elf.h Wed Jun 26 23:45:17 2002 @@ -41,6 +41,12 @@ We might as well make sure everything else is cleared too (except for %esp), just to make things more deterministic. 
*/ +#ifdef CONFIG_MOSIX +#define ELF_PLAT_INIT_REGS (BIT_OF_REGISTER(ebx)|BIT_OF_REGISTER(ecx)|\ + BIT_OF_REGISTER(edx)|BIT_OF_REGISTER(esi)|\ + BIT_OF_REGISTER(edi)|BIT_OF_REGISTER(ebp)|\ + BIT_OF_REGISTER(eax)) +#endif /* CONFIG_MOSIX */ #define ELF_PLAT_INIT(_r) do { \ _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \ _r->esi = 0; _r->edi = 0; _r->ebp = 0; \ diff -urN linux-2.4.17/include/asm-i386/fcntl.h linux_umopenmosix/include/asm-i386/fcntl.h --- linux-2.4.17/include/asm-i386/fcntl.h Mon Sep 17 23:16:30 2001 +++ linux_umopenmosix/include/asm-i386/fcntl.h Sat Jun 29 16:49:28 2002 @@ -21,6 +21,10 @@ #define O_DIRECTORY 0200000 /* must be a directory */ #define O_NOFOLLOW 0400000 /* don't follow links */ +#ifdef CONFIG_MOSIX +#define O_NOWRITEACCESS 01000000 /* do not put_write_access() */ +#endif /* CONFIG_MOSIX */ + #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get close_on_exec */ #define F_SETFD 2 /* set/clear close_on_exec */ diff -urN linux-2.4.17/include/asm-i386/hardirq.h linux_umopenmosix/include/asm-i386/hardirq.h --- linux-2.4.17/include/asm-i386/hardirq.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/asm-i386/hardirq.h Sat Jun 29 16:49:30 2002 @@ -4,6 +4,7 @@ #include #include #include +#include /* for cpu_relax */ /* assembly code in softirq.h is sensitive to the offsets of these fields */ typedef struct { diff -urN linux-2.4.17/include/asm-i386/i387.h linux_umopenmosix/include/asm-i386/i387.h --- linux-2.4.17/include/asm-i386/i387.h Thu Nov 22 21:48:58 2001 +++ linux_umopenmosix/include/asm-i386/i387.h Wed Jun 26 23:45:17 2002 @@ -85,4 +85,10 @@ extern int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu ); +#ifdef CONFIG_MOSIX +extern int has_fxsr(void); +extern void fsave_to_fxsave(union i387_union *, union i387_union *); +extern void fxsave_to_fsave(union i387_union *, union i387_union *); +#endif /* CONFIG_MOSIX */ + #endif /* __ASM_I386_I387_H */ diff -urN linux-2.4.17/include/asm-i386/processor.h 
linux_umopenmosix/include/asm-i386/processor.h --- linux-2.4.17/include/asm-i386/processor.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/asm-i386/processor.h Wed Jun 26 23:45:17 2002 @@ -420,6 +420,12 @@ regs->esp = new_esp; \ } while (0) +#ifdef CONFIG_MOSIX +#define START_THREAD_REGS (BIT_OF_REGISTER(xds)|BIT_OF_REGISTER(xes)|\ + BIT_OF_REGISTER(xss)|BIT_OF_REGISTER(xcs)|\ + BIT_OF_REGISTER(eip)|BIT_OF_REGISTER(esp)) +#endif /* CONFIG_MOSIX */ + /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; @@ -430,6 +436,9 @@ * create a kernel thread without removing it from tasklists */ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); +#ifdef CONFIG_MOSIX +extern int user_thread(int (*fn)(void *), void * arg, unsigned long flags); +#endif /* CONFIG_MOSIX */ /* Copy and release all segment info associated with a VM */ extern void copy_segments(struct task_struct *p, struct mm_struct * mm); @@ -444,8 +453,13 @@ } unsigned long get_wchan(struct task_struct *p); +#ifdef CONFIG_MOSIX +#define KSTK_EIP(tsk) (mos_to_regs(&(tsk)->mosix)->eip) +#define KSTK_ESP(tsk) (mos_to_regs(&(tsk)->mosix)->esp) +#else #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) +#endif /* CONFIG_MOSIX */ #define THREAD_SIZE (2*PAGE_SIZE) #define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) diff -urN linux-2.4.17/include/asm-i386/semaphore.h linux_umopenmosix/include/asm-i386/semaphore.h --- linux-2.4.17/include/asm-i386/semaphore.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/asm-i386/semaphore.h Sat Jun 29 16:49:28 2002 @@ -159,6 +159,11 @@ return result; } +#ifdef CONFIG_MOSIX +/* do not be interrupted by signals, but still respond to MOSIX requests */ +extern void down_half_interruptible(struct semaphore *); +#endif /* CONFIG_MOSIX */ + /* * Non-blockingly attempt to down() a semaphore. 
* Returns zero if we acquired it diff -urN linux-2.4.17/include/asm-i386/siginfo.h linux_umopenmosix/include/asm-i386/siginfo.h --- linux-2.4.17/include/asm-i386/siginfo.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/asm-i386/siginfo.h Sat Jun 29 16:49:28 2002 @@ -59,6 +59,12 @@ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; +#ifdef CONFIG_MOSIX + struct { + int _newplace; + int _reason; + } _sigmig; +#endif /* CONFIG_MOSIX */ } _sifields; } siginfo_t; @@ -76,6 +82,10 @@ #define si_addr _sifields._sigfault._addr #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd +#ifdef CONFIG_MOSIX +#define si_newplace _sifields._sigmig._newplace +#define si_reason _sifields._sigmig._reason +#endif /* CONFIG_MOSIX */ #ifdef __KERNEL__ #define __SI_MASK 0xffff0000 @@ -85,6 +95,9 @@ #define __SI_FAULT (3 << 16) #define __SI_CHLD (4 << 16) #define __SI_RT (5 << 16) +#ifdef CONFIG_MOSIX +#define __SI_MIGRATION (6 << 16) +#endif /* CONFIG_MOSIX */ #define __SI_CODE(T,N) ((T) << 16 | ((N) & 0xffff)) #else #define __SI_KILL 0 @@ -107,6 +120,9 @@ #define SI_MESGQ -3 /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ #define SI_SIGIO -5 /* sent by queued SIGIO */ +#ifdef CONFIG_MOSIX +#define SI_MIGRATION 0x100 /* sent by migration */ +#endif /* CONFIG_MOSIX */ #define SI_FROMUSER(siptr) ((siptr)->si_code <= 0) #define SI_FROMKERNEL(siptr) ((siptr)->si_code > 0) @@ -181,6 +197,10 @@ #define POLL_HUP (__SI_POLL|6) /* device disconnected */ #define NSIGPOLL 6 +#ifdef CONFIG_MOSIX +#define SI_MIGRATED (__SI_MIGRATION|1) /* process migrated */ +#endif /* CONFIG_MOSIX */ + /* * sigevent definitions * diff -urN linux-2.4.17/include/asm-i386/smplock.h linux_umopenmosix/include/asm-i386/smplock.h --- linux-2.4.17/include/asm-i386/smplock.h Thu Nov 22 21:46:20 2001 +++ linux_umopenmosix/include/asm-i386/smplock.h Sat Jun 29 16:49:30 2002 @@ -73,3 +73,26 @@ "=m" (current->lock_depth)); #endif } + 
+#ifdef CONFIG_MOSIX + +/* the big MOSIX lock resembles the big kernel lock in the way it is + * allowed to be locked multiple times. It is not allowed, however, + * to remain locked when entering "schedule". + */ +extern spinlock_t mosix_flag; + +extern __inline__ void lock_mosix(void) +{ + if (!++current->mosix.lock_depth) + spin_lock(&mosix_flag); +} + +extern __inline__ void unlock_mosix(void) +{ + if (current->mosix.lock_depth < 0) + BUG(); + if (--current->mosix.lock_depth < 0) + spin_unlock(&mosix_flag); +} +#endif /* CONFIG_MOSIX */ diff -urN linux-2.4.17/include/asm-i386/spinlock.h linux_umopenmosix/include/asm-i386/spinlock.h --- linux-2.4.17/include/asm-i386/spinlock.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/asm-i386/spinlock.h Sat Jun 29 16:49:28 2002 @@ -209,4 +209,8 @@ return 0; } +#ifdef CONFIG_MOSIX +#define can_read_lock(rw) ((rw)->lock > 0) +#endif /* CONFIG_MOSIX */ + #endif /* __ASM_SPINLOCK_H */ diff -urN linux-2.4.17/include/asm-i386/stat.h linux_umopenmosix/include/asm-i386/stat.h --- linux-2.4.17/include/asm-i386/stat.h Wed Aug 2 00:08:00 2000 +++ linux_umopenmosix/include/asm-i386/stat.h Sat Jun 29 16:49:28 2002 @@ -75,4 +75,12 @@ unsigned long long st_ino; }; +#ifdef CONFIG_MOSIX_FS + +/* more information about MFS: */ +#define STAT64_NODE(_x) (*((short *)(&(_x).__pad0[2]))) +#define STAT64_ORIGDEV(_x) (*((short *)(&(_x).__pad0[4]))) +#define STAT64_ORIGINO(_x) (*((unsigned long *)(&(_x).__pad0[6]))) + +#endif /* CONFIG_MOSIX_FS */ #endif diff -urN linux-2.4.17/include/asm-i386/uaccess.h linux_umopenmosix/include/asm-i386/uaccess.h --- linux-2.4.17/include/asm-i386/uaccess.h Thu Nov 22 21:46:20 2001 +++ linux_umopenmosix/include/asm-i386/uaccess.h Wed Jun 26 23:45:17 2002 @@ -32,6 +32,74 @@ #define segment_eq(a,b) ((a).seg == (b).seg) +#ifdef CONFIG_MOSIX + +enum { FROM_USER, TO_USER }; + +#define COPY_COUNTER_MULTIPLIER 1024 +#define COPY_COUNTER_SHIFT 10 + +static inline void +mosix_local_statistic(int __nb, int __dir) +{ 
+ extern void cause_link_error_by_routine_that_does_not_exist(void); + + if(__dir == TO_USER) + { +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DSTATSDOWN) + return; +#endif /* CONFIG_MOSIX_DFSA */ + current->mosix.ncopyouts += COPY_COUNTER_MULTIPLIER; + current->mosix.copyoutbytes += __nb; + } + else if(__dir == FROM_USER) + { +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DSTATSDOWN) + { + current->mosix.copy_ins++; + current->mosix.bytes_in += __nb; + } + else +#endif /* CONFIG_MOSIX_DFSA */ + { + current->mosix.ncopyins += COPY_COUNTER_MULTIPLIER; + current->mosix.copyinbytes += __nb; + } + } + else /* impossible */ + cause_link_error_by_routine_that_does_not_exist(); +} + +#define USER_IS_REMOTE ((current->mosix.dflags & DDEPUTY) && \ + segment_eq(get_fs(), USER_DS)) + +static inline int +memory_not_here(int bytes, int direction) +{ + if(segment_eq(get_fs(), KERNEL_DS)) + return(0); + switch(current->mosix.dflags & (DDEPUTY|DREMOTE)) + { + case DDEPUTY: + return(1); + default: + mosix_local_statistic(bytes, direction); + /* fall thru */ + case DREMOTE: + return(0); + } +} + +extern unsigned long deputy_copy_from_user(void *, void *, unsigned long, int); +extern unsigned long deputy_copy_to_user(void *, void *, unsigned long, int); +extern unsigned long deputy_clear_user(void *, int, int); +extern long deputy_strncpy_from_user(char *, char *, int, int); +extern long deputy_strnlen_user(char *, long); +extern int deputy_verify_write(void *, unsigned long); +#endif /* CONFIG_MOSIX */ + extern int __verify_write(const void *, unsigned long); #define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) @@ -112,6 +180,23 @@ :"0" (ptr)) /* Careful: we have to cast the result to the type of the pointer for sign reasons */ +#ifdef CONFIG_MOSIX +#define get_user(x,ptr) \ +({ int __ret_gu,__val_gu; \ + if(memory_not_here(sizeof(*(ptr)),FROM_USER)) \ + __ret_gu = deputy_copy_from_user((void *)&__val_gu, \ + (void *)ptr, sizeof(*(ptr)), 1); 
\ + else \ + switch(sizeof (*(ptr))) { \ + case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ + case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ + case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ + default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) +#else #define get_user(x,ptr) \ ({ int __ret_gu,__val_gu; \ switch(sizeof (*(ptr))) { \ @@ -123,6 +208,7 @@ (x) = (__typeof__(*(ptr)))__val_gu; \ __ret_gu; \ }) +#endif /* CONFIG_MOSIX */ extern void __put_user_1(void); extern void __put_user_2(void); @@ -131,6 +217,34 @@ extern void __put_user_bad(void); +#ifdef CONFIG_MOSIX +#define put_user(x,ptr) \ +({ int __ret_pu; \ + if(memory_not_here(sizeof(*(ptr)),TO_USER)) { \ + __typeof__(*(ptr)) __val_pu = (__typeof__(*(ptr)))(x); \ + __ret_pu = deputy_copy_to_user((void *)(ptr), \ + (void *)&__val_pu, sizeof(*(ptr)), 1); } \ + else /* local memory: checked store, same helper as the non-MOSIX put_user */ \ + __ret_pu = \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))); \ + __ret_pu; \ +}) + +#define __get_user(x,ptr) \ + (memory_not_here(sizeof(*(ptr)),FROM_USER) ? \ + ({ int __ret_gu,__val_gu; \ + __ret_gu = deputy_copy_from_user((void *)&__val_gu, \ + (void *)(ptr), sizeof(*(ptr)), 0); \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; }) \ + : __get_user_nocheck((x),(ptr),sizeof(*(ptr)))) +#define __put_user(x,ptr) \ + (memory_not_here(sizeof(*(ptr)),TO_USER) ? 
({ \ + __typeof__(*(ptr)) __val_pu = (__typeof__(*(ptr)))(x); /* typed temporary holding the converted value */ \ + deputy_copy_to_user((void *)(ptr), \ + (void *)&__val_pu, sizeof(*(ptr)), 0); }) \ + : __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))) +#else #define put_user(x,ptr) \ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) @@ -138,6 +252,7 @@ __get_user_nocheck((x),(ptr),sizeof(*(ptr))) #define __put_user(x,ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) +#endif /* CONFIG_MOSIX */ #define __put_user_nocheck(x,ptr,size) \ ({ \ @@ -576,6 +691,35 @@ return n; } +#ifdef CONFIG_MOSIX +#define copy_to_user(to,from,n) \ + (memory_not_here((n),TO_USER) ? \ + deputy_copy_to_user((to),(void *)(from),(n),1) : \ + __builtin_constant_p(n) ? \ + __constant_copy_to_user((to),(from),(n)) : \ + __generic_copy_to_user((to),(from),(n))) + +#define copy_from_user(to,from,n) \ + (memory_not_here((n),FROM_USER) ? \ + deputy_copy_from_user((to),(void *)(from),(n),1) : \ + __builtin_constant_p(n) ? \ + __constant_copy_from_user((to),(from),(n)) : \ + __generic_copy_from_user((to),(from),(n))) + +#define __copy_to_user(to,from,n) \ + (memory_not_here((n),TO_USER) ? \ + deputy_copy_to_user((to),(void *)(from),(n),0) : \ + __builtin_constant_p(n) ? \ + __constant_copy_to_user_nocheck((to),(from),(n)) : \ + __generic_copy_to_user_nocheck((to),(from),(n))) + +#define __copy_from_user(to,from,n) \ + (memory_not_here((n),FROM_USER) ? \ + deputy_copy_from_user((to),(void *)(from),(n),0) : \ + __builtin_constant_p(n) ? \ + __constant_copy_from_user_nocheck((to),(from),(n)) : \ + __generic_copy_from_user_nocheck((to),(from),(n))) +#else #define copy_to_user(to,from,n) \ (__builtin_constant_p(n) ? \ __constant_copy_to_user((to),(from),(n)) : \ @@ -595,6 +739,7 @@ (__builtin_constant_p(n) ? 
\ __constant_copy_from_user_nocheck((to),(from),(n)) : \ __generic_copy_from_user_nocheck((to),(from),(n))) +#endif /* CONFIG_MOSIX */ long strncpy_from_user(char *dst, const char *src, long count); long __strncpy_from_user(char *dst, const char *src, long count); diff -urN linux-2.4.17/include/asm-i386/unistd.h linux_umopenmosix/include/asm-i386/unistd.h --- linux-2.4.17/include/asm-i386/unistd.h Wed Oct 17 19:03:03 2001 +++ linux_umopenmosix/include/asm-i386/unistd.h Wed Jun 26 23:45:17 2002 @@ -231,6 +231,7 @@ #define __NR_gettid 224 #define __NR_readahead 225 + /* user-visible error numbers are in the range -1 - -124: see */ #define __syscall_return(type, res) \ diff -urN linux-2.4.17/include/asm-um/a.out.h linux_umopenmosix/include/asm-um/a.out.h --- linux-2.4.17/include/asm-um/a.out.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/a.out.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,18 @@ +#ifndef __UM_A_OUT_H +#define __UM_A_OUT_H + +#include "asm/arch/a.out.h" + +#undef STACK_TOP + +extern unsigned long stacksizelim; + +extern unsigned long host_task_size; + +extern int honeypot; + +#define STACK_ROOM (stacksizelim) + +#define STACK_TOP (honeypot ? 
host_task_size : task_size) + +#endif diff -urN linux-2.4.17/include/asm-um/archparam-i386.h linux_umopenmosix/include/asm-um/archparam-i386.h --- linux-2.4.17/include/asm-um/archparam-i386.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/archparam-i386.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,64 @@ +#ifndef __UM_ARCHPARAM_I386_H +#define __UM_ARCHPARAM_I386_H + +/********* Bits for asm-um/elf.h ************/ + +#include "user.h" + +#define ELF_PLATFORM "i586" + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef int elf_fpregset_t; + +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define ELF_PLAT_INIT(r) do { /* zero the EBX..EAX slots of the register set passed as (r) */ \ + ((struct sys_pt_regs *) (r))->regs[EBX] = 0; \ + ((struct sys_pt_regs *) (r))->regs[ECX] = 0; \ + ((struct sys_pt_regs *) (r))->regs[EDX] = 0; \ + ((struct sys_pt_regs *) (r))->regs[ESI] = 0; \ + ((struct sys_pt_regs *) (r))->regs[EDI] = 0; \ + ((struct sys_pt_regs *) (r))->regs[EBP] = 0; \ + ((struct sys_pt_regs *) (r))->regs[EAX] = 0; \ +} while(0) + +/* Shamelessly stolen from include/asm-i386/elf.h */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = ((struct sys_pt_regs *) regs)->regs[EBX]; \ + pr_reg[1] = ((struct sys_pt_regs *) regs)->regs[ECX]; \ + pr_reg[2] = ((struct sys_pt_regs *) regs)->regs[EDX]; \ + pr_reg[3] = ((struct sys_pt_regs *) regs)->regs[ESI]; \ + pr_reg[4] = ((struct sys_pt_regs *) regs)->regs[EDI]; \ + pr_reg[5] = ((struct sys_pt_regs *) regs)->regs[EBP]; \ + pr_reg[6] = ((struct sys_pt_regs *) regs)->regs[EAX]; \ + pr_reg[7] = ((struct sys_pt_regs *) regs)->regs[DS]; \ + pr_reg[8] = ((struct sys_pt_regs *) regs)->regs[ES]; \ + /* fake once used fs and gs selectors? 
*/ \ + pr_reg[9] = ((struct sys_pt_regs *) regs)->regs[DS]; \ + pr_reg[10] = ((struct sys_pt_regs *) regs)->regs[DS]; \ + pr_reg[11] = ((struct sys_pt_regs *) regs)->regs[ORIG_EAX]; \ + pr_reg[12] = ((struct sys_pt_regs *) regs)->regs[EIP]; \ + pr_reg[13] = ((struct sys_pt_regs *) regs)->regs[CS]; \ + pr_reg[14] = ((struct sys_pt_regs *) regs)->regs[EFL]; \ + pr_reg[15] = ((struct sys_pt_regs *) regs)->regs[UESP]; \ + pr_reg[16] = ((struct sys_pt_regs *) regs)->regs[SS]; + +/********* Bits for asm-um/delay.h **********/ + +typedef unsigned long um_udelay_t; + +/********* Nothing for asm-um/hardirq.h **********/ + +/********* Nothing for asm-um/hw_irq.h **********/ + +/********* Nothing for asm-um/string.h **********/ + +#endif diff -urN linux-2.4.17/include/asm-um/archparam-ppc.h linux_umopenmosix/include/asm-um/archparam-ppc.h --- linux-2.4.17/include/asm-um/archparam-ppc.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/archparam-ppc.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,41 @@ +#ifndef __UM_ARCHPARAM_PPC_H +#define __UM_ARCHPARAM_PPC_H + +/********* Bits for asm-um/elf.h ************/ + +#define ELF_PLATFORM (0) + +#define ELF_ET_DYN_BASE (0x08000000) + +/* the following stolen from asm-ppc/elf.h */ +#define ELF_NGREG 48 /* includes nip, msr, lr, etc. 
*/ +#define ELF_NFPREG 33 /* includes fpscr */ +/* General registers */ +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +/* Floating point registers */ +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +#define ELF_DATA ELFDATA2MSB +#define ELF_ARCH EM_PPC + +/********* Bits for asm-um/delay.h **********/ + +typedef unsigned int um_udelay_t; + +/********* Bits for asm-um/hw_irq.h **********/ + +struct hw_interrupt_type; + +/********* Bits for asm-um/hardirq.h **********/ + +#define irq_enter(cpu, irq) hardirq_enter(cpu) +#define irq_exit(cpu, irq) hardirq_exit(cpu) + +/********* Bits for asm-um/string.h **********/ + +#define __HAVE_ARCH_STRRCHR + +#endif diff -urN linux-2.4.17/include/asm-um/atomic.h linux_umopenmosix/include/asm-um/atomic.h --- linux-2.4.17/include/asm-um/atomic.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/atomic.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_ATOMIC_H +#define __UM_ATOMIC_H + +#include "asm/arch/atomic.h" + +#endif diff -urN linux-2.4.17/include/asm-um/bitops.h linux_umopenmosix/include/asm-um/bitops.h --- linux-2.4.17/include/asm-um/bitops.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/bitops.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_BITOPS_H +#define __UM_BITOPS_H + +#include "asm/arch/bitops.h" + +#endif diff -urN linux-2.4.17/include/asm-um/boot.h linux_umopenmosix/include/asm-um/boot.h --- linux-2.4.17/include/asm-um/boot.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/boot.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_BOOT_H +#define __UM_BOOT_H + +#include "asm/arch/boot.h" + +#endif diff -urN linux-2.4.17/include/asm-um/bugs.h linux_umopenmosix/include/asm-um/bugs.h --- linux-2.4.17/include/asm-um/bugs.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/bugs.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_BUGS_H +#define __UM_BUGS_H + +void 
check_bugs(void); + +#endif diff -urN linux-2.4.17/include/asm-um/byteorder.h linux_umopenmosix/include/asm-um/byteorder.h --- linux-2.4.17/include/asm-um/byteorder.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/byteorder.h Wed Jun 26 23:47:18 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_BYTEORDER_H +#define __UM_BYTEORDER_H + +#include "asm/arch/byteorder.h" + +#endif diff -urN linux-2.4.17/include/asm-um/cache.h linux_umopenmosix/include/asm-um/cache.h --- linux-2.4.17/include/asm-um/cache.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/cache.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_CACHE_H +#define __UM_CACHE_H + +#define L1_CACHE_BYTES 32 + +#endif diff -urN linux-2.4.17/include/asm-um/checksum.h linux_umopenmosix/include/asm-um/checksum.h --- linux-2.4.17/include/asm-um/checksum.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/checksum.h Sat Jun 29 16:49:41 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_CHECKSUM_H +#define __UM_CHECKSUM_H + +#include "asm/arch/checksum.h" + +#endif diff -urN linux-2.4.17/include/asm-um/cobalt.h linux_umopenmosix/include/asm-um/cobalt.h --- linux-2.4.17/include/asm-um/cobalt.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/cobalt.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_COBALT_H +#define __UM_COBALT_H + +#include "asm/arch/cobalt.h" + +#endif diff -urN linux-2.4.17/include/asm-um/cpufeature.h linux_umopenmosix/include/asm-um/cpufeature.h --- linux-2.4.17/include/asm-um/cpufeature.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/cpufeature.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,90 @@ +/* + * cpufeature.h + * + * Defines x86 CPU feature bits + */ + +#ifndef __ASM_I386_CPUFEATURE_H +#define __ASM_I386_CPUFEATURE_H + +/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ +#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) + +#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */ + +/* Intel-defined 
CPU features, CPUID level 0x00000001, word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ + 
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ + +#ifdef CONFIG_MOSIX +#define FBIT(feature) (1 << ((feature) % 32)) +#define USER_MODE_FEATURES0 (FBIT(X86_FEATURE_TSC)|FBIT(X86_FEATURE_CX8)|\ + FBIT(X86_FEATURE_SEP)|FBIT(X86_FEATURE_CMOV)|\ + FBIT(X86_FEATURE_MMX)|FBIT(X86_FEATURE_CLFLSH)|\ + FBIT(X86_FEATURE_FXSR)|FBIT(X86_FEATURE_XMM)|\ + FBIT(X86_FEATURE_XMM2)) +#define USER_MODE_FEATURES1 (FBIT(X86_FEATURE_SYSCALL)|\ + FBIT(X86_FEATURE_MMXEXT)|FBIT(X86_FEATURE_LM)|\ + FBIT(X86_FEATURE_3DNOWEXT)|\ + FBIT(X86_FEATURE_3DNOW)) +#define USER_MODE_FEATURES2 0 +#define USER_MODE_FEATURES3 (FBIT(X86_FEATURE_CXMMX)) +#define USER_MODE_FEATURES { USER_MODE_FEATURES0, USER_MODE_FEATURES1, \ + USER_MODE_FEATURES2, USER_MODE_FEATURES3 } +#endif /* CONFIG_MOSIX */ +#endif /* __ASM_I386_CPUFEATURE_H */ + +/* + * Local Variables: + * mode:c + * comment-column:42 + * End: + */ diff -urN linux-2.4.17/include/asm-um/current.h 
linux_umopenmosix/include/asm-um/current.h --- linux-2.4.17/include/asm-um/current.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/current.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_CURRENT_H +#define __UM_CURRENT_H + +#ifndef __ASSEMBLY__ + +#include "linux/config.h" +#include "asm/page.h" + +struct task_struct; + +#define CURRENT_TASK(dummy) (((unsigned long) &dummy) & (PAGE_MASK << 2)) + +#define current ({ int dummy; (struct task_struct *) CURRENT_TASK(dummy); }) + +#endif /* __ASSEMBLY__ */ + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/include/asm-um/delay.h linux_umopenmosix/include/asm-um/delay.h --- linux-2.4.17/include/asm-um/delay.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/delay.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,7 @@ +#ifndef __UM_DELAY_H +#define __UM_DELAY_H + +#include "asm/arch/delay.h" +#include "asm/archparam.h" + +#endif diff -urN linux-2.4.17/include/asm-um/desc.h linux_umopenmosix/include/asm-um/desc.h --- linux-2.4.17/include/asm-um/desc.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/desc.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_DESC_H +#define __UM_DESC_H + +#include "asm/arch/desc.h" + +#endif diff -urN linux-2.4.17/include/asm-um/div64.h linux_umopenmosix/include/asm-um/div64.h --- linux-2.4.17/include/asm-um/div64.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/div64.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef _UM_DIV64_H +#define _UM_DIV64_H + +#include "asm/arch/div64.h" + +#endif diff 
-urN linux-2.4.17/include/asm-um/dma.h linux_umopenmosix/include/asm-um/dma.h --- linux-2.4.17/include/asm-um/dma.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/dma.h Sat Jun 29 16:50:24 2002 @@ -0,0 +1,10 @@ +#ifndef __UM_DMA_H +#define __UM_DMA_H + +#include "asm/arch/dma.h" + +#undef MAX_DMA_ADDRESS + +#define MAX_DMA_ADDRESS (uml_physmem) + +#endif diff -urN linux-2.4.17/include/asm-um/elf.h linux_umopenmosix/include/asm-um/elf.h --- linux-2.4.17/include/asm-um/elf.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/elf.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,27 @@ +#ifndef __UM_ELF_H +#define __UM_ELF_H + +#include "asm/archparam.h" + +#ifdef CONFIG_MOSIX +#define ELF_PLAT_INIT_REGS (BIT_OF_REGISTER(ebx)|BIT_OF_REGISTER(ecx)|\ + BIT_OF_REGISTER(edx)|BIT_OF_REGISTER(esi)|\ + BIT_OF_REGISTER(edi)|BIT_OF_REGISTER(ebp)|\ + BIT_OF_REGISTER(eax)) +#endif /* CONFIG_MOSIX */ + + + +#define ELF_HWCAP (0) + +#define SET_PERSONALITY(ex, ibcs2) do ; while(0) + +#define ELF_EXEC_PAGESIZE 4096 + +#define elf_check_arch(x) (1) + +#define ELF_CLASS ELFCLASS32 + +#define USE_ELF_CORE_DUMP + +#endif diff -urN linux-2.4.17/include/asm-um/errno.h linux_umopenmosix/include/asm-um/errno.h --- linux-2.4.17/include/asm-um/errno.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/errno.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_ERRNO_H +#define __UM_ERRNO_H + +#include "asm/arch/errno.h" + +#endif diff -urN linux-2.4.17/include/asm-um/fcntl.h linux_umopenmosix/include/asm-um/fcntl.h --- linux-2.4.17/include/asm-um/fcntl.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/fcntl.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_FCNTL_H +#define __UM_FCNTL_H + +#include "asm/arch/fcntl.h" + +#endif diff -urN linux-2.4.17/include/asm-um/fixmap.h linux_umopenmosix/include/asm-um/fixmap.h --- linux-2.4.17/include/asm-um/fixmap.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/fixmap.h Wed Jun 26 23:45:17 2002 
@@ -0,0 +1,6 @@ +#ifndef __UM_FIXMAP_H +#define __UM_FIXMAP_H + +#define FIXADDR_START (0xffff0000) + +#endif diff -urN linux-2.4.17/include/asm-um/floppy.h linux_umopenmosix/include/asm-um/floppy.h --- linux-2.4.17/include/asm-um/floppy.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/floppy.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_FLOPPY_H +#define __UM_FLOPPY_H + +#include "asm/arch/floppy.h" + +#endif diff -urN linux-2.4.17/include/asm-um/hardirq.h linux_umopenmosix/include/asm-um/hardirq.h --- linux-2.4.17/include/asm-um/hardirq.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/hardirq.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_HARDIRQ_H +#define __UM_HARDIRQ_H + +#include "asm/arch/hardirq.h" + +#endif diff -urN linux-2.4.17/include/asm-um/hdreg.h linux_umopenmosix/include/asm-um/hdreg.h --- linux-2.4.17/include/asm-um/hdreg.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/hdreg.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_HDREG_H +#define __UM_HDREG_H + +#include "asm/arch/hdreg.h" + +#endif diff -urN linux-2.4.17/include/asm-um/highmem.h linux_umopenmosix/include/asm-um/highmem.h --- linux-2.4.17/include/asm-um/highmem.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/highmem.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_HIGHMEM_H +#define __UM_HIGHMEM_H + +#include "asm/arch/highmem.h" + +#endif diff -urN linux-2.4.17/include/asm-um/hw_irq.h linux_umopenmosix/include/asm-um/hw_irq.h --- linux-2.4.17/include/asm-um/hw_irq.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/hw_irq.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,10 @@ +#ifndef _ASM_UM_HW_IRQ_H +#define _ASM_UM_HW_IRQ_H + +#include "asm/irq.h" +#include "asm/archparam.h" + +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) +{} + +#endif diff -urN linux-2.4.17/include/asm-um/i387.h linux_umopenmosix/include/asm-um/i387.h --- linux-2.4.17/include/asm-um/i387.h 
Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/i387.h Sat Jun 29 16:49:30 2002
@@ -0,0 +1,94 @@
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <linux/sched.h> /* NOTE(review): the four include targets were lost in extraction; restored from include/asm-i386/i387.h - confirm */
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+
+extern void init_fpu(void);
+/*
+ * FPU lazy state save handling...
+ */
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+extern void kernel_fpu_begin(void);
+#define kernel_fpu_end() stts()
+
+
+#define unlazy_fpu( tsk ) do { \
+	if ( (tsk)->flags & PF_USEDFPU ) \
+		save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+	if ( (tsk)->flags & PF_USEDFPU ) { \
+		asm volatile("fwait"); \
+		(tsk)->flags &= ~PF_USEDFPU; \
+		stts(); \
+	} \
+} while (0)
+
+/*
+ * FPU state interaction...
+ */
+extern unsigned short get_fpu_cwd( struct task_struct *tsk );
+extern unsigned short get_fpu_swd( struct task_struct *tsk );
+extern unsigned short get_fpu_twd( struct task_struct *tsk );
+extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+
+extern void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd );
+extern void set_fpu_swd( struct task_struct *tsk, unsigned short swd );
+extern void set_fpu_twd( struct task_struct *tsk, unsigned short twd );
+extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr );
+
+#define load_mxcsr( val ) do { \
+	unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+	asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387( struct _fpstate *buf );
+extern int restore_i387( struct _fpstate *buf );
+
+/*
+ * ptrace request handlers...
+ */ +extern int get_fpregs( struct user_i387_struct *buf, + struct task_struct *tsk ); +extern int set_fpregs( struct task_struct *tsk, + struct user_i387_struct *buf ); + +extern int get_fpxregs( struct user_fxsr_struct *buf, + struct task_struct *tsk ); +extern int set_fpxregs( struct task_struct *tsk, + struct user_fxsr_struct *buf ); + +/* + * FPU state for core dumps... + */ +extern int dump_fpu( struct pt_regs *regs, + struct user_i387_struct *fpu ); +extern int dump_extended_fpu( struct pt_regs *regs, + struct user_fxsr_struct *fpu ); + +#ifdef CONFIG_MOSIX +extern int has_fxsr(void); +extern void fsave_to_fxsave(union i387_union *, union i387_union *); +extern void fxsave_to_fsave(union i387_union *, union i387_union *); +#endif /* CONFIG_MOSIX */ + +#endif /* __ASM_I386_I387_H */ diff -urN linux-2.4.17/include/asm-um/ide.h linux_umopenmosix/include/asm-um/ide.h --- linux-2.4.17/include/asm-um/ide.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/ide.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IDE_H +#define __UM_IDE_H + +#include "asm/arch/ide.h" + +#endif diff -urN linux-2.4.17/include/asm-um/init.h linux_umopenmosix/include/asm-um/init.h --- linux-2.4.17/include/asm-um/init.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/init.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,11 @@ +#ifndef _UM_INIT_H +#define _UM_INIT_H + +#ifdef notdef +#define __init +#define __initdata +#define __initfunc(__arginit) __arginit +#define __cacheline_aligned +#endif + +#endif diff -urN linux-2.4.17/include/asm-um/io.h linux_umopenmosix/include/asm-um/io.h --- linux-2.4.17/include/asm-um/io.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/io.h Sat Jun 29 16:50:24 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IO_H +#define __UM_IO_H + +#include "asm/arch/io.h" + +#endif diff -urN linux-2.4.17/include/asm-um/ioctl.h linux_umopenmosix/include/asm-um/ioctl.h --- linux-2.4.17/include/asm-um/ioctl.h Thu Jan 1 02:00:00 1970 +++ 
linux_umopenmosix/include/asm-um/ioctl.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IOCTL_H +#define __UM_IOCTL_H + +#include "asm/arch/ioctl.h" + +#endif diff -urN linux-2.4.17/include/asm-um/ioctls.h linux_umopenmosix/include/asm-um/ioctls.h --- linux-2.4.17/include/asm-um/ioctls.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/ioctls.h Wed Jun 26 23:47:19 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IOCTLS_H +#define __UM_IOCTLS_H + +#include "asm/arch/ioctls.h" + +#endif diff -urN linux-2.4.17/include/asm-um/ipc.h linux_umopenmosix/include/asm-um/ipc.h --- linux-2.4.17/include/asm-um/ipc.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/ipc.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IPC_H +#define __UM_IPC_H + +#include "asm/arch/ipc.h" + +#endif diff -urN linux-2.4.17/include/asm-um/ipcbuf.h linux_umopenmosix/include/asm-um/ipcbuf.h --- linux-2.4.17/include/asm-um/ipcbuf.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/ipcbuf.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_IPCBUF_H +#define __UM_IPCBUF_H + +#include "asm/arch/ipcbuf.h" + +#endif diff -urN linux-2.4.17/include/asm-um/irq.h linux_umopenmosix/include/asm-um/irq.h --- linux-2.4.17/include/asm-um/irq.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/irq.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,27 @@ +#ifndef __UM_IRQ_H +#define __UM_IRQ_H + +#include "asm/arch/irq.h" +#include "asm/ptrace.h" + +#undef NR_IRQS + + +#define TIMER_IRQ 0 +#define UMN_IRQ 1 +#define CONSOLE_IRQ 2 +#define UBD_IRQ 3 +#define UM_ETH_IRQ 4 +#define SSL_IRQ 5 +#define ACCEPT_IRQ 6 +#define MCONSOLE_IRQ 7 + +#define LAST_IRQ MCONSOLE_IRQ +#define NR_IRQS (LAST_IRQ + 1) + +extern int um_request_irq(unsigned int irq, int fd, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif diff -urN linux-2.4.17/include/asm-um/keyboard.h linux_umopenmosix/include/asm-um/keyboard.h --- 
linux-2.4.17/include/asm-um/keyboard.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/keyboard.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_KEYBOARD_H +#define __UM_KEYBOARD_H + +#include "asm/arch/keyboard.h" + +#endif diff -urN linux-2.4.17/include/asm-um/linux_logo.h linux_umopenmosix/include/asm-um/linux_logo.h --- linux-2.4.17/include/asm-um/linux_logo.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/linux_logo.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_LINUX_LOGO_H +#define __UM_LINUX_LOGO_H + +#include "asm/arch/linux_logo.h" + +#endif diff -urN linux-2.4.17/include/asm-um/locks.h linux_umopenmosix/include/asm-um/locks.h --- linux-2.4.17/include/asm-um/locks.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/locks.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_LOCKS_H +#define __UM_LOCKS_H + +#include "asm/arch/locks.h" + +#endif diff -urN linux-2.4.17/include/asm-um/mca_dma.h linux_umopenmosix/include/asm-um/mca_dma.h --- linux-2.4.17/include/asm-um/mca_dma.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/mca_dma.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef mca___UM_DMA_H +#define mca___UM_DMA_H + +#include "asm/arch/mca_dma.h" + +#endif diff -urN linux-2.4.17/include/asm-um/mman.h linux_umopenmosix/include/asm-um/mman.h --- linux-2.4.17/include/asm-um/mman.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/mman.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_MMAN_H +#define __UM_MMAN_H + +#include "asm/arch/mman.h" + +#endif diff -urN linux-2.4.17/include/asm-um/mmu.h linux_umopenmosix/include/asm-um/mmu.h --- linux-2.4.17/include/asm-um/mmu.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/mmu.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __MMU_H +#define __MMU_H + +#include "asm/arch/mmu.h" + +#endif diff -urN linux-2.4.17/include/asm-um/mmu_context.h linux_umopenmosix/include/asm-um/mmu_context.h --- 
linux-2.4.17/include/asm-um/mmu_context.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/mmu_context.h Sat Jun 29 16:49:41 2002 @@ -0,0 +1,25 @@ +#ifndef __UM_MMU_CONTEXT_H +#define __UM_MMU_CONTEXT_H + +#include "linux/sched.h" + +#define init_new_context(task, mm) (0) +#define get_mmu_context(task) do ; while(0) +#define activate_context(tsk) do ; while(0) +#define destroy_context(mm) do ; while(0) + +static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) +{ +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk, unsigned cpu) +{ +} + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk, unsigned cpu) +{ +} + +#endif diff -urN linux-2.4.17/include/asm-um/module.h linux_umopenmosix/include/asm-um/module.h --- linux-2.4.17/include/asm-um/module.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/module.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_MODULE_H +#define __UM_MODULE_H + +#include "asm/arch/module.h" + +#endif diff -urN linux-2.4.17/include/asm-um/msgbuf.h linux_umopenmosix/include/asm-um/msgbuf.h --- linux-2.4.17/include/asm-um/msgbuf.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/msgbuf.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_MSGBUF_H +#define __UM_MSGBUF_H + +#include "asm/arch/msgbuf.h" + +#endif diff -urN linux-2.4.17/include/asm-um/msr.h linux_umopenmosix/include/asm-um/msr.h --- linux-2.4.17/include/asm-um/msr.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/msr.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,105 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ 
+ : "c" (msr)) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x79 +#define MSR_IA32_UCODE_REV 0x8b + +#define MSR_IA32_PERFCTR0 0xc1 +#define MSR_IA32_PERFCTR1 0xc2 + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_EVNTSEL0 0x186 +#define MSR_IA32_EVNTSEL1 0x187 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +/* AMD Defined MSRs */ +#define MSR_K6_EFER 0xC0000080 +#define MSR_K6_STAR 0xC0000081 +#define MSR_K6_WHCR 0xC0000082 +#define MSR_K6_UWCCR 0xC0000085 +#define MSR_K6_PSOR 0xC0000087 +#define MSR_K6_PFIR 0xC0000088 + +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 +#define MSR_K7_HWCR 0xC0010015 + +/* Centaur-Hauls/IDT defined MSRs. 
*/ +#define MSR_IDT_FCR1 0x107 +#define MSR_IDT_FCR2 0x108 +#define MSR_IDT_FCR3 0x109 +#define MSR_IDT_FCR4 0x10a + +#define MSR_IDT_MCR0 0x110 +#define MSR_IDT_MCR1 0x111 +#define MSR_IDT_MCR2 0x112 +#define MSR_IDT_MCR3 0x113 +#define MSR_IDT_MCR4 0x114 +#define MSR_IDT_MCR5 0x115 +#define MSR_IDT_MCR6 0x116 +#define MSR_IDT_MCR7 0x117 +#define MSR_IDT_MCR_CTRL 0x120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x1107 + +#endif /* __ASM_MSR_H */ diff -urN linux-2.4.17/include/asm-um/mtrr.h linux_umopenmosix/include/asm-um/mtrr.h --- linux-2.4.17/include/asm-um/mtrr.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/mtrr.h Fri Jun 28 00:26:40 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_MTRR_H +#define __UM_MTRR_H + +#include "asm/arch/mtrr.h" + +#endif diff -urN linux-2.4.17/include/asm-um/namei.h linux_umopenmosix/include/asm-um/namei.h --- linux-2.4.17/include/asm-um/namei.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/namei.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_NAMEI_H +#define __UM_NAMEI_H + +#include "asm/arch/namei.h" + +#endif diff -urN linux-2.4.17/include/asm-um/page.h linux_umopenmosix/include/asm-um/page.h --- linux-2.4.17/include/asm-um/page.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/page.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,45 @@ +#ifndef __UM_PAGE_H +#define __UM_PAGE_H + +struct page; + +#include "asm/arch/page.h" + +#undef BUG +#undef PAGE_BUG +#undef __pa +#undef __va +#undef virt_to_page +#undef VALID_PAGE +#undef PAGE_OFFSET +#undef KERNELBASE + +#define PAGE_OFFSET (uml_physmem) +#define KERNELBASE PAGE_OFFSET + +#ifndef __ASSEMBLY__ + +extern void stop(void); + +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + stop(); \ +} while (0) + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +#endif /* __ASSEMBLY__ */ + +extern unsigned long uml_physmem; + +#define __va_space (8*1024*1024) + +#define __pa(x) ((unsigned long) (x) - (uml_physmem)) 
+#define __va(x) ((void *) ((unsigned long) (x) + (uml_physmem)))
+
+#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page) (((page) - mem_map) < max_mapnr) /* argument parenthesized so expression arguments group correctly */
+
+#endif
diff -urN linux-2.4.17/include/asm-um/page_offset.h linux_umopenmosix/include/asm-um/page_offset.h
--- linux-2.4.17/include/asm-um/page_offset.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/page_offset.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1 @@
+#define PAGE_OFFSET_RAW (uml_physmem)
diff -urN linux-2.4.17/include/asm-um/param.h linux_umopenmosix/include/asm-um/param.h
--- linux-2.4.17/include/asm-um/param.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/param.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,24 @@
+#ifndef _UM_PARAM_H
+#define _UM_PARAM_H
+
+#ifndef HZ
+#define HZ 20
+#endif
+
+#define EXEC_PAGESIZE 4096
+
+#ifndef NGROUPS
+#define NGROUPS 32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64 /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
+#endif
+
+#endif
diff -urN linux-2.4.17/include/asm-um/pci.h linux_umopenmosix/include/asm-um/pci.h
--- linux-2.4.17/include/asm-um/pci.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/pci.h Sat Jun 29 16:52:18 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_PCI_H
+#define __UM_PCI_H
+
+#include "asm/arch/pci.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/pgalloc.h linux_umopenmosix/include/asm-um/pgalloc.h
--- linux-2.4.17/include/asm-um/pgalloc.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/pgalloc.h Sat Jun 29 16:49:30 2002
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PGALLOC_H
+#define __UM_PGALLOC_H
+
+#include "linux/mm.h"
+
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist
(current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +#define pmd_populate(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte))) + +/* + * Allocate and free page tables. + */ + +static inline pgd_t *get_pgd_slow(void) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return pgd; +} + +static inline pgd_t *get_pgd_fast(void) +{ + unsigned long *ret; + + if ((ret = pgd_quicklist) != NULL) { + pgd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pgd_slow(); + return (pgd_t *)ret; +} + +static inline void free_pgd_fast(pgd_t *pgd) +{ + *(unsigned long *)pgd = (unsigned long) pgd_quicklist; + pgd_quicklist = (unsigned long *) pgd; + pgtable_cache_size++; +} + +static inline void free_pgd_slow(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} + +static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + + pte = (pte_t *) __get_free_page(GFP_KERNEL); + if (pte) + clear_page(pte); + return pte; +} + +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +{ + unsigned long *ret; + + if ((ret = (unsigned long *)pte_quicklist) != NULL) { + pte_quicklist = (unsigned long *)(*ret); + ret[0] = ret[1]; + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +static inline void pte_free_pte_fast(pte_t *pte) +{ + *(unsigned long *)pte = (unsigned long) pte_quicklist; + pte_quicklist = (unsigned long *) pte; + pgtable_cache_size++; +} + +static inline void pte_free_slow(pte_t *pte) +{ + free_page((unsigned long)pte); +} + +#define pte_free(pte) pte_free_slow(pte) +#define pgd_free(pgd) free_pgd_slow(pgd) +#define pgd_alloc(mm) 
get_pgd_fast()
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ */
+
+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x) do { } while (0)
+#define pmd_free_fast(x) do { } while (0)
+#define pmd_free(x) do { } while (0)
+#define pgd_populate(mm, pmd, pte) BUG()
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_kernel_vm() flushes the kernel vm area
+ * - flush_tlb_range(mm, start, end) flushes a range of pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ */
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void flush_tlb_kernel_vm(void);
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+				      unsigned long start, unsigned long end)
+{
+}
+
+
+
+
+#ifndef CONFIG_SMP
+#define flush_tlb() __flush_tlb()
+#else
+#include <asm/smp.h> /* NOTE(review): include target was lost in extraction; asm/smp.h assumed from include/asm-i386/pgalloc.h - confirm */
+#define flush_tlb() flush_tlb_current_task()
+#endif
+
+
+
+
+
+
+
+
+
+
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/include/asm-um/pgtable.h linux_umopenmosix/include/asm-um/pgtable.h --- linux-2.4.17/include/asm-um/pgtable.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/pgtable.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,415 @@ +#ifndef __UM_PGTABLE_H +#define __UM_PGTABLE_H + +#include "linux/sched.h" +#include "asm/processor.h" +#include "asm/page.h" +#include "asm/fixmap.h" + +extern pgd_t swapper_pg_dir[1024]; + +#define flush_cache_all() do ; while (0) +#define flush_cache_mm(mm) do ; while (0) +#define flush_cache_range(mm, start, end) do ; while (0) +#define flush_cache_page(vma, vmaddr) do ; while (0) +#define flush_page_to_ram(page) do ; while (0) +#define flush_dcache_page(page) do ; while (0) +#define flush_icache_range(from, to) do ; while (0) +#define flush_icache_page(vma,pg) do ; while (0) + +extern void pte_free(pte_t *pte); + +extern void pgd_free(pgd_t *pgd); + +extern int do_check_pgt_cache(int, int); + +/* zero page used for uninitialized stuff */ +extern unsigned long *empty_zero_page; + +#define pgtable_cache_init() do ; while (0) + +/* PMD_SHIFT determines the size of the area a second-level page table can map */ +#define PMD_SHIFT 22 +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT 22 +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: the i386 is two-level, so + * we don't really have any PMD directory physically. 
+ */ +#define PTRS_PER_PTE 1024 +#define PTRS_PER_PMD 1 +#define PTRS_PER_PGD 1024 +#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_USER_PGD_NR 0 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * pgd entries used up by user/kernel: + */ + +#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#ifndef __ASSEMBLY__ +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +extern unsigned long high_physmem; + +#define VMALLOC_OFFSET (__va_space) +#define VMALLOC_START (((unsigned long) high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#define VMALLOC_END (FIXADDR_START) + +/* + * The 4MB page is guessing.. Detailed in the infamous "Chapter H" + * of the Pentium details, but assuming intel did the straightforward + * thing, this bit set in the page directory entry just means that + * the page directory entry points directly to a 4MB-aligned block of + * memory. 
+ */ +#define _PAGE_PRESENT 0x001 +#define _PAGE_NEWPAGE 0x002 + +#define _PAGE_PWT 0x003 + +#define _PAGE_PROTNONE 0x004 /* If not present */ +#define _PAGE_RW 0x008 +#define _PAGE_USER 0x010 +#define _PAGE_PCD 0x020 +#define _PAGE_ACCESSED 0x040 +#define _PAGE_DIRTY 0x080 +#define _PAGE_NEWPROT 0x100 + + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +/* + * The i386 can't do page protection for execute, and considers that the same are read. + * Also, write permissions imply read permissions. This is the closest we can get.. 
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY
+#define __P111 PAGE_COPY
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED
+#define __S111 PAGE_SHARED
+
+/*
+ * Define this if things work differently on an i386 and an i486:
+ * it will (on an i486) warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef TEST_VERIFY_AREA
+
+/* page table for 0-4MB for everybody */
+extern unsigned long pg0[1024];
+
+/*
+ * BAD_PAGETABLE is used when we need a bogus page-table, while
+ * BAD_PAGE is used for a bogus page.
+ *
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern pte_t __bad_page(void);
+extern pte_t * __bad_pagetable(void);
+
+#define BAD_PAGETABLE __bad_pagetable()
+#define BAD_PAGE __bad_page()
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+/* number of bits that fit into a memory pointer */
+#define BITS_PER_PTR (8*sizeof(unsigned long))
+
+/* to align the pointer to a pointer address */
+#define PTR_MASK (~(sizeof(void*)-1))
+
+/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
+/* 64-bit machines, beware!  SRB. */
+#define SIZEOF_PTR_LOG2 2
+
+/* to find an entry in a page-table */
+#define PAGE_PTR(address) \
+((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
+
+#define pte_none(x) (!(pte_val(x) & ~_PAGE_NEWPAGE))
+#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
+
+#define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
+
+#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE))
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
+
+#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE)
+#define pmd_mkuptodate(x)
(pmd_val(x) &= ~_PAGE_NEWPAGE) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t * pgdp) { } + + +/* + * Permanent address of a page. Obviously must never be + * called on a highmem page. + */ +#define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) +#define __page_address(page) ({ PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); }) +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +#define pte_page(x) \ + (mem_map+((unsigned long)((__pa(pte_val(x)) >> PAGE_SHIFT)))) +#define pte_address(x) ((void *) ((unsigned long) pte_val(x) & PAGE_MASK)) + +static inline pte_t pte_mknewprot(pte_t pte) +{ + pte_val(pte) |= _PAGE_NEWPROT; + return(pte); +} + +static inline pte_t pte_mknewpage(pte_t pte) +{ + pte_val(pte) |= _PAGE_NEWPAGE; + return(pte); +} + +static inline void set_pte(pte_t *pteptr, pte_t pteval) +{ + /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so + * fix_range knows to unmap it. _PAGE_NEWPROT is specific to + * mapped pages. + */ + *pteptr = pte_mknewpage(pteval); + if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); +} + +/* + * (pmds are folded into pgds so this doesnt get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) +#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
+ */ +static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } +static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } +static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } +static inline int pte_newprot(pte_t pte) { return pte_val(pte) & _PAGE_NEWPROT; } + +static inline pte_t pte_rdprotect(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_USER; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_exprotect(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_USER; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_DIRTY; + return(pte); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_ACCESSED; + return(pte); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_RW; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkread(pte_t pte) +{ + pte_val(pte) |= _PAGE_USER; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + pte_val(pte) |= _PAGE_USER; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= _PAGE_DIRTY; + return(pte); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= _PAGE_ACCESSED; + return(pte); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) |= _PAGE_RW; + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkuptodate(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_NEWPROT | _PAGE_NEWPAGE); + return(pte); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. 
+ */
+
+#define mk_pte(page, pgprot) \
+({ \
+	pte_t __pte; \
+	\
+	pte_val(__pte) = ((unsigned long) __va(((page)-mem_map)*(unsigned long)PAGE_SIZE + pgprot_val(pgprot))); \
+	if(pte_present(__pte)) __pte = pte_mknewprot(pte_mknewpage(__pte)); /* helpers return by value: keep the result */ \
+	__pte; \
+})
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot) \
+({ pte_t __pte; pte_val(__pte) = (physpage) + pgprot_val(pgprot); __pte; })
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte));
+	return pte;
+}
+
+#define pmd_page(pmd) \
+(pmd_val(pmd) & PAGE_MASK)
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+/* to find an entry in a page-table-directory */
+#define pgd_offset(mm, address) \
+((mm)->pgd + ((address) >> PGDIR_SHIFT))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the second-level page table.. */
+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+	return (pmd_t *) dir;
+}
+
+
+
+
+
+#define __flush_tlb() \
+	do { \
+		unsigned int tmpreg; \
+		\
+		__asm__ __volatile__( \
+			"movl %%cr3, %0; # flush TLB \n" \
+			"movl %0, %%cr3; \n" \
+			: "=r" (tmpreg) \
+			:: "memory"); \
+	} while (0)
+
+
+
+
+
+
+
+
+
+
+/* Find an entry in the third-level page table..
*/
+#define pte_offset(pmd, address) \
+((pte_t *) (pmd_page(*(pmd)) + (((address) >> 10) & ((PTRS_PER_PTE-1)<<2))))
+
+#define update_mmu_cache(vma,address,pte) do ; while (0)
+
+/* Encode and de-code a swap entry: type in bits 3-9, offset from bit 10 up */
+#define SWP_TYPE(x) (((x).val >> 3) & 0x7f)
+#define SWP_OFFSET(x) ((x).val >> 10)
+
+#define SWP_ENTRY(type, offset) \
+	((swp_entry_t) { ((type) << 3) | ((offset) << 10) })
+#define pte_to_swp_entry(pte) \
+	((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
+#define swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#define PageSkip(x) (0)
+#define kern_addr_valid(addr) (1)
+
+#include <asm-generic/pgtable.h>
+
+#endif
+
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/include/asm-um/poll.h linux_umopenmosix/include/asm-um/poll.h
--- linux-2.4.17/include/asm-um/poll.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/poll.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_POLL_H
+#define __UM_POLL_H
+
+#include "asm/arch/poll.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/posix_types.h linux_umopenmosix/include/asm-um/posix_types.h
--- linux-2.4.17/include/asm-um/posix_types.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/posix_types.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_POSIX_TYPES_H
+#define __UM_POSIX_TYPES_H
+
+#include "asm/arch/posix_types.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/processor-generic.h linux_umopenmosix/include/asm-um/processor-generic.h
--- linux-2.4.17/include/asm-um/processor-generic.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/processor-generic.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2000 Jeff Dike
(jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_PROCESSOR_GENERIC_H
+#define __UM_PROCESSOR_GENERIC_H
+
+struct pt_regs;
+
+struct task_struct;
+
+#include "linux/config.h"
+#include "linux/signal.h"
+#include "asm/segment.h"
+#include "asm/ptrace.h"
+#include "asm/siginfo.h"
+
+struct mm_struct;
+
+#define current_text_addr() ((void *) 0)
+
+#define cpu_relax() do ; while (0)
+
+#define SIGNAL_NONE 0
+#define SIGNAL_PENDING 1
+
+
+/* Feature tests: each checks one X86_FEATURE_* bit in boot_cpu_data.x86_capability. */
+#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
+#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+
+
+struct i387_fsave_struct {
+	long cwd;
+	long swd;
+	long twd;
+	long fip;
+	long fcs;
+	long foo;
+	long fos;
+	long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+	long status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+	unsigned short cwd;
+	unsigned short swd;
+	unsigned short twd;
+	unsigned short fop;
+	long fip;
+	long fcs;
+	long foo;
+	long fos;
+	long mxcsr;
+	long reserved;
+	long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+	long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+	long padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+	long cwd;
+	long swd;
+	long twd;
+	long fip;
+	long fcs;
+	long foo;
+	long fos;
+	long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+	unsigned char ftop, changed, lookahead, no_update, rm, alimit;
+	struct info *info;
+	unsigned long entry_eip;
+};
+
+
+
+/* One storage area viewed as any of the three FPU state layouts above. */
+union i387_union {
+	struct i387_fsave_struct fsave;
+	struct i387_fxsave_struct fxsave;
+	struct i387_soft_struct soft;
+};
+
+
+
+
+#ifdef CONFIG_MOSIX
+#define START_THREAD_REGS (BIT_OF_REGISTER(xds)|BIT_OF_REGISTER(xes)|\
+	BIT_OF_REGISTER(xss)|BIT_OF_REGISTER(xcs)|\
+	BIT_OF_REGISTER(eip)|BIT_OF_REGISTER(esp))
+#endif /* CONFIG_MOSIX */
+
+
+
+struct thread_struct {
+	int extern_pid;
+	int tracing;
+	int forking;
+	unsigned long kernel_stack;
+	struct signal_context *signal_context;
+	int nsyscalls;
+	void *sc;
+	struct sys_pt_regs process_regs;
+	unsigned long cr2, trap_no, error_code;
+	int err;
+	void *fault_addr;
+	void *fault_catcher;
+	int vm_seq;
+
+
+	unsigned long esp0, saved_esp0;
+
+
+	union i387_union i387;
+	unsigned long debugreg[8];
+
+
+	struct task_struct *prev_sched;
+	unsigned long temp_stack;
+	int switch_pipe[2];
+	struct {
+		int op;
+		union {
+			struct {
+				int pid;
+			} fork, exec;
+			struct {
+				int (*proc)(void *);
+				void *arg;
+				int flags;
+				int new_pid;
+				struct task_struct *new_task;
+				int cpu;
+			} thread;
+			struct {
+				unsigned long stack;
+				struct sys_pt_regs regs;
+				struct task_struct *from;
+			} fork_finish;
+			struct {
+				void (*proc)(void *);
+				void *arg;
+			} cb;
+			struct {
+				int restore_state;
+			} trace_on;
+		} u;
+	} request;
+};
+/* Members not named in INIT_THREAD below get the default (zero) initialisation. */
+#define INIT_THREAD \
+{ \
+	extern_pid: -1, \
+	tracing: 0, \
+	forking: 0, \
+	kernel_stack: 0, \
+	signal_context: NULL, \
+	nsyscalls: 0, \
+	sc: NULL, \
+	process_regs: EMPTY_REGS, \
+	cr2: 0, \
+	err: 0, \
+	fault_addr: NULL, \
+	vm_seq: 0, \
+	prev_sched: NULL, \
+	temp_stack: 0, \
+	switch_pipe: { -1, -1 }, \
+	request: { 0 } \
+}
+
+
+
+
+
+
+#ifdef CONFIG_MOSIX
+extern int user_thread(int (*fn)(void *), void * arg, unsigned long flags);
+#endif /*
CONFIG_MOSIX */
+
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+extern struct task_struct *alloc_task_struct(void);
+extern void free_task_struct(struct task_struct *task);
+
+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)	/* bumps the count of the page holding tsk */
+
+extern void release_thread(struct task_struct *);
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+static inline void release_segments(struct mm_struct *mm)
+{	/* empty body: nothing is released here */
+}
+
+static inline void copy_segments(struct task_struct *p,
+				 struct mm_struct *new_mm)
+{	/* empty body: nothing is copied here */
+}
+
+#define forget_segments() do ; while(0)
+
+extern unsigned long thread_saved_pc(struct thread_struct *t);
+
+/*
+extern unsigned long init_task_ptr;
+
+#define init_task_u (*((union task_union *) init_task_ptr))
+*/
+#define init_task (init_task_union.task)
+#define init_stack (init_task_union.stack)
+
+/*
+ * User space process size: read from task_size at run time (3GB default).
+ */
+extern unsigned long task_size;
+
+#define TASK_SIZE (task_size)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE (0x40000000)
+
+extern void start_thread(struct pt_regs *regs, unsigned long entry,
+			 unsigned long stack);
+
+struct cpuinfo_um {
+
+
+
+	__u8 x86; /* CPU family */
+	__u8 x86_vendor; /* CPU vendor */
+	__u8 x86_model;
+	__u8 x86_mask;
+	char wp_works_ok; /* It doesn't on 386's */
+	char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+	char hard_math;
+	char rfu;
+	int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+	__u32 x86_capability[4];
+	char x86_vendor_id[16];
+	char x86_model_id[64];
+	int x86_cache_size; /* in KB - valid for CPUS which support this
+			       call */
+	int fdiv_bug;
+	int f00f_bug;
+	int coma_bug;
+
+
+
+
+	unsigned long loops_per_jiffy;
+	unsigned long *pgd_quick;
+	unsigned long *pmd_quick;
+	unsigned long *pte_quick;
+	unsigned long pgtable_cache_sz;
+	int ipi_pipe[2];
+};
+
+
+
+extern struct cpuinfo_um boot_cpu_data;
+
+#define my_cpu_data cpu_data[smp_processor_id()]
+/* Without CONFIG_SMP, cpu_data is a pointer to boot_cpu_data, so my_cpu_data indexes through that pointer. */
+#ifdef CONFIG_SMP
+extern struct cpuinfo_um cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define KSTK_EIP(tsk) (0)	/* stub: always evaluates to 0 */
+#define KSTK_ESP(tsk) (0)	/* stub: always evaluates to 0 */
+#define get_wchan(p) (0)	/* stub: always evaluates to 0 */
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/include/asm-um/processor-i386.h linux_umopenmosix/include/asm-um/processor-i386.h
--- linux-2.4.17/include/asm-um/processor-i386.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/processor-i386.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_PROCESSOR_I386_H
+#define __UM_PROCESSOR_I386_H
+
+#include "asm/processor-generic.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/processor-ppc.h linux_umopenmosix/include/asm-um/processor-ppc.h
--- linux-2.4.17/include/asm-um/processor-ppc.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/processor-ppc.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,15 @@
+#ifndef __UM_PROCESSOR_PPC_H
+#define __UM_PROCESSOR_PPC_H
+
+#if defined(__ASSEMBLY__)
+
+#define CONFIG_ALL_PPC
+#include "arch/processor.h"
+
+#else
+
+#include "asm/processor-generic.h"
+
+#endif
+
+#endif
diff -urN linux-2.4.17/include/asm-um/ptrace.h linux_umopenmosix/include/asm-um/ptrace.h
--- linux-2.4.17/include/asm-um/ptrace.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/ptrace.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,58 @@
+#ifndef __UM_PTRACE_H
+#define __UM_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+#include "asm/current.h"
+
+#define pt_regs pt_regs_subarch	/* the arch header's struct pt_regs is renamed while it is included */
+
+#include "asm/arch/ptrace.h"
+
+#undef pt_regs
+#undef user_mode
+#undef instruction_pointer
+
+#include "../../arch/um/include/sysdep/ptrace.h"
+
+struct pt_regs { int user_mode;
+	/* user_mode is read by user_mode(regs) below; the remaining members carry i386 register names */
+	long ebx;
+	long ecx;
+	long edx;
+	long esi;
+	long edi;
+	long ebp;
+	long eax;
+	int xds;
+	int xes;
+	long orig_eax;
+	long eip;
+	int xcs;
+	long eflags;
+	long esp;
+	int xss;
+
+ };
+
+
+
+
+
+
+
+
+
+#define user_mode(regs) ((regs)->user_mode)
+
+struct task_struct;
+
+extern int putreg(struct task_struct *child, unsigned long regno,
+		  unsigned long value);
+unsigned long getreg(struct task_struct *child, unsigned long
regno); + +#define INIT_TASK_SIZE (4 * PAGE_SIZE) + +#endif + +#endif diff -urN linux-2.4.17/include/asm-um/resource.h linux_umopenmosix/include/asm-um/resource.h --- linux-2.4.17/include/asm-um/resource.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/resource.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_RESOURCE_H +#define __UM_RESOURCE_H + +#include "asm/arch/resource.h" + +#endif diff -urN linux-2.4.17/include/asm-um/rwlock.h linux_umopenmosix/include/asm-um/rwlock.h --- linux-2.4.17/include/asm-um/rwlock.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/rwlock.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_RWLOCK_H +#define __UM_RWLOCK_H + +#include "asm/arch/rwlock.h" + +#endif diff -urN linux-2.4.17/include/asm-um/rwsem.h linux_umopenmosix/include/asm-um/rwsem.h --- linux-2.4.17/include/asm-um/rwsem.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/rwsem.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,10 @@ +#ifndef __UM_RWSEM_H__ +#define __UM_RWSEM_H__ + +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#define __builtin_expect(exp,c) (exp) +#endif + +#include "asm/arch/rwsem.h" + +#endif diff -urN linux-2.4.17/include/asm-um/scatterlist.h linux_umopenmosix/include/asm-um/scatterlist.h --- linux-2.4.17/include/asm-um/scatterlist.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/scatterlist.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SCATTERLIST_H +#define __UM_SCATTERLIST_H + +#include "asm/arch/scatterlist.h" + +#endif diff -urN linux-2.4.17/include/asm-um/segment.h linux_umopenmosix/include/asm-um/segment.h --- linux-2.4.17/include/asm-um/segment.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/segment.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,16 @@ +#ifndef __UM_SEGMENT_H +#define __UM_SEGMENT_H +#endif + +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#define __KERNEL_CS 0x10 +#define __KERNEL_DS 0x18 + +#define __USER_CS 0x23 +#define __USER_DS 0x2B + 
+#endif + + diff -urN linux-2.4.17/include/asm-um/semaphore.h linux_umopenmosix/include/asm-um/semaphore.h --- linux-2.4.17/include/asm-um/semaphore.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/semaphore.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SEMAPHORE_H +#define __UM_SEMAPHORE_H + +#include "asm/arch/semaphore.h" + +#endif diff -urN linux-2.4.17/include/asm-um/sembuf.h linux_umopenmosix/include/asm-um/sembuf.h --- linux-2.4.17/include/asm-um/sembuf.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/sembuf.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SEMBUF_H +#define __UM_SEMBUF_H + +#include "asm/arch/sembuf.h" + +#endif diff -urN linux-2.4.17/include/asm-um/serial.h linux_umopenmosix/include/asm-um/serial.h --- linux-2.4.17/include/asm-um/serial.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/serial.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SERIAL_H +#define __UM_SERIAL_H + +#include "asm/arch/serial.h" + +#endif diff -urN linux-2.4.17/include/asm-um/shmbuf.h linux_umopenmosix/include/asm-um/shmbuf.h --- linux-2.4.17/include/asm-um/shmbuf.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/shmbuf.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SHMBUF_H +#define __UM_SHMBUF_H + +#include "asm/arch/shmbuf.h" + +#endif diff -urN linux-2.4.17/include/asm-um/shmparam.h linux_umopenmosix/include/asm-um/shmparam.h --- linux-2.4.17/include/asm-um/shmparam.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/shmparam.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SHMPARAM_H +#define __UM_SHMPARAM_H + +#include "asm/arch/shmparam.h" + +#endif diff -urN linux-2.4.17/include/asm-um/sigcontext-generic.h linux_umopenmosix/include/asm-um/sigcontext-generic.h --- linux-2.4.17/include/asm-um/sigcontext-generic.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/sigcontext-generic.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef 
__UM_SIGCONTEXT_GENERIC_H +#define __UM_SIGCONTEXT_GENERIC_H + +#include "asm/arch/sigcontext.h" + +#endif diff -urN linux-2.4.17/include/asm-um/sigcontext-i386.h linux_umopenmosix/include/asm-um/sigcontext-i386.h --- linux-2.4.17/include/asm-um/sigcontext-i386.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/sigcontext-i386.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SIGCONTEXT_I386_H +#define __UM_SIGCONTEXT_I386_H + +#include "asm/sigcontext-generic.h" + +#endif diff -urN linux-2.4.17/include/asm-um/sigcontext-ppc.h linux_umopenmosix/include/asm-um/sigcontext-ppc.h --- linux-2.4.17/include/asm-um/sigcontext-ppc.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/sigcontext-ppc.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,10 @@ +#ifndef __UM_SIGCONTEXT_PPC_H +#define __UM_SIGCONTEXT_PPC_H + +#define pt_regs sys_pt_regs + +#include "asm/sigcontext-generic.h" + +#undef pt_regs + +#endif diff -urN linux-2.4.17/include/asm-um/siginfo.h linux_umopenmosix/include/asm-um/siginfo.h --- linux-2.4.17/include/asm-um/siginfo.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/siginfo.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SIGINFO_H +#define __UM_SIGINFO_H + +#include "asm/arch/siginfo.h" + +#endif diff -urN linux-2.4.17/include/asm-um/signal.h linux_umopenmosix/include/asm-um/signal.h --- linux-2.4.17/include/asm-um/signal.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/signal.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_SIGNAL_H +#define __UM_SIGNAL_H + +// #include "sysdep/ptrace.h" +#include "ptrace.h" +#include "asm/arch/signal.h" + +struct signal_context { + void *sc; + struct sys_pt_regs regs; + sigset_t sigs; + struct signal_context *prev; +}; + + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/include/asm-um/smp.h linux_umopenmosix/include/asm-um/smp.h
--- linux-2.4.17/include/asm-um/smp.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/smp.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,19 @@
+#ifndef __UM_SMP_H
+#define __UM_SMP_H
+
+#include "linux/config.h"	/* must precede the CONFIG_SMP test below */
+
+#ifdef CONFIG_SMP
+#include "asm/current.h"
+
+#define smp_processor_id() (current->processor)
+#define cpu_logical_map(n) (n)
+#define cpu_number_map(n) (n)
+#define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */
+extern int hard_smp_processor_id(void);
+extern unsigned long cpu_online_map;
+#define NO_PROC_ID (-1)
+
+#endif
+
+#endif
diff -urN linux-2.4.17/include/asm-um/smplock.h linux_umopenmosix/include/asm-um/smplock.h
--- linux-2.4.17/include/asm-um/smplock.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/smplock.h Sat Jun 29 16:49:30 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_SMPLOCK_H
+#define __UM_SMPLOCK_H
+
+#include "asm/arch/smplock.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/socket.h linux_umopenmosix/include/asm-um/socket.h
--- linux-2.4.17/include/asm-um/socket.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/socket.h Wed Jun 26 23:47:18 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_SOCKET_H
+#define __UM_SOCKET_H
+
+#include "asm/arch/socket.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/sockios.h linux_umopenmosix/include/asm-um/sockios.h
--- linux-2.4.17/include/asm-um/sockios.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/sockios.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_SOCKIOS_H
+#define __UM_SOCKIOS_H
+
+#include "asm/arch/sockios.h"
+
+#endif
diff -urN
linux-2.4.17/include/asm-um/softirq.h linux_umopenmosix/include/asm-um/softirq.h --- linux-2.4.17/include/asm-um/softirq.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/softirq.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,13 @@ +#ifndef __UM_SOFTIRQ_H +#define __UM_SOFTIRQ_H + +#include "linux/smp.h" +#include "asm/system.h" +#include "asm/processor.h" + +/* A gratuitous name change */ +#define i386_bh_lock um_bh_lock +#include "asm/arch/softirq.h" +#undef i386_bh_lock + +#endif diff -urN linux-2.4.17/include/asm-um/somename.h linux_umopenmosix/include/asm-um/somename.h --- linux-2.4.17/include/asm-um/somename.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/somename.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_SOMENAME_H +#define __UM_SOMENAME_H + + + +#endif diff -urN linux-2.4.17/include/asm-um/spinlock.h linux_umopenmosix/include/asm-um/spinlock.h --- linux-2.4.17/include/asm-um/spinlock.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/spinlock.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,10 @@ +#ifndef __UM_SPINLOCK_H +#define __UM_SPINLOCK_H + +#include "linux/config.h" + +#ifdef CONFIG_SMP +#include "asm/arch/spinlock.h" +#endif + +#endif diff -urN linux-2.4.17/include/asm-um/stat.h linux_umopenmosix/include/asm-um/stat.h --- linux-2.4.17/include/asm-um/stat.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/stat.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_STAT_H +#define __UM_STAT_H + +#include "asm/arch/stat.h" + +#endif diff -urN linux-2.4.17/include/asm-um/statfs.h linux_umopenmosix/include/asm-um/statfs.h --- linux-2.4.17/include/asm-um/statfs.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/statfs.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,6 @@ +#ifndef _UM_STATFS_H +#define _UM_STATFS_H + +#include "asm/arch/statfs.h" + +#endif diff -urN linux-2.4.17/include/asm-um/string.h linux_umopenmosix/include/asm-um/string.h --- linux-2.4.17/include/asm-um/string.h Thu Jan 1 02:00:00 
1970
+++ linux_umopenmosix/include/asm-um/string.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,7 @@
+#ifndef __UM_STRING_H
+#define __UM_STRING_H
+
+#include "asm/arch/string.h"
+#include "asm/archparam.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/system-generic.h linux_umopenmosix/include/asm-um/system-generic.h
--- linux-2.4.17/include/asm-um/system-generic.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/system-generic.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,49 @@
+#ifndef __UM_SYSTEM_GENERIC_H
+#define __UM_SYSTEM_GENERIC_H
+
+#include "asm/arch/system.h"
+
+#undef prepare_to_switch
+#undef switch_to
+#undef __save_flags
+#undef save_flags
+#undef __restore_flags
+#undef restore_flags
+#undef __cli
+#undef __sti
+#undef cli
+#undef sti
+#undef local_irq_save
+#undef local_irq_restore
+#undef local_irq_disable
+#undef local_irq_enable
+
+#define prepare_to_switch() do ; while(0)
+
+void *_switch_to(void *prev, void *next);
+/* Statement-shaped and fully parenthesised; the third argument is not used. */
+#define switch_to(prev, next, last) do { (prev) = _switch_to((prev), (next)); } while(0)
+
+extern int set_signals(int enable);
+extern void block_signals(void);
+extern void unblock_signals(void);
+
+#define local_irq_save(flags) do { (flags) = set_signals(0); } while(0)
+
+#define local_irq_restore(flags) do { set_signals(flags); } while(0)
+
+#define local_irq_enable() unblock_signals()
+#define local_irq_disable() block_signals()
+
+#define __sti() unblock_signals()
+#define sti() unblock_signals()
+#define __cli() block_signals()
+#define cli() block_signals()
+/* NOTE(review): __save_flags() maps to local_irq_save(), i.e. it also calls set_signals(0); confirm no caller expects a pure read. */
+#define __save_flags(x) local_irq_save(x)
+#define save_flags(x) __save_flags(x)
+
+#define __restore_flags(x) local_irq_restore(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
diff -urN linux-2.4.17/include/asm-um/system-i386.h linux_umopenmosix/include/asm-um/system-i386.h
--- linux-2.4.17/include/asm-um/system-i386.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/system-i386.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_SYSTEM_I386_H
+#define __UM_SYSTEM_I386_H
+
+#include "asm/system-generic.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/system-ppc.h linux_umopenmosix/include/asm-um/system-ppc.h
--- linux-2.4.17/include/asm-um/system-ppc.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/system-ppc.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,12 @@
+#ifndef __UM_SYSTEM_PPC_H
+#define __UM_SYSTEM_PPC_H
+
+#define _switch_to _ppc_switch_to
+
+#include "asm/arch/system.h"
+
+#undef _switch_to
+
+#include "asm/system-generic.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/termbits.h linux_umopenmosix/include/asm-um/termbits.h
--- linux-2.4.17/include/asm-um/termbits.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/termbits.h Wed Jun 26 23:47:19 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_TERMBITS_H
+#define __UM_TERMBITS_H
+
+#include "asm/arch/termbits.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/termios.h linux_umopenmosix/include/asm-um/termios.h
--- linux-2.4.17/include/asm-um/termios.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/termios.h Wed Jun 26 23:47:19 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_TERMIOS_H
+#define __UM_TERMIOS_H
+
+#include "asm/arch/termios.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/timex.h linux_umopenmosix/include/asm-um/timex.h
--- linux-2.4.17/include/asm-um/timex.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/timex.h Sat Jun 29 16:49:28 2002
@@ -0,0 +1,15 @@
+#ifndef __UM_TIMEX_H
+#define __UM_TIMEX_H
+
+#include "linux/time.h"
+
+typedef unsigned long cycles_t;
+
+#define cacheflush_time (0)
+
+static inline cycles_t get_cycles (void)	/* stub: always returns 0 */
+{
+	return 0;
+}
+
+#endif
diff -urN linux-2.4.17/include/asm-um/tlb.h linux_umopenmosix/include/asm-um/tlb.h
--- linux-2.4.17/include/asm-um/tlb.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/tlb.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1 @@
+#include <asm-generic/tlb.h>
diff -urN
linux-2.4.17/include/asm-um/types.h linux_umopenmosix/include/asm-um/types.h
--- linux-2.4.17/include/asm-um/types.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/types.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_TYPES_H
+#define __UM_TYPES_H
+
+#include "asm/arch/types.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/uaccess.h linux_umopenmosix/include/asm-um/uaccess.h
--- linux-2.4.17/include/asm-um/uaccess.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/uaccess.h Sat Jun 29 16:49:30 2002
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_UACCESS_H
+#define __UM_UACCESS_H
+
+#include "linux/string.h"
+#include "linux/sched.h"
+#include "asm/processor.h"
+#include "asm/errno.h"
+#include "asm/current.h"
+#include "asm/a.out.h"
+#include "asm/cpufeature.h"
+
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+
+#ifdef CONFIG_MOSIX
+#define COPY_COUNTER_MULTIPLIER 1024
+enum { FROM_USER, TO_USER };
+
+extern unsigned long deputy_copy_from_user(void *, void *, unsigned long, int);
+extern unsigned long deputy_copy_to_user(void *, void *, unsigned long, int);
+extern unsigned long deputy_clear_user(void *, int, int);
+extern long deputy_strncpy_from_user(char *, char *, int, int);
+extern long deputy_strnlen_user(char *, long);
+extern int deputy_verify_write(void *, unsigned long);
+
+#endif
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+#define segment_eq(a,b) ((a).seg == (b).seg)
+
+/* The two helpers below use TO_USER/FROM_USER/COPY_COUNTER_MULTIPLIER, defined above only under CONFIG_MOSIX. */
+#ifdef CONFIG_MOSIX
+static inline void
+mosix_local_statistic(int __nb, int __dir)
+{
+	extern void cause_link_error_by_routine_that_does_not_exist(void);
+
+	if(__dir == TO_USER)
+	{
+#ifdef CONFIG_MOSIX_DFSA
+		if(current->mosix.dflags & DSTATSDOWN)
+			return;
+#endif /* CONFIG_MOSIX_DFSA */
+		current->mosix.ncopyouts += COPY_COUNTER_MULTIPLIER;
+		current->mosix.copyoutbytes += __nb;
+	}
+	else if(__dir == FROM_USER)
+	{
+#ifdef CONFIG_MOSIX_DFSA
+		if(current->mosix.dflags & DSTATSDOWN)
+		{
+			current->mosix.copy_ins++;
+			current->mosix.bytes_in += __nb;
+		}
+		else
+#endif /* CONFIG_MOSIX_DFSA */
+		{
+			current->mosix.ncopyins += COPY_COUNTER_MULTIPLIER;
+			current->mosix.copyinbytes += __nb;
+		}
+	}
+	else /* impossible */
+		cause_link_error_by_routine_that_does_not_exist();
+}
+
+/* Returns 1 only when exactly DDEPUTY is set and fs is not KERNEL_DS; otherwise 0. */
+/* Statistics are updated in the default case (neither flag, or both flags, set). */
+static inline int
+memory_not_here(int bytes, int direction)
+{
+	if(segment_eq(get_fs(), KERNEL_DS))
+		return(0);
+	switch(current->mosix.dflags & (DDEPUTY|DREMOTE))
+	{
+	case DDEPUTY:
+		return(1);
+	default:
+		mosix_local_statistic(bytes, direction);
+		/* fall thru */
+	case DREMOTE:
+		return(0);
+	}
+}
+#endif /* CONFIG_MOSIX */
+
+
+
+
+
+
+
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+#define ABOVE_KMEM (16 * 1024 * 1024)
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(TASK_SIZE)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->addr_limit)
+#define set_fs(x) (current->addr_limit = (x))
+
+extern unsigned long end_vm;
+extern unsigned long uml_physmem;
+
+#define under_task_size(addr, size) \
+	(((unsigned long) (addr) < TASK_SIZE) && \
+	 (((unsigned long) (addr) + (size)) < TASK_SIZE))
+
+#define is_stack(addr, size) \
+	(((unsigned long) (addr) < STACK_TOP) && \
+	 ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \
+	 (((unsigned long) (addr) + (size)) <= STACK_TOP))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+/* Reads and KERNEL_DS always pass; writes must not wrap and must lie under TASK_SIZE or in the stack window. */
+#define access_ok(type, addr, size) \
+	(((type) == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \
+	 (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \
+	  (under_task_size(addr, size) || is_stack(addr, size))))
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+	return(access_ok(type, addr, size) ? 0 : -EFAULT);
+}
+
+extern unsigned long get_fault_addr(void);
+
+extern int __do_copy_from_user(void *to, const void *from, int n,
+			       void **fault_addr, void **fault_catcher);
+
+static inline int copy_from_user(void *to, const void *from, int n)	/* yields n when access_ok() fails */
+{
+	return(access_ok(VERIFY_READ, from, n) ?
+	       __do_copy_from_user(to, from, n,
+				   &current->thread.fault_addr,
+				   &current->thread.fault_catcher) : n);
+}
+
+#define __copy_from_user(to, from, n) copy_from_user(to, from, n)
+
+extern int __do_copy_to_user(void *to, const void *from, int n,
+			     void **fault_addr, void **fault_catcher);
+
+static inline int copy_to_user(void *to, const void *from, int n)	/* yields n when access_ok() fails */
+{
+	return(access_ok(VERIFY_WRITE, to, n) ?
+	       __do_copy_to_user(to, from, n,
+				 &current->thread.fault_addr,
+				 &current->thread.fault_catcher) : n);
+}
+
+#define __copy_to_user(to, from, n) copy_to_user(to, from, n)
+
+#define __get_user(x, ptr) \
+({ \
+	const __typeof__(ptr) __private_ptr = ptr; \
+	__typeof__(*(__private_ptr)) __private_val; \
+	int __private_ret = -EFAULT; \
+	(x) = 0; \
+	if (__copy_from_user(&__private_val, (__private_ptr), \
+	    sizeof(*(__private_ptr))) == 0) {\
+		(x) = (__typeof__(*(__private_ptr))) __private_val; \
+		__private_ret = 0; \
+	} \
+	__private_ret; \
+})
+
+#define get_user(x, ptr) \
+({ \
+	const __typeof__((*ptr)) *private_ptr = (ptr); \
+	(access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \
+	 __get_user(x, private_ptr) : ((x) = 0, -EFAULT)); \
+})
+
+#define __put_user(x, ptr) \
+({ \
+	__typeof__(ptr) __private_ptr = ptr; \
+	__typeof__(*(__private_ptr)) __private_val; \
+	int __private_ret = -EFAULT; \
+	__private_val = (__typeof__(*(__private_ptr))) (x); \
+	if (__copy_to_user((__private_ptr), &__private_val, \
+	    sizeof(*(__private_ptr))) == 0) { \
+		__private_ret = 0; \
+	} \
+	__private_ret; \
+})
+
+#define put_user(x, ptr) \
+({ \
+	__typeof__(*(ptr)) *private_ptr = (ptr); \
+	(access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \
+	 __put_user(x, private_ptr) : -EFAULT); \
+})
+
+extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
+				  void **fault_addr, void **fault_catcher);
+
+static inline int strncpy_from_user(char *dst, const char *src, int count)
+{
+	int n;
+
+	if(!access_ok(VERIFY_READ, src, 1)) return(-EFAULT);
+	n = __do_strncpy_from_user(dst, src, count,
+				   &current->thread.fault_addr,
+				   &current->thread.fault_catcher);
+	if(n < 0) return(-EFAULT);
+	return(n);
+}
+
+
+
+extern int __do_clear_user(void *mem, size_t len, void **fault_addr,
+			   void **fault_catcher);
+
+static inline int __clear_user(void *mem, int len)	/* no access_ok() check, unlike clear_user() */
+{
+	return(__do_clear_user(mem, len,
+			       &current->thread.fault_addr,
+			       &current->thread.fault_catcher));
+}
+
+static inline int clear_user(void *mem, int len)	/* yields len when access_ok() fails */
+{
+	return(access_ok(VERIFY_WRITE, mem, len) ?
+	       __do_clear_user(mem, len,
+			       &current->thread.fault_addr,
+			       &current->thread.fault_catcher) : len);
+}
+
+extern int __do_strnlen_user(const char *str, unsigned long n,
+			     void **fault_addr, void **fault_catcher);
+
+static inline int strnlen_user(void *str, int len)
+{
+	return(__do_strnlen_user(str, len,
+				 &current->thread.fault_addr,
+				 &current->thread.fault_catcher));
+}
+
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+
+struct exception_table_entry
+{
+	unsigned long unused;
+};
+
+
+
+#define USER_IS_REMOTE ((current->mosix.dflags & DDEPUTY) && \
+			segment_eq(get_fs(), USER_DS))
+
+
+
+
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff -urN linux-2.4.17/include/asm-um/unaligned.h linux_umopenmosix/include/asm-um/unaligned.h
--- linux-2.4.17/include/asm-um/unaligned.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/unaligned.h Wed Jun 26 23:45:17 2002
@@ -0,0 +1,6 @@
+#ifndef __UM_UNALIGNED_H
+#define __UM_UNALIGNED_H
+
+#include "asm/arch/unaligned.h"
+
+#endif
diff -urN linux-2.4.17/include/asm-um/unistd.h linux_umopenmosix/include/asm-um/unistd.h
--- linux-2.4.17/include/asm-um/unistd.h Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/include/asm-um/unistd.h Sat Jun 29 16:49:41 2002
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef _UM_UNISTD_H_
+#define _UM_UNISTD_H_
+
+#include "linux/resource.h"
+#include "asm/uaccess.h"
+
+extern long sys_open(const char *filename, int flags, int mode);
+extern long sys_dup(unsigned int fildes);
+extern long sys_close(unsigned int fd);
+extern long lseek(unsigned int fildes, unsigned long offset, int whence);
+extern
int read(unsigned int fildes, char *buf, int len); +extern int um_execve(const char *file, char *const argv[], char *const env[]); +extern long sys_setsid(void); +extern long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); +extern long sys_wait4(pid_t pid,unsigned int *stat_addr, int options, + struct rusage *ru); +extern long sys_mount(char *dev_name, char *dir_name, char *type, + unsigned long flags, void *data); +extern long sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, + struct timeval *tvp); + +#ifdef __KERNEL_SYSCALLS__ + +#define KERNEL_CALL(ret_t, sys, args...) \ + mm_segment_t fs = get_fs(); \ + ret_t ret; \ + set_fs(KERNEL_DS); \ + ret = sys(args); \ + set_fs(fs); \ + return ret; + +static inline long open(const char *pathname, int flags, int mode) +{ + KERNEL_CALL(int, sys_open, pathname, flags, mode) +} + +static inline long dup(unsigned int fd) +{ + KERNEL_CALL(int, sys_dup, fd); +} + +static inline long close(unsigned int fd) +{ + KERNEL_CALL(int, sys_close, fd); +} + +static inline int execve(const char *filename, char *const argv[], + char *const envp[]) +{ + KERNEL_CALL(int, um_execve, filename, argv, envp); +} + +static inline long waitpid(pid_t pid, unsigned int *status, int options) +{ + KERNEL_CALL(pid_t, sys_wait4, pid, status, options, NULL) +} + +static inline pid_t wait(int *status) +{ + KERNEL_CALL(pid_t, sys_wait4, -1, status, 0, NULL) +} + +static inline pid_t setsid(void) +{ + KERNEL_CALL(pid_t, sys_setsid) +} + +#endif + +/* Save the value of __KERNEL_SYSCALLS__, undefine it, include the underlying + * arch's unistd.h for the system call numbers, and restore the old + * __KERNEL_SYSCALLS__. 
+ */ + +#ifdef __KERNEL_SYSCALLS__ +#define __SAVE_KERNEL_SYSCALLS__ __KERNEL_SYSCALLS__ +#endif + +#undef __KERNEL_SYSCALLS__ +#include "asm/arch/unistd.h" + +#ifdef __KERNEL_SYSCALLS__ +#define __KERNEL_SYSCALLS__ __SAVE_KERNEL_SYSCALLS__ +#endif + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/include/asm-um/user.h linux_umopenmosix/include/asm-um/user.h --- linux-2.4.17/include/asm-um/user.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/user.h Wed Jun 26 23:47:20 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_USER_H +#define __UM_USER_H + +#include "asm/arch/user.h" + +#endif diff -urN linux-2.4.17/include/asm-um/vga.h linux_umopenmosix/include/asm-um/vga.h --- linux-2.4.17/include/asm-um/vga.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/asm-um/vga.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,6 @@ +#ifndef __UM_VGA_H +#define __UM_VGA_H + +#include "asm/arch/vga.h" + +#endif diff -urN linux-2.4.17/include/linux/binfmts.h linux_umopenmosix/include/linux/binfmts.h --- linux-2.4.17/include/linux/binfmts.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/binfmts.h Sat Jun 29 16:49:28 2002 @@ -65,5 +65,12 @@ #define change_ldt(a,b) setup_arg_pages(a,b) #endif +#ifdef CONFIG_MOSIX +extern void execve_remote_counts(char **, char **, int *, int *); +extern int execve_remote_bring_strings(struct linux_binprm *, char **, char **); +extern unsigned long *create_aout_tables(char *, struct linux_binprm *); +extern int exec_mmap(void); +#endif /* CONFIG_MOSIX */ + #endif /* __KERNEL__ */ #endif /* _LINUX_BINFMTS_H */ diff -urN linux-2.4.17/include/linux/blk.h linux_umopenmosix/include/linux/blk.h --- 
linux-2.4.17/include/linux/blk.h Thu Nov 22 21:48:07 2001 +++ linux_umopenmosix/include/linux/blk.h Sat Jun 29 16:51:07 2002 @@ -323,6 +323,15 @@ #define DEVICE_REQUEST do_ida_request #define DEVICE_NR(device) (MINOR(device) >> 4) +#elif (MAJOR_NR == UBD_MAJOR) + +#define DEVICE_NAME "User-mode block device" +#define DEVICE_INTR do_ubd +#define DEVICE_REQUEST do_ubd_request +#define DEVICE_NR(device) (MINOR(device)) +#define DEVICE_ON(device) +#define DEVICE_OFF(device) + #endif /* MAJOR_NR == whatever */ /* provide DEVICE_xxx defaults, if not explicitly defined diff -urN linux-2.4.17/include/linux/capability.h linux_umopenmosix/include/linux/capability.h --- linux-2.4.17/include/linux/capability.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/capability.h Sat Jun 29 16:49:28 2002 @@ -348,6 +348,11 @@ #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) +#ifdef CONFIG_MOSIX +#define REMOTE_CAPS (CAP_FS_MASK | CAP_TO_MASK(CAP_LINUX_IMMUTABLE) | \ + CAP_TO_MASK(CAP_SYS_RESOURCE)) +#endif /* CONFIG_MOSIX */ + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff -urN linux-2.4.17/include/linux/completion.h linux_umopenmosix/include/linux/completion.h --- linux-2.4.17/include/linux/completion.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/completion.h Sat Jun 29 16:49:28 2002 @@ -28,6 +28,9 @@ } extern void FASTCALL(wait_for_completion(struct completion *)); +#ifdef CONFIG_MOSIX +extern void FASTCALL(wait_for_completion_half_interruptible(struct completion *)); +#endif /* CONFIG_MOSIX */ extern void FASTCALL(complete(struct completion *)); #define INIT_COMPLETION(x) ((x).done = 0) diff -urN linux-2.4.17/include/linux/dcache.h linux_umopenmosix/include/linux/dcache.h --- linux-2.4.17/include/linux/dcache.h Thu Nov 22 21:46:18 2001 +++ linux_umopenmosix/include/linux/dcache.h Sat Jun 29 16:49:28 2002 @@ -124,6 +124,10 @@ */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ +#ifdef CONFIG_MOSIX_FS +#define DCACHE_NO_CACHE 0x0010 /* do not hash, do not cache */ +#endif /* CONFIG_MOSIX_FS */ + extern spinlock_t dcache_lock; /** diff -urN linux-2.4.17/include/linux/dfsa_interface.h linux_umopenmosix/include/linux/dfsa_interface.h --- linux-2.4.17/include/linux/dfsa_interface.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/dfsa_interface.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +/* DFSA definitions and functions required anywhere outside of MOSIX */ +#ifndef _MOS_DFSA_INTERFACE_H +#define _MOS_DFSA_INTERFACE_H + +#ifdef CONFIG_MOSIX_DFSA + +/* update flags (dupdates): */ + +#define DFSA_UPDCRED 1 /* need to update credentials */ +#define DFSA_UPDTABLE 2 /* need to update DFSA table */ +#define DFSA_UPDCDIR 4 /* need to update current-directory */ +#define DFSA_UPDMAX 8 /* need to update "max_fdset" */ +#define DFSA_UPDCLOSE 16 /* need to update closed files */ +#define DFSA_UPDOPEN 32 /* need to update opened files */ +#define DFSA_UPDMODS 64 /* need to update modified files */ +#define DFSA_UPDUSED 128 /* need to update the "used" bits */ +#define DFSA_UPDSEL 256 /* need to update MFS node-selections */ +#define DFSA_UPDUMASK 512 /* need to update umask */ + +#define DFSA_UPDATE (DFSA_UPDCRED|DFSA_UPDTABLE|DFSA_UPDCDIR|DFSA_UPDMAX|\ + DFSA_UPDCLOSE|DFSA_UPDOPEN|DFSA_UPDMODS|DFSA_UPDUSED|\ + DFSA_UPDSEL|DFSA_UPDUMASK) + +#define CAN_DFSA(p) ((p)->fs && atomic_read(&(p)->fs->users) == 1 && \ + !(p)->fs->altroot && \ + (p)->files && 
atomic_read(&(p)->files->users) == 1 && \ + (p)->fs->root == init_task.fs->root && \ + (p)->fs->rootmnt == init_task.fs->rootmnt && \ + !((p)->ptrace) && !(p->flags & PF_EXITING)) + +/* adjust the following if any file-system requires more identifying bytes: */ +#define MAX_IDENT_RECORD_LEN 20 + +struct vfsmount; +struct dentry; +struct nameidata; +struct file; + +void dfsa_init(void); +int dfsa_setmnt(struct vfsmount *, int, unsigned long); +int dfsa_option(char *, int *); +void dfsa_close_file(int); +void dfsa_open_file(int); +void dfsa_touch_file(int); +void dfsa_is_not_up_to_date(void); +void dfsa_pwd_changed(void); +int can_dfsa_file(int); +void dfsa_syscall_on_file(int, int); +int name_starts_in_dfsa(char **, struct nameidata *); +int within_dfsa(struct nameidata *); +int dfsa_optimized_read(struct file *, loff_t, unsigned long, unsigned long); +int disable_dfsa(void); +void enable_dfsa(void); + +#endif /* CONFIG_MOSIX_DFSA */ +#endif diff -urN linux-2.4.17/include/linux/elf.h linux_umopenmosix/include/linux/elf.h --- linux-2.4.17/include/linux/elf.h Thu Nov 22 21:48:29 2001 +++ linux_umopenmosix/include/linux/elf.h Sat Jun 29 16:49:30 2002 @@ -607,5 +607,18 @@ #endif +#ifdef CONFIG_MOSIX +#ifdef __KERNEL__ +struct elf_tables_extras +{ + int uid, euid, gid, egid; + long hwcap; + char platform[8]; +}; + +extern unsigned long elf_remote_setup(char *, int, int, struct elfhdr *, unsigned long, unsigned long, unsigned long, int, int, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, struct elf_tables_extras *); +extern int elf_maydump(struct vm_area_struct *); +#endif /*__KERNEL__*/ +#endif /* CONFIG_MOSIX */ #endif /* _LINUX_ELF_H */ diff -urN linux-2.4.17/include/linux/errno.h linux_umopenmosix/include/linux/errno.h --- linux-2.4.17/include/linux/errno.h Sat Feb 10 00:46:13 2001 +++ linux_umopenmosix/include/linux/errno.h Sat Jun 29 16:49:28 2002 @@ -21,6 +21,10 @@ #define EBADTYPE 527 /* Type not supported 
by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ +#ifdef CONFIG_MOSIX +#define EDOITATHOME 550 +#endif /* CONFIG_MOSIX */ + #endif #endif diff -urN linux-2.4.17/include/linux/file.h linux_umopenmosix/include/linux/file.h --- linux-2.4.17/include/linux/file.h Wed Aug 23 21:22:26 2000 +++ linux_umopenmosix/include/linux/file.h Wed Jun 26 23:45:17 2002 @@ -5,6 +5,10 @@ #ifndef __LINUX_FILE_H #define __LINUX_FILE_H +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + extern void FASTCALL(fput(struct file *)); extern struct file * FASTCALL(fget(unsigned int fd)); @@ -93,6 +97,9 @@ BUG(); files->fd[fd] = file; write_unlock(&files->file_lock); +#ifdef CONFIG_MOSIX_DFSA + dfsa_open_file(fd); +#endif /* CONFIG_MOSIX_DFSA */ } void put_files_struct(struct files_struct *fs); diff -urN linux-2.4.17/include/linux/fs.h linux_umopenmosix/include/linux/fs.h --- linux-2.4.17/include/linux/fs.h Fri Dec 21 19:42:03 2001 +++ linux_umopenmosix/include/linux/fs.h Sat Jun 29 16:49:28 2002 @@ -34,8 +34,8 @@ * nr_file rlimit, so it's safe to set up a ridiculously high absolute * upper limit on files-per-process. * - * Some programs (notably those using select()) may have to be - * recompiled to take full advantage of the new limits.. + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. */ /* Fixed constants first: */ @@ -84,7 +84,7 @@ #define SEL_EX 4 /* public flags for file_system_type */ -#define FS_REQUIRES_DEV 1 +#define FS_REQUIRES_DEV 1 #define FS_NO_DCACHE 2 /* Only dcache the necessary things. */ #define FS_NO_PRELIM 4 /* prevent preloading of dentries, even if * FS_NO_DCACHE is not set. 
@@ -150,7 +150,12 @@ */ #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg)) +#if defined(CONFIG_MOSIX_FS) && defined(__KERNEL__) +#define IS_RDONLY(inode) (((inode)->i_sb->s_flags & MS_RDONLY) || \ + (current->mosix.dirty_bits & MFSARG_RONLY)) +#else #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#endif /* CONFIG_MOSIX_FS && __KERNEL__ */ #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC)) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) @@ -228,10 +233,10 @@ * Try to keep the most commonly used fields in single cache lines (16 * bytes) to improve performance. This ordering should be * particularly beneficial on 32-bit processors. - * + * * We use the first 16 bytes for the data which is used in searches * over the block hash lists (ie. getblk() and friends). - * + * * The second 16 bytes we use for lru buffer scans, as used by * sync_buffers() and refill_freelist(). -- sct */ @@ -313,8 +318,15 @@ #include #include #include +#include #include #include +#ifdef CONFIG_MOSIX +#include +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ +#endif /* CONFIG_MOSIX */ /* * Attribute flags. 
These should be or-ed together to figure out what @@ -429,7 +441,7 @@ struct list_head i_hash; struct list_head i_list; struct list_head i_dentry; - + struct list_head i_dirty_buffers; struct list_head i_dirty_data_buffers; @@ -504,9 +516,19 @@ struct proc_inode_info proc_i; struct socket socket_i; struct usbdev_inode_info usbdev_i; - struct jffs2_inode_info jffs2_i; + struct hostfs_inode_info hostfs_i; + struct jffs2_inode_info jffs2_i; void *generic_ip; +#ifdef CONFIG_MOSIX + struct remote_inode_info remote_i; +#ifdef CONFIG_MOSIX_FS + struct mfs_inode_info mfs_i; +#endif /* CONFIG_MOSIX_FS */ +#endif /* CONFIG_MOSIX */ } u; +#ifdef CONFIG_MOSIX + uint64_t i_unique; +#endif /* CONFIG_MOSIX */ }; struct fown_struct { @@ -643,6 +665,45 @@ struct qstr last; unsigned int flags; int last_type; +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + char *name_left; +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_FS + unsigned short express_function; + union complete_args + { + struct + { + int i1; + int i2; + } ints; + struct + { + time_t a; + time_t m; + } times; + struct + { + uid_t uid; + gid_t gid; + } ids; + struct + { + int mode; + dev_t dev; + } mknod; + mode_t mode; + loff_t len; + struct + { + char *buf; + int bufsiz; + } buffer; + struct nameidata *oldnd; + } complete_args; + void *complete_parg; + unsigned int complete_flags; +#endif /* CONFIG_MOSIX_FS */ }; #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ @@ -852,6 +913,12 @@ int (*revalidate) (struct dentry *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); +#ifdef CONFIG_MOSIX_FS + int (*express_lookup) (struct nameidata *, char **); +#endif /* CONFIG_MOSIX_FS */ +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + void (*check_path) (struct dentry *); +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ }; struct seq_file; @@ -862,7 +929,7 @@ */ struct super_operations { void (*read_inode) (struct inode 
*); - + /* reiserfs kludge. reiserfs needs 64 bits of information to ** find an inode. We are using the read_inode2 call to get ** that information. We don't like this, and are waiting on some @@ -908,6 +975,11 @@ struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); int (*show_options)(struct seq_file *, struct vfsmount *); +#ifdef CONFIG_MOSIX_DFSA + int (*identify) (struct dentry *, void *); + struct dentry * (*reconstruct) (struct vfsmount *, void *, int); + void (*dfsa_changed) (struct vfsmount *, int); +#endif /* CONFIG_MOSIX_DFSA */ }; /* Inode state bits.. */ @@ -990,7 +1062,7 @@ /* Return value for VFS lock functions - tells locks.c to lock conventionally * REALLY kosha for root NFS and nfs_lock - */ + */ #define LOCK_USE_CLNT 1 #define FLOCK_VERIFY_READ 1 @@ -1236,7 +1308,7 @@ extern int kernel_read(struct file *, unsigned long, char *, unsigned long); extern struct file * open_exec(const char *); - + /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); extern ino_t find_inode_number(struct dentry *, struct qstr *); @@ -1277,6 +1349,46 @@ #define LOOKUP_POSITIVE (8) #define LOOKUP_PARENT (16) #define LOOKUP_NOALT (32) + +#ifdef CONFIG_MOSIX_DFSA +#define LOOKUP_STAYFS (64) +#define LOOKUP_DOITATHOME (128) +#define LOOKUP_REPLACENAME (256) +#else +#define LOOKUP_STAYFS (0) +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS +#define LOOKUP_MFS (512) +#define LOOKUP_COMPLETE (1024) +#define LOOKUP_MFS_MFS (2048) + +/* complete_flags: */ +#define COMPLETE_DONE 1 +#define COMPLETE_MAGICAL 2 + +/* express functions: */ +enum express_functions +{ + EF_OPEN = 1, + EF_ACCESS, + EF_LINK, + EF_UNLINK, + EF_SYMLINK, + EF_MKDIR, + EF_RMDIR, + EF_RENAME, + EF_READLINK, + EF_TRUNCATE, + EF_UTIME, + EF_UTIMES, + EF_CHMOD, + EF_CHOWN, + EF_STAT, + EF_MKNOD, +}; + +#endif /* CONFIG_MOSIX_FS */ 
+ /* * Type of the last component on LOOKUP_PARENT */ @@ -1582,6 +1694,16 @@ dput(d2); } +#ifdef CONFIG_MOSIX +extern spinlock_t unique_gen_lock; +extern uint64_t unique_generator; +#define VMODIFIED(_ip) do { \ + spin_lock(&unique_gen_lock); \ + _ip->i_unique = ++unique_generator; \ + spin_unlock(&unique_gen_lock); \ + } while(0) +#endif /* CONFIG_MOSIX */ + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff -urN linux-2.4.17/include/linux/fs.h.orig linux_umopenmosix/include/linux/fs.h.orig --- linux-2.4.17/include/linux/fs.h.orig Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/fs.h.orig Wed Jun 26 23:45:17 2002 @@ -0,0 +1,1589 @@ +#ifndef _LINUX_FS_H +#define _LINUX_FS_H + +/* + * This file has definitions for some important file table + * structures etc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct poll_table_struct; + + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. 
+ */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ +#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<i_sb->s_flags & (flg)) + +#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC)) +#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) + +#define IS_QUOTAINIT(inode) ((inode)->i_flags & S_QUOTA) +#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) +#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) +#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) +#define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) + +#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) + +/* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. 
*/ + +#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ +#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ +#define BLKRRPART _IO(0x12,95) /* re-read partition table */ +#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ +#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ +#define BLKRASET _IO(0x12,98) /* Set read ahead for block device */ +#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ +#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ +#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ +#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ +#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ +#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ +#if 0 +#define BLKPG _IO(0x12,105)/* See blkpg.h */ +#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))/* elevator get */ +#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))/* elevator set */ +/* This was here just to show that the number is taken - + probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ +#endif +/* A jump here: 108-111 have been used for various private purposes. 
*/ +#define BLKBSZGET _IOR(0x12,112,sizeof(int)) +#define BLKBSZSET _IOW(0x12,113,sizeof(int)) +#define BLKGETSIZE64 _IOR(0x12,114,sizeof(u64)) /* return device size in bytes (u64 *arg) */ + +#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ +#define FIBMAP _IO(0x00,1) /* bmap access */ +#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ + +#ifdef __KERNEL__ + +#include +#include + +extern void update_atime (struct inode *); +#define UPDATE_ATIME(inode) update_atime (inode) + +extern void buffer_init(unsigned long); +extern void inode_init(unsigned long); +extern void mnt_init(unsigned long); + +/* bh state bits */ +enum bh_state_bits { + BH_Uptodate, /* 1 if the buffer contains valid data */ + BH_Dirty, /* 1 if the buffer is dirty */ + BH_Lock, /* 1 if the buffer is locked */ + BH_Req, /* 0 if the buffer has been invalidated */ + BH_Mapped, /* 1 if the buffer has a disk mapping */ + BH_New, /* 1 if the buffer is new and not yet written out */ + BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */ + BH_Wait_IO, /* 1 if we should write out this buffer */ + BH_launder, /* 1 if we should throttle on this buffer */ + BH_JBD, /* 1 if it has an attached journal_head */ + + BH_PrivateStart,/* not a state bit, but the first bit available + * for private allocation by other entities + */ +}; + +/* + * Try to keep the most commonly used fields in single cache lines (16 + * bytes) to improve performance. This ordering should be + * particularly beneficial on 32-bit processors. + * + * We use the first 16 bytes for the data which is used in searches + * over the block hash lists (ie. getblk() and friends). + * + * The second 16 bytes we use for lru buffer scans, as used by + * sync_buffers() and refill_freelist(). 
-- sct + */ +struct buffer_head { + /* First cache line: */ + struct buffer_head *b_next; /* Hash queue list */ + unsigned long b_blocknr; /* block number */ + unsigned short b_size; /* block size */ + unsigned short b_list; /* List that this buffer appears */ + kdev_t b_dev; /* device (B_FREE = free) */ + + atomic_t b_count; /* users using this block */ + kdev_t b_rdev; /* Real device */ + unsigned long b_state; /* buffer state bitmap (see above) */ + unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ + + struct buffer_head *b_next_free;/* lru/free list linkage */ + struct buffer_head *b_prev_free;/* doubly linked list of buffers */ + struct buffer_head *b_this_page;/* circular list of buffers in one page */ + struct buffer_head *b_reqnext; /* request queue */ + + struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ + char * b_data; /* pointer to data block */ + struct page *b_page; /* the page this bh is mapped to */ + void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ + void *b_private; /* reserved for b_end_io */ + + unsigned long b_rsector; /* Real buffer location on disk */ + wait_queue_head_t b_wait; + + struct inode * b_inode; + struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ +}; + +typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); +void init_buffer(struct buffer_head *, bh_end_io_t *, void *); + +#define __buffer_state(bh, state) (((bh)->b_state & (1UL << BH_##state)) != 0) + +#define buffer_uptodate(bh) __buffer_state(bh,Uptodate) +#define buffer_dirty(bh) __buffer_state(bh,Dirty) +#define buffer_locked(bh) __buffer_state(bh,Lock) +#define buffer_req(bh) __buffer_state(bh,Req) +#define buffer_mapped(bh) __buffer_state(bh,Mapped) +#define buffer_new(bh) __buffer_state(bh,New) +#define buffer_async(bh) __buffer_state(bh,Async) + +#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) + +extern void set_bh_page(struct buffer_head 
*bh, struct page *page, unsigned long offset); + +#define touch_buffer(bh) mark_page_accessed(bh->b_page) + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Attribute flags. These should be or-ed together to figure out what + * has been changed! + */ +#define ATTR_MODE 1 +#define ATTR_UID 2 +#define ATTR_GID 4 +#define ATTR_SIZE 8 +#define ATTR_ATIME 16 +#define ATTR_MTIME 32 +#define ATTR_CTIME 64 +#define ATTR_ATIME_SET 128 +#define ATTR_MTIME_SET 256 +#define ATTR_FORCE 512 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 1024 + +/* + * This is the Inode Attributes structure, used for notify_change(). It + * uses the above definitions as flags, to know which values have changed. + * Also, in this manner, a Filesystem can look at only the values it cares + * about. Basically, these are the attributes that the VFS layer can + * request to change from the FS layer. + * + * Derek Atkins 94-10-20 + */ +struct iattr { + unsigned int ia_valid; + umode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + time_t ia_atime; + time_t ia_mtime; + time_t ia_ctime; + unsigned int ia_attr_flags; +}; + +/* + * This is the inode attributes flag definitions + */ +#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ +#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ +#define ATTR_FLAG_APPEND 4 /* Append-only file */ +#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ +#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ + +/* + * Includes for diskquotas and mount structures. + */ +#include +#include + +/* + * oh the beauties of C type declarations. 
+ */ +struct page; +struct address_space; +struct kiobuf; + +struct address_space_operations { + int (*writepage)(struct page *); + int (*readpage)(struct file *, struct page *); + int (*sync_page)(struct page *); + /* + * ext3 requires that a successful prepare_write() call be followed + * by a commit_write() call - they must be balanced + */ + int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); + int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ + int (*bmap)(struct address_space *, long); + int (*flushpage) (struct page *, unsigned long); + int (*releasepage) (struct page *, int); +#define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ + int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); +}; + +struct address_space { + struct list_head clean_pages; /* list of clean pages */ + struct list_head dirty_pages; /* list of dirty pages */ + struct list_head locked_pages; /* list of locked pages */ + unsigned long nrpages; /* number of total pages */ + struct address_space_operations *a_ops; /* methods */ + struct inode *host; /* owner: inode, block_device */ + struct vm_area_struct *i_mmap; /* list of private mappings */ + struct vm_area_struct *i_mmap_shared; /* list of shared mappings */ + spinlock_t i_shared_lock; /* and spinlock protecting it */ + int gfp_mask; /* how to allocate the pages */ +}; + +struct char_device { + struct list_head hash; + atomic_t count; + dev_t dev; + atomic_t openers; + struct semaphore sem; +}; + +struct block_device { + struct list_head bd_hash; + atomic_t bd_count; + struct inode * bd_inode; + dev_t bd_dev; /* not a kdev_t - it's a search key */ + int bd_openers; + const struct block_device_operations *bd_op; + struct semaphore bd_sem; /* open/close mutex */ + struct list_head bd_inodes; +}; + +struct inode { + struct list_head i_hash; + struct list_head i_list; + struct list_head 
i_dentry; + + struct list_head i_dirty_buffers; + struct list_head i_dirty_data_buffers; + + unsigned long i_ino; + atomic_t i_count; + kdev_t i_dev; + umode_t i_mode; + nlink_t i_nlink; + uid_t i_uid; + gid_t i_gid; + kdev_t i_rdev; + loff_t i_size; + time_t i_atime; + time_t i_mtime; + time_t i_ctime; + unsigned int i_blkbits; + unsigned long i_blksize; + unsigned long i_blocks; + unsigned long i_version; + struct semaphore i_sem; + struct semaphore i_zombie; + struct inode_operations *i_op; + struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + struct super_block *i_sb; + wait_queue_head_t i_wait; + struct file_lock *i_flock; + struct address_space *i_mapping; + struct address_space i_data; + struct dquot *i_dquot[MAXQUOTAS]; + /* These three should probably be a union */ + struct list_head i_devices; + struct pipe_inode_info *i_pipe; + struct block_device *i_bdev; + struct char_device *i_cdev; + + unsigned long i_dnotify_mask; /* Directory notify events */ + struct dnotify_struct *i_dnotify; /* for directory notifications */ + + unsigned long i_state; + + unsigned int i_flags; + unsigned char i_sock; + + atomic_t i_writecount; + unsigned int i_attr_flags; + __u32 i_generation; + union { + struct minix_inode_info minix_i; + struct ext2_inode_info ext2_i; + struct ext3_inode_info ext3_i; + struct hpfs_inode_info hpfs_i; + struct ntfs_inode_info ntfs_i; + struct msdos_inode_info msdos_i; + struct umsdos_inode_info umsdos_i; + struct iso_inode_info isofs_i; + struct nfs_inode_info nfs_i; + struct sysv_inode_info sysv_i; + struct affs_inode_info affs_i; + struct ufs_inode_info ufs_i; + struct efs_inode_info efs_i; + struct romfs_inode_info romfs_i; + struct shmem_inode_info shmem_i; + struct coda_inode_info coda_i; + struct smb_inode_info smbfs_i; + struct hfs_inode_info hfs_i; + struct adfs_inode_info adfs_i; + struct qnx4_inode_info qnx4_i; + struct reiserfs_inode_info reiserfs_i; + struct bfs_inode_info bfs_i; + struct udf_inode_info udf_i; + 
struct ncp_inode_info ncpfs_i; + struct proc_inode_info proc_i; + struct socket socket_i; + struct usbdev_inode_info usbdev_i; + struct hostfs_inode_info hostfs_i; + struct jffs2_inode_info jffs2_i; + void *generic_ip; + } u; +}; + +struct fown_struct { + int pid; /* pid or -pgrp where SIGIO should be sent */ + uid_t uid, euid; /* uid/euid of process setting the owner */ + int signum; /* posix.1b rt signal to be delivered on IO */ +}; + +struct file { + struct list_head f_list; + struct dentry *f_dentry; + struct vfsmount *f_vfsmnt; + struct file_operations *f_op; + atomic_t f_count; + unsigned int f_flags; + mode_t f_mode; + loff_t f_pos; + unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin; + struct fown_struct f_owner; + unsigned int f_uid, f_gid; + int f_error; + + unsigned long f_version; + + /* needed for tty driver, and maybe others */ + void *private_data; + + /* preallocated helper kiobuf to speedup O_DIRECT */ + struct kiobuf *f_iobuf; + long f_iobuf_lock; +}; +extern spinlock_t files_lock; +#define file_list_lock() spin_lock(&files_lock); +#define file_list_unlock() spin_unlock(&files_lock); + +#define get_file(x) atomic_inc(&(x)->f_count) +#define file_count(x) atomic_read(&(x)->f_count) + +extern int init_private_file(struct file *, struct dentry *, int); + +#define MAX_NON_LFS ((1UL<<31) - 1) + +#define FL_POSIX 1 +#define FL_FLOCK 2 +#define FL_BROKEN 4 /* broken flock() emulation */ +#define FL_ACCESS 8 /* for processes suspended by mandatory locking */ +#define FL_LOCKD 16 /* lock held by rpc.lockd */ +#define FL_LEASE 32 /* lease held on this file */ + +/* + * The POSIX file lock owner is determined by + * the "struct files_struct" in the thread group + * (or NULL for no owner - BSD locks). + * + * Lockd stuffs a "host" pointer into this. 
+ */ +typedef struct files_struct *fl_owner_t; + +struct file_lock { + struct file_lock *fl_next; /* singly linked list for this inode */ + struct list_head fl_link; /* doubly linked list of all locks */ + struct list_head fl_block; /* circular list of blocked processes */ + fl_owner_t fl_owner; + unsigned int fl_pid; + wait_queue_head_t fl_wait; + struct file *fl_file; + unsigned char fl_flags; + unsigned char fl_type; + loff_t fl_start; + loff_t fl_end; + + void (*fl_notify)(struct file_lock *); /* unblock callback */ + void (*fl_insert)(struct file_lock *); /* lock insertion callback */ + void (*fl_remove)(struct file_lock *); /* lock removal callback */ + + struct fasync_struct * fl_fasync; /* for lease break notifications */ + + union { + struct nfs_lock_info nfs_fl; + } fl_u; +}; + +/* The following constant reflects the upper bound of the file/locking space */ +#ifndef OFFSET_MAX +#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) +#define OFFSET_MAX INT_LIMIT(loff_t) +#define OFFT_OFFSET_MAX INT_LIMIT(off_t) +#endif + +extern struct list_head file_lock_list; + +#include + +extern int fcntl_getlk(unsigned int, struct flock *); +extern int fcntl_setlk(unsigned int, unsigned int, struct flock *); + +extern int fcntl_getlk64(unsigned int, struct flock64 *); +extern int fcntl_setlk64(unsigned int, unsigned int, struct flock64 *); + +/* fs/locks.c */ +extern void locks_init_lock(struct file_lock *); +extern void locks_copy_lock(struct file_lock *, struct file_lock *); +extern void locks_remove_posix(struct file *, fl_owner_t); +extern void locks_remove_flock(struct file *); +extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); +extern int posix_lock_file(struct file *, struct file_lock *, unsigned int); +extern void posix_block_lock(struct file_lock *, struct file_lock *); +extern void posix_unblock_lock(struct file_lock *); +extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); +extern int __get_lease(struct inode 
*inode, unsigned int flags); +extern time_t lease_get_mtime(struct inode *); +extern int lock_may_read(struct inode *, loff_t start, unsigned long count); +extern int lock_may_write(struct inode *, loff_t start, unsigned long count); + +struct fasync_struct { + int magic; + int fa_fd; + struct fasync_struct *fa_next; /* singly linked list */ + struct file *fa_file; +}; + +#define FASYNC_MAGIC 0x4601 + +/* SMP safe fasync helpers: */ +extern int fasync_helper(int, struct file *, int, struct fasync_struct **); +/* can be called from interrupts */ +extern void kill_fasync(struct fasync_struct **, int, int); +/* only for net: no internal synchronization */ +extern void __kill_fasync(struct fasync_struct *, int, int); + +struct nameidata { + struct dentry *dentry; + struct vfsmount *mnt; + struct qstr last; + unsigned int flags; + int last_type; +}; + +#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ +#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ + +struct quota_mount_options +{ + unsigned int flags; /* Flags for diskquotas on this device */ + struct semaphore dqio_sem; /* lock device while I/O in progress */ + struct semaphore dqoff_sem; /* serialize quota_off() and quota_on() on device */ + struct file *files[MAXQUOTAS]; /* fp's to quotafiles */ + time_t inode_expire[MAXQUOTAS]; /* expiretime for inode-quota */ + time_t block_expire[MAXQUOTAS]; /* expiretime for block-quota */ + char rsquash[MAXQUOTAS]; /* for quotas treat root as any other user */ +}; + +/* + * Umount options + */ + +#define MNT_FORCE 0x00000001 /* Attempt to forcibly umount */ +#define MNT_DETACH 0x00000002 /* Just detach from the tree */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct list_head super_blocks; +extern spinlock_t sb_lock; + +#define sb_entry(list)
list_entry((list), struct super_block, s_list) +#define S_BIAS (1<<30) +struct super_block { + struct list_head s_list; /* Keep this first */ + kdev_t s_dev; + unsigned long s_blocksize; + unsigned char s_blocksize_bits; + unsigned char s_dirt; + unsigned long long s_maxbytes; /* Max file size */ + struct file_system_type *s_type; + struct super_operations *s_op; + struct dquot_operations *dq_op; + unsigned long s_flags; + unsigned long s_magic; + struct dentry *s_root; + struct rw_semaphore s_umount; + struct semaphore s_lock; + int s_count; + atomic_t s_active; + + struct list_head s_dirty; /* dirty inodes */ + struct list_head s_locked_inodes;/* inodes being synced */ + struct list_head s_files; + + struct block_device *s_bdev; + struct list_head s_instances; + struct quota_mount_options s_dquot; /* Diskquota specific options */ + + union { + struct minix_sb_info minix_sb; + struct ext2_sb_info ext2_sb; + struct ext3_sb_info ext3_sb; + struct hpfs_sb_info hpfs_sb; + struct ntfs_sb_info ntfs_sb; + struct msdos_sb_info msdos_sb; + struct isofs_sb_info isofs_sb; + struct nfs_sb_info nfs_sb; + struct sysv_sb_info sysv_sb; + struct affs_sb_info affs_sb; + struct ufs_sb_info ufs_sb; + struct efs_sb_info efs_sb; + struct shmem_sb_info shmem_sb; + struct romfs_sb_info romfs_sb; + struct smb_sb_info smbfs_sb; + struct hfs_sb_info hfs_sb; + struct adfs_sb_info adfs_sb; + struct qnx4_sb_info qnx4_sb; + struct reiserfs_sb_info reiserfs_sb; + struct bfs_sb_info bfs_sb; + struct udf_sb_info udf_sb; + struct ncp_sb_info ncpfs_sb; + struct usbdev_sb_info usbdevfs_sb; + struct jffs2_sb_info jffs2_sb; + struct cramfs_sb_info cramfs_sb; + void *generic_sbp; + } u; + /* + * The next field is for VFS *only*. No filesystems have any business + * even looking at it. You had been warned. + */ + struct semaphore s_vfs_rename_sem; /* Kludge */ + + /* The next field is used by knfsd when converting a (inode number based) + * file handle into a dentry.
As it builds a path in the dcache tree from + * the bottom up, there may for a time be a subpath of dentries which is not + * connected to the main tree. This semaphore ensures that there is only ever + * one such free path per filesystem. Note that unconnected files (or other + * non-directories) are allowed, but not unconnected directories. + */ + struct semaphore s_nfsd_free_path_sem; +}; + +/* + * VFS helper functions.. + */ +extern int vfs_create(struct inode *, struct dentry *, int); +extern int vfs_mkdir(struct inode *, struct dentry *, int); +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); +extern int vfs_symlink(struct inode *, struct dentry *, const char *); +extern int vfs_link(struct dentry *, struct inode *, struct dentry *); +extern int vfs_rmdir(struct inode *, struct dentry *); +extern int vfs_unlink(struct inode *, struct dentry *); +extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); + +/* + * File types + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +/* + * This is the "filldir" function type, used by readdir() to let + * the kernel specify what kind of dirent layout it wants to have. + * This allows the kernel to read directories into kernel space or + * to have different dirent layouts depending on the binary type. + */ +typedef int (*filldir_t)(void *, const char *, int, loff_t, ino_t, unsigned); + +struct block_device_operations { + int (*open) (struct inode *, struct file *); + int (*release) (struct inode *, struct file *); + int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); + int (*check_media_change) (kdev_t); + int (*revalidate) (kdev_t); + struct module *owner; +}; + +/* + * NOTE: + * read, write, poll, fsync, readv, writev can be called + * without the big kernel lock held in all filesystems.
+ */ +struct file_operations { + struct module *owner; + loff_t (*llseek) (struct file *, loff_t, int); + ssize_t (*read) (struct file *, char *, size_t, loff_t *); + ssize_t (*write) (struct file *, const char *, size_t, loff_t *); + int (*readdir) (struct file *, void *, filldir_t); + unsigned int (*poll) (struct file *, struct poll_table_struct *); + int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); + int (*mmap) (struct file *, struct vm_area_struct *); + int (*open) (struct inode *, struct file *); + int (*flush) (struct file *); + int (*release) (struct inode *, struct file *); + int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fasync) (int, struct file *, int); + int (*lock) (struct file *, int, struct file_lock *); + ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); + ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); + ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); + unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); +}; + +struct inode_operations { + int (*create) (struct inode *,struct dentry *,int); + struct dentry * (*lookup) (struct inode *,struct dentry *); + int (*link) (struct dentry *,struct inode *,struct dentry *); + int (*unlink) (struct inode *,struct dentry *); + int (*symlink) (struct inode *,struct dentry *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); + int (*rmdir) (struct inode *,struct dentry *); + int (*mknod) (struct inode *,struct dentry *,int,int); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); + int (*readlink) (struct dentry *, char *,int); + int (*follow_link) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); + int (*revalidate) (struct dentry *); + int (*setattr) (struct dentry *, struct iattr *); + int 
(*getattr) (struct dentry *, struct iattr *); +}; + +struct seq_file; + +/* + * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called + * without the big kernel lock held in all filesystems. + */ +struct super_operations { + void (*read_inode) (struct inode *); + + /* reiserfs kludge. reiserfs needs 64 bits of information to + ** find an inode. We are using the read_inode2 call to get + ** that information. We don't like this, and are waiting on some + ** VFS changes for the real solution. + ** iget4 calls read_inode2, iff it is defined + */ + void (*read_inode2) (struct inode *, void *) ; + void (*dirty_inode) (struct inode *); + void (*write_inode) (struct inode *, int); + void (*put_inode) (struct inode *); + void (*delete_inode) (struct inode *); + void (*put_super) (struct super_block *); + void (*write_super) (struct super_block *); + void (*write_super_lockfs) (struct super_block *); + void (*unlockfs) (struct super_block *); + int (*statfs) (struct super_block *, struct statfs *); + int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct super_block *); + + /* Following are for knfsd to interact with "interesting" filesystems + * Currently just reiserfs, but possibly FAT and others later + * + * fh_to_dentry is given a filehandle fragment with length, and a type flag + * and must return a dentry for the referenced object or, if "parent" is + * set, a dentry for the parent of the object. + * If a dentry cannot be found, a "root" dentry should be created and + * flagged as DCACHE_NFSD_DISCONNECTED. nfsd_iget is an example implementation. + * + * dentry_to_fh is given a dentry and must generate the filesys specific + * part of the file handle. Available length is passed in *lenp and used + * length should be returned therein. + * If need_parent is set, then dentry_to_fh should encode sufficient information + * to find the (current) parent.
+ * dentry_to_fh should return a 1byte "type" which will be passed back in + * the fhtype argument to fh_to_dentry. Type of 0 is reserved. + * If filesystem was exportable before the introduction of fh_to_dentry, + * types 1 and 2 should be used in that same way as the generic code. + * Type 255 means error. + * + * Lengths are in units of 4bytes, not bytes. + */ + struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); + int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); + int (*show_options)(struct seq_file *, struct vfsmount *); +}; + +/* Inode state bits.. */ +#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ +#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ +#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ +#define I_LOCK 8 +#define I_FREEING 16 +#define I_CLEAR 32 + +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +extern void __mark_inode_dirty(struct inode *, int); +static inline void mark_inode_dirty(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY); +} + +static inline void mark_inode_dirty_sync(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_SYNC); +} + +static inline void mark_inode_dirty_pages(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_PAGES); +} + +struct dquot_operations { + void (*initialize) (struct inode *, short); + void (*drop) (struct inode *); + int (*alloc_block) (struct inode *, unsigned long, char); + int (*alloc_inode) (const struct inode *, unsigned long); + void (*free_block) (struct inode *, unsigned long); + void (*free_inode) (const struct inode *, unsigned long); + int (*transfer) (struct inode *, struct iattr *); +}; + +struct file_system_type { + const char *name; + int fs_flags; + struct super_block *(*read_super) (struct super_block *, void *, int); + struct module *owner; + struct file_system_type * next; + struct list_head fs_supers; +}; + +#define
DECLARE_FSTYPE(var,type,read,flags) \ +struct file_system_type var = { \ + name: type, \ + read_super: read, \ + fs_flags: flags, \ + owner: THIS_MODULE, \ +} + +#define DECLARE_FSTYPE_DEV(var,type,read) \ + DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV) + +/* Alas, no aliases. Too much hassle with bringing module.h everywhere */ +#define fops_get(fops) \ + (((fops) && (fops)->owner) \ + ? ( try_inc_mod_count((fops)->owner) ? (fops) : NULL ) \ + : (fops)) + +#define fops_put(fops) \ +do { \ + if ((fops) && (fops)->owner) \ + __MOD_DEC_USE_COUNT((fops)->owner); \ +} while(0) + +extern int register_filesystem(struct file_system_type *); +extern int unregister_filesystem(struct file_system_type *); +extern struct vfsmount *kern_mount(struct file_system_type *); +extern int may_umount(struct vfsmount *); +extern long do_mount(char *, char *, char *, unsigned long, void *); + +#define kern_umount mntput + +extern int vfs_statfs(struct super_block *, struct statfs *); + +/* Return value for VFS lock functions - tells locks.c to lock conventionally + * REALLY kosha for root NFS and nfs_lock + */ +#define LOCK_USE_CLNT 1 + +#define FLOCK_VERIFY_READ 1 +#define FLOCK_VERIFY_WRITE 2 + +extern int locks_mandatory_locked(struct inode *); +extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); + +/* + * Candidates for mandatory locking have the setgid bit set + * but no group execute bit - an otherwise meaningless combination. 
+ */ +#define MANDATORY_LOCK(inode) \ + (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + +static inline int locks_verify_locked(struct inode *inode) +{ + if (MANDATORY_LOCK(inode)) + return locks_mandatory_locked(inode); + return 0; +} + +static inline int locks_verify_area(int read_write, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + if (inode->i_flock && MANDATORY_LOCK(inode)) + return locks_mandatory_area(read_write, inode, filp, offset, count); + return 0; +} + +static inline int locks_verify_truncate(struct inode *inode, + struct file *filp, + loff_t size) +{ + if (inode->i_flock && MANDATORY_LOCK(inode)) + return locks_mandatory_area( + FLOCK_VERIFY_WRITE, inode, filp, + size < inode->i_size ? size : inode->i_size, + (size < inode->i_size ? inode->i_size - size + : size - inode->i_size) + ); + return 0; +} + +static inline int get_lease(struct inode *inode, unsigned int mode) +{ + if (inode->i_flock && (inode->i_flock->fl_flags & FL_LEASE)) + return __get_lease(inode, mode); + return 0; +} + +/* fs/open.c */ + +asmlinkage long sys_open(const char *, int, int); +asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ +extern int do_truncate(struct dentry *, loff_t start); + +extern struct file *filp_open(const char *, int, int); +extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern int filp_close(struct file *, fl_owner_t id); +extern char * getname(const char *); + +/* fs/dcache.c */ +extern void vfs_caches_init(unsigned long); + +#define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) +#define putname(name) kmem_cache_free(names_cachep, (void *)(name)) + +enum {BDEV_FILE, BDEV_SWAP, BDEV_FS, BDEV_RAW}; +extern int register_blkdev(unsigned int, const char *, struct block_device_operations *); +extern int unregister_blkdev(unsigned int, const char *); +extern struct block_device *bdget(dev_t); +extern int bd_acquire(struct inode *inode); +extern 
void bd_forget(struct inode *inode); +extern void bdput(struct block_device *); +extern struct char_device *cdget(dev_t); +extern void cdput(struct char_device *); +extern int blkdev_open(struct inode *, struct file *); +extern int blkdev_close(struct inode *, struct file *); +extern struct file_operations def_blk_fops; +extern struct address_space_operations def_blk_aops; +extern struct file_operations def_fifo_fops; +extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); +extern int blkdev_get(struct block_device *, mode_t, unsigned, int); +extern int blkdev_put(struct block_device *, int); + +/* fs/devices.c */ +extern const struct block_device_operations *get_blkfops(unsigned int); +extern int register_chrdev(unsigned int, const char *, struct file_operations *); +extern int unregister_chrdev(unsigned int, const char *); +extern int chrdev_open(struct inode *, struct file *); +extern const char * bdevname(kdev_t); +extern const char * cdevname(kdev_t); +extern const char * kdevname(kdev_t); +extern void init_special_inode(struct inode *, umode_t, int); + +/* Invalid inode operations -- fs/bad_inode.c */ +extern void make_bad_inode(struct inode *); +extern int is_bad_inode(struct inode *); + +extern struct file_operations read_fifo_fops; +extern struct file_operations write_fifo_fops; +extern struct file_operations rdwr_fifo_fops; +extern struct file_operations read_pipe_fops; +extern struct file_operations write_pipe_fops; +extern struct file_operations rdwr_pipe_fops; + +extern int fs_may_remount_ro(struct super_block *); + +extern int try_to_free_buffers(struct page *, unsigned int); +extern void refile_buffer(struct buffer_head * buf); +extern void create_empty_buffers(struct page *, kdev_t, unsigned long); +extern void end_buffer_io_sync(struct buffer_head *bh, int uptodate); + +/* reiserfs_writepage needs this */ +extern void set_buffer_async_io(struct buffer_head *bh) ; + +#define BUF_CLEAN 0 +#define BUF_LOCKED 1 /* Buffers scheduled 
for write */ +#define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ +#define NR_LIST 3 + +static inline void get_bh(struct buffer_head * bh) +{ + atomic_inc(&(bh)->b_count); +} + +static inline void put_bh(struct buffer_head *bh) +{ + smp_mb__before_atomic_dec(); + atomic_dec(&bh->b_count); +} + +/* + * This is called by bh->b_end_io() handlers when I/O has completed. + */ +static inline void mark_buffer_uptodate(struct buffer_head * bh, int on) +{ + if (on) + set_bit(BH_Uptodate, &bh->b_state); + else + clear_bit(BH_Uptodate, &bh->b_state); +} + +#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state) + +static inline void __mark_buffer_clean(struct buffer_head *bh) +{ + refile_buffer(bh); +} + +static inline void mark_buffer_clean(struct buffer_head * bh) +{ + if (atomic_set_buffer_clean(bh)) + __mark_buffer_clean(bh); +} + +extern void FASTCALL(__mark_dirty(struct buffer_head *bh)); +extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh)); +extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); +extern void FASTCALL(buffer_insert_inode_data_queue(struct buffer_head *, struct inode *)); + +#define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state) + +static inline void mark_buffer_async(struct buffer_head * bh, int on) +{ + if (on) + set_bit(BH_Async, &bh->b_state); + else + clear_bit(BH_Async, &bh->b_state); +} + +/* + * If an error happens during the make_request, this function + * has to be recalled. It marks the buffer as clean and not + * uptodate, and it notifies the upper layer about the end + * of the I/O. + */ +static inline void buffer_IO_error(struct buffer_head * bh) +{ + mark_buffer_clean(bh); + /* + * b_end_io has to clear the BH_Uptodate bitflag in the error case!
+ */ + bh->b_end_io(bh, 0); +} + +extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); +static inline void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) +{ + mark_buffer_dirty(bh); + buffer_insert_inode_queue(bh, inode); +} + +extern void set_buffer_flushtime(struct buffer_head *); +extern void balance_dirty(void); +extern int check_disk_change(kdev_t); +extern int invalidate_inodes(struct super_block *); +extern int invalidate_device(kdev_t, int); +extern void invalidate_inode_pages(struct inode *); +extern void invalidate_inode_pages2(struct address_space *); +extern void invalidate_inode_buffers(struct inode *); +#define invalidate_buffers(dev) __invalidate_buffers((dev), 0) +#define destroy_buffers(dev) __invalidate_buffers((dev), 1) +extern void invalidate_bdev(struct block_device *, int); +extern void __invalidate_buffers(kdev_t dev, int); +extern void sync_inodes(kdev_t); +extern void sync_unlocked_inodes(void); +extern void write_inode_now(struct inode *, int); +extern int sync_buffers(kdev_t, int); +extern void sync_dev(kdev_t); +extern int fsync_dev(kdev_t); +extern int fsync_super(struct super_block *); +extern int fsync_no_super(kdev_t); +extern void sync_inodes_sb(struct super_block *); +extern int osync_inode_buffers(struct inode *); +extern int osync_inode_data_buffers(struct inode *); +extern int fsync_inode_buffers(struct inode *); +extern int fsync_inode_data_buffers(struct inode *); +extern int inode_has_buffers(struct inode *); +extern void filemap_fdatasync(struct address_space *); +extern void filemap_fdatawait(struct address_space *); +extern void sync_supers(kdev_t); +extern int bmap(struct inode *, int); +extern int notify_change(struct dentry *, struct iattr *); +extern int permission(struct inode *, int); +extern int vfs_permission(struct inode *, int); +extern int get_write_access(struct inode *); +extern int deny_write_access(struct file *); +static inline void put_write_access(struct inode 
* inode) +{ + atomic_dec(&inode->i_writecount); +} +static inline void allow_write_access(struct file *file) +{ + if (file) + atomic_inc(&file->f_dentry->d_inode->i_writecount); +} +extern int do_pipe(int *); + +extern int open_namei(const char *, int, int, struct nameidata *); + +extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +extern struct file * open_exec(const char *); + +/* fs/dcache.c -- generic fs support functions */ +extern int is_subdir(struct dentry *, struct dentry *); +extern ino_t find_inode_number(struct dentry *, struct qstr *); + +/* + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a dentry + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. + */ +static inline void *ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long IS_ERR(const void *ptr) +{ + return (unsigned long)ptr > (unsigned long)-1000L; +} + +/* + * The bitmask for a lookup event: + * - follow links at the end + * - require a directory + * - ending slashes ok even for nonexistent files + * - internal "there are more path components" flag + */ +#define LOOKUP_FOLLOW (1) +#define LOOKUP_DIRECTORY (2) +#define LOOKUP_CONTINUE (4) +#define LOOKUP_POSITIVE (8) +#define LOOKUP_PARENT (16) +#define LOOKUP_NOALT (32) +/* + * Type of the last component on LOOKUP_PARENT + */ +enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; + +/* + * "descriptor" for what we're up to with a read for sendfile(). + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode.
+ */ +typedef struct { + size_t written; + size_t count; + char * buf; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); + +/* needed for stackable file system support */ +extern loff_t default_llseek(struct file *file, loff_t offset, int origin); + +extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); +extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); +extern int FASTCALL(path_walk(const char *, struct nameidata *)); +extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); +extern void path_release(struct nameidata *); +extern int follow_down(struct vfsmount **, struct dentry **); +extern int follow_up(struct vfsmount **, struct dentry **); +extern struct dentry * lookup_one_len(const char *, struct dentry *, int); +extern struct dentry * lookup_hash(struct qstr *, struct dentry *); +#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) +#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) + +extern void iput(struct inode *); +extern void force_delete(struct inode *); +extern struct inode * igrab(struct inode *); +extern ino_t iunique(struct super_block *, ino_t); + +typedef int (*find_inode_t)(struct inode *, unsigned long, void *); +extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); +static inline struct inode *iget(struct super_block *sb, unsigned long ino) +{ + return iget4(sb, ino, NULL, NULL); +} + +extern void clear_inode(struct inode *); +extern struct inode * get_empty_inode(void); + +static inline struct inode * new_inode(struct super_block *sb) +{ + struct inode *inode = get_empty_inode(); + if (inode) { + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_blkbits = sb->s_blocksize_bits; + } + return inode; +} +extern void remove_suid(struct inode *inode); + +extern void insert_inode_hash(struct inode *); 
+extern void remove_inode_hash(struct inode *); +extern struct file * get_empty_filp(void); +extern void file_move(struct file *f, struct list_head *list); +extern struct buffer_head * get_hash_table(kdev_t, int, int); +extern struct buffer_head * getblk(kdev_t, int, int); +extern void ll_rw_block(int, int, struct buffer_head * bh[]); +extern void submit_bh(int, struct buffer_head *); +extern int is_read_only(kdev_t); +extern void __brelse(struct buffer_head *); +static inline void brelse(struct buffer_head *buf) +{ + if (buf) + __brelse(buf); +} +extern void __bforget(struct buffer_head *); +static inline void bforget(struct buffer_head *buf) +{ + if (buf) + __bforget(buf); +} +extern int set_blocksize(kdev_t, int); +extern struct buffer_head * bread(kdev_t, int, int); +extern void wakeup_bdflush(void); +extern void put_unused_buffer_head(struct buffer_head * bh); +extern struct buffer_head * get_unused_buffer_head(int async); + +extern int brw_page(int, struct page *, kdev_t, int [], int); + +typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); + +/* Generic buffer handling for block filesystems.. 
*/ +extern int try_to_release_page(struct page * page, int gfp_mask); +extern int discard_bh_page(struct page *, unsigned long, int); +#define block_flushpage(page, offset) discard_bh_page(page, offset, 1) +#define block_invalidate_page(page) discard_bh_page(page, 0, 0) +extern int block_symlink(struct inode *, const char *, int); +extern int block_write_full_page(struct page*, get_block_t*); +extern int block_read_full_page(struct page*, get_block_t*); +extern int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); +extern int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, + unsigned long *); +extern int block_commit_write(struct page *page, unsigned from, unsigned to); +extern int block_sync_page(struct page *); + +int generic_block_bmap(struct address_space *, long, get_block_t *); +int generic_commit_write(struct file *, struct page *, unsigned, unsigned); +int block_truncate_page(struct address_space *, loff_t, get_block_t *); +extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); + +extern int waitfor_one_page(struct page*); +extern int generic_file_mmap(struct file *, struct vm_area_struct *); +extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); +extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); +extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); +extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern loff_t no_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); +extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); +extern int generic_file_open(struct inode * inode, struct file * filp); + +extern struct file_operations generic_ro_fops; + +extern int vfs_readlink(struct dentry *, char *, int, const char *); 
+extern int vfs_follow_link(struct nameidata *, const char *); +extern int page_readlink(struct dentry *, char *, int); +extern int page_follow_link(struct dentry *, struct nameidata *); +extern struct inode_operations page_symlink_inode_operations; + +extern int vfs_readdir(struct file *, filldir_t, void *); +extern int dcache_readdir(struct file *, void *, filldir_t); + +extern struct file_system_type *get_fs_type(const char *name); +extern struct super_block *get_super(kdev_t); +extern void drop_super(struct super_block *sb); +static inline int is_mounted(kdev_t dev) +{ + struct super_block *sb = get_super(dev); + if (sb) { + drop_super(sb); + return 1; + } + return 0; +} +unsigned long generate_cluster(kdev_t, int b[], int); +unsigned long generate_cluster_swab32(kdev_t, int b[], int); +extern kdev_t ROOT_DEV; +extern char root_device_name[]; + + +extern void show_buffers(void); +extern void mount_root(void); + +#ifdef CONFIG_BLK_DEV_INITRD +extern unsigned int real_root_dev; +extern int change_root(kdev_t, const char *); +#endif + +extern ssize_t char_read(struct file *, char *, size_t, loff_t *); +extern ssize_t block_read(struct file *, char *, size_t, loff_t *); +extern int read_ahead[]; + +extern ssize_t char_write(struct file *, const char *, size_t, loff_t *); +extern ssize_t block_write(struct file *, const char *, size_t, loff_t *); + +extern int file_fsync(struct file *, struct dentry *, int); +extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); +extern int generic_osync_inode(struct inode *, int); +#define OSYNC_METADATA (1<<0) +#define OSYNC_DATA (1<<1) +#define OSYNC_INODE (1<<2) + +extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_setattr(struct inode *, struct iattr *); + +/* + * Common dentry functions for inclusion in the VFS + * or in other stackable file systems. Some of these + * functions were in linux/fs/ C (VFS) files. 
+ * + */ + +/* + * Locking the parent is needed to: + * - serialize directory operations + * - make sure the parent doesn't change from + * under us in the middle of an operation. + * + * NOTE! Right now we'd rather use a "struct inode" + * for this, but as I expect things to move toward + * using dentries instead for most things it is + * probably better to start with the conceptually + * better interface of relying on a path of dentries. + */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget(dentry->d_parent); + + down(&dir->d_inode->i_sem); + return dir; +} + +static inline struct dentry *get_parent(struct dentry *dentry) +{ + return dget(dentry->d_parent); +} + +static inline void unlock_dir(struct dentry *dir) +{ + up(&dir->d_inode->i_sem); + dput(dir); +} + +/* + * Whee.. Deadlock country. Happily there are only two VFS + * operations that do this.. + */ +static inline void double_down(struct semaphore *s1, struct semaphore *s2) +{ + if (s1 != s2) { + if ((unsigned long) s1 < (unsigned long) s2) { + struct semaphore *tmp = s2; + s2 = s1; s1 = tmp; + } + down(s1); + } + down(s2); +} + +/* + * Ewwwwwwww... _triple_ lock. We are guaranteed that the 3rd argument is + * not equal to 1st and not equal to 2nd - the first case (target is parent of + * source) would be already caught, the second is plain impossible (target is + * its own parent and that case would be caught even earlier). Very messy. + * I _think_ that it works, but no warranties - please, look it through. + * Pox on bloody lusers who mandated overwriting rename() for directories... 
+ */ + +static inline void triple_down(struct semaphore *s1, + struct semaphore *s2, + struct semaphore *s3) +{ + if (s1 != s2) { + if ((unsigned long) s1 < (unsigned long) s2) { + if ((unsigned long) s1 < (unsigned long) s3) { + struct semaphore *tmp = s3; + s3 = s1; s1 = tmp; + } + if ((unsigned long) s1 < (unsigned long) s2) { + struct semaphore *tmp = s2; + s2 = s1; s1 = tmp; + } + } else { + if ((unsigned long) s1 < (unsigned long) s3) { + struct semaphore *tmp = s3; + s3 = s1; s1 = tmp; + } + if ((unsigned long) s2 < (unsigned long) s3) { + struct semaphore *tmp = s3; + s3 = s2; s2 = tmp; + } + } + down(s1); + } else if ((unsigned long) s2 < (unsigned long) s3) { + struct semaphore *tmp = s3; + s3 = s2; s2 = tmp; + } + down(s2); + down(s3); +} + +static inline void double_up(struct semaphore *s1, struct semaphore *s2) +{ + up(s1); + if (s1 != s2) + up(s2); +} + +static inline void triple_up(struct semaphore *s1, + struct semaphore *s2, + struct semaphore *s3) +{ + up(s1); + if (s1 != s2) + up(s2); + up(s3); +} + +static inline void double_lock(struct dentry *d1, struct dentry *d2) +{ + double_down(&d1->d_inode->i_sem, &d2->d_inode->i_sem); +} + +static inline void double_unlock(struct dentry *d1, struct dentry *d2) +{ + double_up(&d1->d_inode->i_sem,&d2->d_inode->i_sem); + dput(d1); + dput(d2); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_FS_H */ diff -urN linux-2.4.17/include/linux/fs_struct.h linux_umopenmosix/include/linux/fs_struct.h --- linux-2.4.17/include/linux/fs_struct.h Sat Jul 14 01:10:44 2001 +++ linux_umopenmosix/include/linux/fs_struct.h Wed Jun 26 23:45:17 2002 @@ -2,12 +2,24 @@ #define _LINUX_FS_STRUCT_H #ifdef __KERNEL__ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + struct fs_struct { atomic_t count; rwlock_t lock; int umask; struct dentry * root, * pwd, * altroot; struct vfsmount * rootmnt, * pwdmnt, * altrootmnt; +#ifdef CONFIG_MOSIX_DFSA + atomic_t users; + short count_dropped_to_one; + short pwd_changed; 
+#define INIT_FS_DFSA , ATOMIC_INIT(1), 0, 0 +#else +#define INIT_FS_DFSA +#endif /* CONFIG_MOSIX_DFSA */ }; #define INIT_FS { \ @@ -15,6 +27,7 @@ RW_LOCK_UNLOCKED, \ 0022, \ NULL, NULL, NULL, NULL, NULL, NULL \ + INIT_FS_DFSA \ } extern void exit_fs(struct task_struct *); @@ -41,6 +54,9 @@ dput(old_root); mntput(old_rootmnt); } +#ifdef CONFIG_MOSIX_DFSA + dfsa_is_not_up_to_date(); +#endif /* CONFIG_MOSIX_DFSA */ } /* @@ -60,6 +76,10 @@ fs->pwdmnt = mntget(mnt); fs->pwd = dget(dentry); write_unlock(&fs->lock); +#ifdef CONFIG_MOSIX_DFSA + if(fs->pwdmnt != old_pwdmnt || fs->pwd != old_pwd) + dfsa_pwd_changed(); +#endif /* CONFIG_MOSIX_DFSA */ if (old_pwd) { dput(old_pwd); mntput(old_pwdmnt); @@ -68,6 +88,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old); void put_fs_struct(struct fs_struct *fs); +#ifdef CONFIG_MOSIX_DFSA +void put_used_fs_struct(struct fs_struct *fs); +#endif /* CONFIG_MOSIX_DFSA */ #endif #endif diff -urN linux-2.4.17/include/linux/hostfs_fs_i.h linux_umopenmosix/include/linux/hostfs_fs_i.h --- linux-2.4.17/include/linux/hostfs_fs_i.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/hostfs_fs_i.h Wed Jun 26 23:45:17 2002 @@ -0,0 +1,21 @@ +#ifndef _HOSTFS_FS_I +#define _HOSTFS_FS_I + +struct hostfs_inode_info { + char *host_filename; + int fd; + int mode; +}; + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff -urN linux-2.4.17/include/linux/iobuf.h linux_umopenmosix/include/linux/iobuf.h --- linux-2.4.17/include/linux/iobuf.h Thu Nov 22 21:46:26 2001 +++ linux_umopenmosix/include/linux/iobuf.h Sat Jun 29 16:51:03 2002 @@ -56,6 +56,9 @@ int errno; /* Status of completed IO */ void (*end_io) (struct kiobuf *); /* Completion callback */ wait_queue_head_t wait_queue; +#ifdef CONFIG_MOSIX + struct mm_struct *mm; +#endif /* CONFIG_MOSIX */ }; diff -urN linux-2.4.17/include/linux/kernel.h linux_umopenmosix/include/linux/kernel.h --- linux-2.4.17/include/linux/kernel.h Thu Nov 22 21:46:18 2001 +++ linux_umopenmosix/include/linux/kernel.h Sat Jun 29 16:49:28 2002 @@ -40,7 +40,7 @@ # define ATTRIB_NORET __attribute__((noreturn)) # define NORET_AND noreturn, -#ifdef __i386__ +#if defined(__i386__) || defined(UM_FASTCALL) #define FASTCALL(x) x __attribute__((regparm(3))) #else #define FASTCALL(x) x diff -urN linux-2.4.17/include/linux/kernel_stat.h linux_umopenmosix/include/linux/kernel_stat.h --- linux-2.4.17/include/linux/kernel_stat.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/kernel_stat.h Sat Jun 29 16:49:54 2002 @@ -12,7 +12,7 @@ * used by rstatd/perfmeter */ -#define DK_MAX_MAJOR 16 +#define DK_MAX_MAJOR 99 #define DK_MAX_DISK 16 struct kernel_stat { diff -urN linux-2.4.17/include/linux/keyboard.h linux_umopenmosix/include/linux/keyboard.h --- linux-2.4.17/include/linux/keyboard.h Thu Nov 22 21:47:07 2001 +++ linux_umopenmosix/include/linux/keyboard.h Sat Jun 29 16:51:25 2002 @@ -329,6 +329,10 @@ #define K_SPAWNCONSOLE K(KT_SPEC,18) #define K_BARENUMLOCK K(KT_SPEC,19) +#ifdef CONFIG_MOSIX_UDB +#define K_DEBUGGER K(KT_SPEC,20) +#endif /* CONFIG_MOSIX_UDB */ + #define K_ALLOCATED K(KT_SPEC,126) /* dynamically allocated keymap */ #define K_NOSUCHMAP K(KT_SPEC,127) /* returned by KDGKBENT */ diff -urN 
linux-2.4.17/include/linux/linkage.h linux_umopenmosix/include/linux/linkage.h --- linux-2.4.17/include/linux/linkage.h Mon Dec 11 22:49:54 2000 +++ linux_umopenmosix/include/linux/linkage.h Sat Jun 29 16:49:28 2002 @@ -10,7 +10,13 @@ #endif #if defined __i386__ +#if defined(CONFIG_MOSIX) +/* there is a bug in egcs-2.91.66 using "regparm(0)" */ +/* it often crashes the compiler on some MOSIX files */ +#define asmlinkage CPP_ASMLINKAGE +#else #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) +#endif /* CONFIG_MOSIX */ #elif defined __ia64__ #define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage)) #else diff -urN linux-2.4.17/include/linux/mfs.h linux_umopenmosix/include/linux/mfs.h --- linux-2.4.17/include/linux/mfs.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/mfs.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,465 @@ +#ifndef _MFS_H +#define _MFS_H + +#include + +#define MFS_VERSION 2 + +#define MFS_GARBAGE_TIME (2700*HZ) +#define MFS_REPORT_TIME (MFS_GARBAGE_TIME/3) + +#define MFS_MAXBLOCK 16384 + +struct mfs_server_contact; +struct mfs_client_contact; + +struct mfs_completion +{ + unsigned char needed; + unsigned char done; + unsigned char func; + unsigned char ro; + int base_pe; + mfs_handle_t base; + mfs_handle_t dispose_base; + int other_pe; + mfs_handle_t other; + mfs_handle_t dispose_other; + union complete_args args; + void *more_data; + int more_data_len; + int error; +}; + +struct subinode +{ + unsigned long si_ino; + unsigned int si_count; + umode_t si_mode; + nlink_t si_nlink; + uid_t si_iuid; + gid_t si_gid; + kdev_t si_rdev; + loff_t si_size; + time_t si_atime; + time_t si_mtime; + time_t si_ctime; + unsigned long si_blksize; + unsigned long si_blocks; + unsigned long si_origino; + kdev_t si_origdev; +}; + +struct mfs_dinfo +{ + int pe; + mfs_handle_t handle; + unsigned long latest; +}; + +extern int init_mfs(void); +extern void mfs_change_pe(void); +extern void mfs_change_root(struct dentry *, struct vfsmount *, struct 
dentry *, struct vfsmount *); +extern int is_mfs_root(struct nameidata *); +extern int mfs_walk_init_root(struct nameidata *); +extern void mfs_ip_to_subip(struct subinode *, struct inode *); +extern void mfs_subip_to_ip(struct inode *, struct subinode *); +extern int mfs_conversion_init(void); +extern mfs_handle_t local_to_mfs(struct nameidata *); +extern int mfs_to_local(mfs_handle_t, struct nameidata *); +extern int mfs_get_handle(mfs_handle_t handle); +extern void mfs_put_handle(mfs_handle_t handle); +extern int mfs_access_handle(mfs_handle_t handle); +extern void mfs_lput(struct dentry *, struct vfsmount *); +extern int mfs_try_to_cleanup(int); +extern int mfs_attach_handle(int, mfs_handle_t); +extern void mfs_dispose_handle(int, mfs_handle_t); +extern int mfs_touch_handle(int, mfs_handle_t); +extern ino_t global_inum(struct inode *); +extern void cp_file_fields(struct file *, struct file *); + +extern loff_t mfs_low_llseek(mfs_handle_t, struct file *, loff_t, int); +extern ssize_t mfs_low_read(mfs_handle_t, struct file *, char *, size_t, loff_t *); +extern ssize_t mfs_low_write(mfs_handle_t, struct file *, char *, size_t, loff_t *, long); +extern int mfs_low_readdir(mfs_handle_t, struct file *, void *, filldir_t, int *, int *); +extern int mfs_low_ioctl(mfs_handle_t, struct file *, unsigned int, unsigned long, int, int *); +extern int mfs_low_open(mfs_handle_t, struct file *); +extern int mfs_low_fsync(mfs_handle_t, struct file *, int); +extern int mfs_low_attach_handle(mfs_handle_t); +extern int mfs_low_touch_handle(mfs_handle_t); +extern void mfs_low_dispose_handle(mfs_handle_t); +extern int mfs_low_revalidate(mfs_handle_t, struct subinode *); +extern int mfs_low_express_lookup(mfs_handle_t *, char *, unsigned int, int, int, struct subinode *, char **, struct mfs_completion *); +extern int mfs_low_setattr(mfs_handle_t, struct iattr *); +extern char *mfs_low_check_path(mfs_handle_t); + +extern void mfs_prepare_completion(struct mfs_completion *, struct 
nameidata *); +extern int mfs_check_completion(struct mfs_completion *, struct nameidata *); +extern mfs_handle_t mfs_low_complete(struct mfs_completion *, struct nameidata *); +extern void mfs_low_complete_error(struct mfs_completion *, int); +extern int mfs_link_complete(struct mfs_completion *, int, struct nameidata *); + +extern struct dentry *mfs_root; +extern struct vfsmount *mfs_mnt; +extern ino_t mfs_root_ino; +extern dev_t mfs_root_dev; +extern struct super_block *mfs_super; +extern int MFS_PE; +extern int mfs_debug; +extern char mfs_is_ro; +#ifdef CONFIG_MOSIX_DFSA +extern char mfs_is_dfsa; +#endif /* CONFIG_MOSIX_DFSA */ + +#define MFS_SUPER_MAGIC 235466776 +#define MFS_BAD_PE 65536 +#define MFS_UNKNOWN_PE 65537 +#define MFS_ROOT_INO 65538 + +/* protocols: */ + +enum +{ + MFS_NONE = 0, + MFS_CLOSE, + MFS_CONTINUE, + MFS_STOP, + MFS_INTERIM_DATA, + MFS_REQUEST_LLSEEK, + MFS_REPLY_LLSEEK, + MFS_REQUEST_READ, + MFS_REPLY_READ, + MFS_REQUEST_WRITE, + MFS_REPLY_WRITE, + MFS_REQUEST_READDIR, + MFS_INTERIM_READDIR, + MFS_REPLY_READDIR, + MFS_REQUEST_IOCTL, + MFS_REPLY_IOCTL, + MFS_REQUEST_FSYNC, + MFS_REPLY_FSYNC, + MFS_REQUEST_REVALIDATE, + MFS_REPLY_REVALIDATE, + MFS_REQUEST_EXPRESS, + MFS_REPLY_EXPRESS, + MFS_REQUEST_CHECKPATH, + MFS_REPLY_CHECKPATH, + MFS_REQUEST_SETATTR, + MFS_REPLY_SETATTR, + MFS_REQUEST_ATTACH_HANDLE, + MFS_REPLY_ATTACH_HANDLE, + MFS_REQUEST_TOUCH_HANDLE, + MFS_REPLY_TOUCH_HANDLE, + MFS_REQUEST_DISPOSE_HANDLE, +}; + +struct mfs_cred +{ + uid_t uid, euid, suid, fsuid; + gid_t gid, egid, sgid, fsgid; + int ngroups; + gid_t groups[NGROUPS]; + kernel_cap_t caps; + int whereami; + int deppe; +}; + +struct mfs_request_llseek +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + loff_t offset; + int origin; +}; + +struct mfs_reply_llseek +{ + struct file file; + int result; +}; + +struct mfs_request_read +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + size_t count; + loff_t pos; +}; + +struct 
mfs_interim_data +{ + size_t count; +}; + +struct mfs_reply_read +{ + struct file file; + loff_t ppos; + ssize_t result; + size_t datalen; +}; + +struct mfs_request_write +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + size_t count; + loff_t pos; + size_t datalen; + long flim; +}; + +struct mfs_reply_write +{ + struct file file; + loff_t ppos; + ssize_t result; + int hadsigxfsz; +}; + +struct mfs_request_readdir +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + int policy; + int maxbytes; +}; + +#define MFS_READDIR_UNKNOWN 0 +#define MFS_READDIR_JUSTONE 1 +#define MFS_READDIR_GETDENTS 2 +#define MFS_READDIR_GETDENTS64 3 + +struct mfs_interim_readdir +{ + int error; +}; + +struct mfs_reply_readdir +{ + struct file file; + int result; + int datalen; +}; + +struct mfs_request_ioctl +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + unsigned int cmd; + unsigned long arg; + int data; + int size; +}; + +struct mfs_reply_ioctl +{ + struct file file; + int data; + int result; +}; + +struct mfs_request_open +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; +}; + +struct mfs_reply_open +{ + struct file file; + int result; +}; + +struct mfs_request_fsync +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct file file; + int datasync; +}; + +struct mfs_reply_fsync +{ + struct file file; + int result; +}; + +struct mfs_request_revalidate +{ + struct mfs_cred cred; + mfs_handle_t handle; +}; + +struct mfs_reply_revalidate +{ + struct subinode iinfo; + int result; +}; + +struct mfs_request_express +{ + struct mfs_cred cred; + mfs_handle_t handle; + int namelen; + unsigned int follow; + int link_count; + int total_link_count; + struct mfs_completion complete; +}; + +struct mfs_reply_express +{ + mfs_handle_t handle; + struct subinode iinfo; + int namelen; + struct mfs_completion complete; + int result; +}; + +struct mfs_request_checkpath +{ + struct mfs_cred cred; + mfs_handle_t handle; +}; 
+ +struct mfs_reply_checkpath +{ + int namelen; +}; + +struct mfs_request_setattr +{ + struct mfs_cred cred; + mfs_handle_t handle; + struct iattr attr; +}; + +struct mfs_reply_setattr +{ + int result; +}; + +struct mfs_request_attach_handle +{ + mfs_handle_t handle; +}; + +struct mfs_reply_attach_handle +{ + int result; +}; + +struct mfs_request_touch_handle +{ + mfs_handle_t handle; +}; + +struct mfs_reply_touch_handle +{ + int result; +}; + +struct mfs_request_dispose_handle +{ + mfs_handle_t handle; +}; + +struct mfs_server_contact *mfs_alloc_service(void); +void mfs_release_contact(struct mfs_client_contact *); +int mfs_client_contact_has_data(struct mfs_client_contact *); +void mfs_end_serve(struct mfs_server_contact *); +int mfs_server_send(struct mfs_server_contact *, int, void *, int, void *, int, void *, int); +int mfs_client_send(struct mfs_client_contact *, int, void *, int, void *, int, void *, int); +int mfs_server_receive(struct mfs_server_contact *, void **, void **, void **); +int mfs_client_receive(struct mfs_client_contact *, void **, void **, void **); +struct mfs_client_contact *mfs_new_request(int, int, void *, int, void *, int, void *, int); +void mfs_make_servers(void); +void mfs_new_request_arrived(void); +void mfs_serve(struct mfs_server_contact *, int, void *, void *, void *); + +int mfs_client_attach_handle(int, mfs_handle_t); +void mfs_client_dispose_handle(int, mfs_handle_t); +int mfs_client_touch_handle(int, mfs_handle_t); +int mfs_client_llseek(int, mfs_handle_t, struct file *, loff_t, int); +ssize_t mfs_client_read(int, mfs_handle_t, struct file *, char *, size_t, loff_t *); +ssize_t mfs_client_write(int, mfs_handle_t, struct file *, char *, size_t, loff_t *, long); +int mfs_client_readdir(int, mfs_handle_t, struct file *, void *, filldir_t, int, int, int *, int *); +int mfs_client_ioctl(int, mfs_handle_t, struct file *, unsigned int, unsigned long, int, int *); +int mfs_client_open(int, mfs_handle_t, struct file *); +int 
mfs_client_fsync(int, mfs_handle_t, struct file *, int); +int mfs_client_revalidate(int, mfs_handle_t, struct subinode *); +int mfs_client_express_lookup(int, mfs_handle_t *, char *, unsigned int, struct subinode *, char **, struct mfs_completion *); +char *mfs_client_check_path(int, mfs_handle_t); +int mfs_client_setattr(int, mfs_handle_t, struct iattr *); +void mfs_throw_garbage(void); +int mfs_client_daemon(void *); +int mfs_kill(char *); +void mfs_tuneinfo(char *); +#ifdef CONFIG_MOSIX_DFSA +int mfs_add_stats(struct mfs_stats *, struct mfs_stats *, struct mosix_task *); +#else +static inline int mfs_add_stats(struct mfs_stats *x, struct mfs_stats *y, struct mosix_task *z) +{ + return(0); +} +#endif /* CONFIG_MOSIX_DFSA */ + +extern struct task_struct *mfs_main_server_task; + +static inline int +file_is_mfs(struct file *file) +{ + struct dentry *dp; + struct inode *ip; + struct super_block *sb; + + return(file && (dp = file->f_dentry) && (ip = dp->d_inode) && + (sb = ip->i_sb) && sb->s_magic == MFS_SUPER_MAGIC); +} + +/* statistic collection: */ + +#ifdef CONFIG_MOSIX_DFSA +extern void mfs_count(int, int, int); + +#define mfs_count_attach_handle(w) mfs_count(w, 0, 0) +#define mfs_count_touch_handle(w) mfs_count(w, 0, 0) +#define mfs_count_dispose_handle(w) mfs_count(w, 0, 0) +#define mfs_count_llseek(w) mfs_count(w, 0, 0) +#define mfs_count_read(w,n) mfs_count(w, n, 0) +#define mfs_count_write(w,n) mfs_count(w, 0, n) +#define mfs_count_readdir(w,p,b) mfs_count(w, b, (p) * sizeof(int)) +#define mfs_count_ioctl(w) mfs_count(w, 0, 0) +#define mfs_count_open(w) mfs_count(w, 0, 0) +#define mfs_count_fsync(w) mfs_count(w, 0, 0) +#define mfs_count_setattr(w) mfs_count(w, 0, 0) +#define mfs_count_revalidate(w) mfs_count(w, 0, 0) +#define mfs_count_express(w,l) mfs_count(w, 0, l) +#define mfs_count_check_path(w,l) mfs_count(w, 0, l) +#define mfs_count_notify_change(w) mfs_count(w, 0, 0) +#else +#define mfs_count_attach_handle(w) do {} while(0) +#define 
mfs_count_touch_handle(w) do {} while(0) +#define mfs_count_dispose_handle(w) do {} while(0) +#define mfs_count_llseek(w) do {} while(0) +#define mfs_count_read(w,n) do {} while(0) +#define mfs_count_write(w,n) do {} while(0) +#define mfs_count_readdir(w,p,b) do {} while(0) +#define mfs_count_ioctl(w) do {} while(0) +#define mfs_count_open(w) do {} while(0) +#define mfs_count_fsync(w) do {} while(0) +#define mfs_count_setattr(w) do {} while(0) +#define mfs_count_revalidate(w) do {} while(0) +#define mfs_count_express(w,l) do {} while(0) +#define mfs_count_check_path(w,l) do {} while(0) +#define mfs_count_notify_change(w) do {} while(0) +#endif /* CONFIG_MOSIX_DFSA */ + +#endif /* _MFS_H */ diff -urN linux-2.4.17/include/linux/mfs_fs_i.h linux_umopenmosix/include/linux/mfs_fs_i.h --- linux-2.4.17/include/linux/mfs_fs_i.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/mfs_fs_i.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#ifndef _LINUX_MFS_FS_I_H +#define _LINUX_MFS_FS_I_H +#ifdef CONFIG_MOSIX + +typedef unsigned long long mfs_handle_t; + +struct mfs_inode_info +{ + int pe; + mfs_handle_t handle; +}; + +#endif /* CONFIG_MOSIX */ +#endif diff -urN linux-2.4.17/include/linux/mfs_socket.h linux_umopenmosix/include/linux/mfs_socket.h --- linux-2.4.17/include/linux/mfs_socket.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/mfs_socket.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,40 @@ +#ifndef _MFS_SOCKET_H +#define _MFS_SOCKET_H + +struct socket; + +struct mfs_message_header +{ + char version; + char first; + short hdsz; + int caller; + int serial; + int type; + int dat1sz; + int dat2sz; +}; + +struct socket *mfs_open_main_socket(void); +struct socket *mfs_new_connection(struct socket *, int *); +struct socket *mfs_connect_to(int); +int mfs_socket_has_data(struct socket *); +int mfs_socket_is_closed(struct socket *); +int mfs_send(struct socket *, int, int, int, void *, int, void *, int, void *, int); +int mfs_receive(struct socket *, int, int *, struct mfs_message_header *, void **, void **, void **); +int mfs_set_main_ownership(struct socket *, int); +void mfs_close_socket(struct socket *); +void mfs_monitor_client_contacts(void); + +/* 2nd parameter of mfs_receive: */ +enum +{ + MFS_FIRST_RECEIVE, /* obtain serial */ + MFS_MUST_MATCH, /* fail if serial does not match */ + MFS_SKIP_OTHERS, /* skip packet if serial does not match */ + MFS_JUST_HEADER /* preliminary header check */ +}; + +#define MFS_MAIN_PORT 0xD302 + +#endif /* _MFS_SOCKET_H */ diff -urN linux-2.4.17/include/linux/mm.h linux_umopenmosix/include/linux/mm.h --- linux-2.4.17/include/linux/mm.h Fri Dec 21 19:42:03 2001 +++ linux_umopenmosix/include/linux/mm.h Sat Jun 29 16:49:30 2002 @@ -165,6 +165,10 @@ void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ struct zone_struct *zone; /* Memory zone we are in. 
*/ +#ifdef CONFIG_MOSIX + char young; + unsigned long last_young; +#endif /* CONFIG_MOSIX */ } mem_map_t; /* @@ -509,6 +513,48 @@ return 0; } +#ifdef CONFIG_MOSIX +static inline unsigned long +do_mmap_pgoff_down(struct file *file, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flag, unsigned long pgoff) +{ + int initially_local = !(current->mosix.dflags & DDEPUTY); + struct mm_struct *mm = current->mm; + unsigned long result; + + if(initially_local) + { + down_write(&mm->mmap_sem); +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits |= MMAP_MMDOWNED; +#endif /* CONFIG_MOSIX_FS */ + } + result = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); + if(initially_local) + { + up_write(&mm->mmap_sem); +#ifdef CONFIG_MOSIX_FS + current->mosix.dirty_bits &= ~MMAP_MMDOWNED; +#endif /* CONFIG_MOSIX_FS */ + } + return(result); +} + +static inline unsigned long do_mmap_down(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret = -EINVAL; + if ((offset + PAGE_ALIGN(len)) < offset) + goto out; + if (!(offset & ~PAGE_MASK)) + ret = do_mmap_pgoff_down(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +out: + return ret; +} + +#endif /* CONFIG_MOSIX */ + struct zone_t; /* filemap.c */ extern void remove_inode_page(struct page *); diff -urN linux-2.4.17/include/linux/mosctl.h linux_umopenmosix/include/linux/mosctl.h --- linux-2.4.17/include/linux/mosctl.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/mosctl.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. 
NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan. + */ +#ifndef _LINUX_MOSCTL_H +#define _LINUX_MOSCTL_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif /*__KERNEL__*/ + +#include + +/* configuration */ + +#define MOSIX_MAX 65535 +#define MAX_MOSNET_ENTS 256 + +struct mosixnet +{ + int base; + struct sockaddr saddr; + int cnt; +}; + + +#endif diff -urN linux-2.4.17/include/linux/mosix.h linux_umopenmosix/include/linux/mosix.h --- linux-2.4.17/include/linux/mosix.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/mosix.h Sat Jun 29 16:49:30 2002 @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#ifndef _LINUX_MOSIX_H +#define _LINUX_MOSIX_H + +#ifdef CONFIG_MOSIX + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +/* operations on DEPUTY's data-base of REMOTE files: */ + +extern int mosix_register_a_file(struct file *, int); +extern void mosix_undo_last_file_registration(struct file *, int); +extern int mosix_rebuild_file_list(void); +extern void mosix_update_remote_files(void); + +struct vmalist +{ + unsigned int vmstart; + unsigned int vmend; + unsigned short vmflags; + short maydump; +}; + +struct vmamaps +{ + unsigned long vmstart, vmend, vmflags, vmpgoff; + struct file *fp; +}; + +extern void init_mosix(void); +extern void wake_up_mosix(struct task_struct *); +extern int mosix_wakeable(struct task_struct *); +extern int mosix_go_home(int); +extern int mosix_go_home_for_reason(int, int); +extern int mosix_send_back_home(struct task_struct *); +extern int mosix_need_while_asleep(void); +extern void mosix_run_while_asleep(void); +extern void mosix_pre_usermode_functions(void); +extern int stay_me_and_my_clones(uint32_t); +extern void unstay_mm(struct mm_struct *); +extern void mosix_inform_remote_of_nice(void); +extern void mosix_snap_load(int); +extern void mosix_clear_statistics(void); +extern int mosix_fork_init_fields(struct task_struct *); +extern void mosix_fork_free_fields(struct task_struct *); +extern void mosix_exit(void); +extern void mosix_very_exit(void); +extern void mosix_obtain_registers(unsigned long); +extern void mosix_bring_monkey_users_back(struct inode *); +extern void mosix_no_longer_monkey(struct inode *); +extern void mosix_check_for_freedom_to_move(void); +extern int mosix_pre_clone(void); +extern void mosix_post_clone(void); +extern void mosix_remote_syscall_trace(void); +extern u64 mosix_remote_tsc(void); +extern int mosix_forkmigrate(void); +extern int mosix_deputy_fork(struct task_struct *, int, unsigned long); +extern void mosix_exit_mm(struct task_struct *); +extern void 
mosix_exec_mmap(struct mm_struct *); +extern void mosix_deputy_rusage(int); +extern int mosix_deputy_personality(unsigned long); +extern void mosix_deputy_count_args(char **, char**, int *, int *); +extern int mosix_deputy_bring_strings(struct linux_binprm *, char *, char **, char **); +extern int mosix_deputy_setup_args(int, unsigned long *); +extern int mosix_deputy_exec_mmap(char *); +extern int mosix_deputy_dump_thread(struct user *); +extern void mosix_deputy_init_aout_mm(struct exec *); +extern unsigned long mosix_deputy_elf_setup(char *, int, int, struct elfhdr *, unsigned long, unsigned long, unsigned long, int, int, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, struct elf_tables_extras *); +extern void mosix_deputy_fix_elf_aout_interp(unsigned, unsigned, unsigned); +extern int mosix_deputy_list_vmas(struct vmalist **, unsigned long *, unsigned long *); +extern long mosix_deputy_brk(unsigned long, unsigned long); + +extern void mosix_notify_urgent(struct socket *); +extern void mosix_notify_receive(struct socket *); + +extern void mosix_proc_init(void); +extern int mosix_proc_get_remote_array(char *, int, int); +extern int mosix_proc_get_node_array(char *, int, int); + +extern void mosix_decay_exec(void); +extern void mosix_add_to_whereto(struct task_struct *, int); +extern void mosix_do_add_to_whereto(struct task_struct *, int); + +struct sockaddr; +extern int reserved_mosix_address(struct sockaddr *); +extern void comm_report_violation(char *, struct sockaddr *); + +void init_guest_user_struct(void); +extern int get_free_guest_slots(void); +extern int count_guests(void); + +/* argument of mosix_deputy_setup_args: */ +#define SETUP_ARGS_PLAIN 0 +#define SETUP_ARGS_AS_AOUT 1 +#define SETUP_ARGS_AS_ELF 2 +#define SETUP_ARGS_NOTYET 3 + +extern int mosix_deputy_restore_sigcontext(struct sigcontext *, int *); +extern void mosix_deputy_setup_frame(unsigned long, struct k_sigaction *, siginfo_t, sigset_t 
*); +extern unsigned long mosix_deputy_mmap(struct file *, unsigned long, int, unsigned long, unsigned long, unsigned long, off_t, nopage_t); +extern int deputy_munmap(unsigned long, size_t); +extern int deputy_mprotect(unsigned long, size_t, unsigned long); +extern void mosix_deputy_rlimit(int, struct rlimit); +extern int mosix_deputy_dump_fpu(struct user_i387_struct *); +extern int mosix_sync_caps(kernel_cap_t); + +#define ALL_REGISTERS 0x7fff /* as many as in struct pt_regs */ +#define BIT_OF_REGISTER(_which) \ + (1 << (((int)&(((struct pt_regs *)0)->_which)) / sizeof(int))) + +/* + * meaning of signals on REMOTE + */ + +#define FATAL_SIGSEGV SIGINT +#define REMOTE_FILE_RELEASED SIGQUIT + +/* other signals that can occur on REMOTE: + * SIGKILL, SIGSEGV, SIGPROF, SIGVTALRM, SIGFPE, SIGBUS, SIGIOT, SIGILL, + * SIGXCPU, SIGXFSZ, SIGPROF, SIGTRAP, SIGPWR (a tricky one). + * SIGTERM requires the process to migrate back. + */ + +#define MOSIX_PRIORITY (100) +#define MOSIX_ASLEEP_PRIORITY (MOSIX_PRIORITY * 3 / 10) +#define MOSIX_DEPUTY_PRIORITY (MOSIX_PRIORITY * 1 / 10) +#define MOSIX_RESPOND_PRIORITY (MOSIX_PRIORITY * 6 / 10) + +#define PROC_MOSIX_USE_START 1024 /* coordinate with fs/proc/base.c */ + +#if defined(CONFIG_MOSIX_DIAG) && defined(CONFIG_SMP) +#define KERNEL_LOCKED do{if(current->lock_depth == -1)panic("not locked %d of " __FILE__, __LINE__);}while(0) +#define MOSIX_LOCKED do{if(current->mosix.lock_depth == -1)panic("not locked %d of " __FILE__, __LINE__);}while(0) +#else +#define KERNEL_LOCKED do {} while(0) +#define MOSIX_LOCKED do {} while(0) +#endif /* CONFIG_MOSIX_DIAG && CONFIG_SMP */ + +#define mos_to_contact(m) ((struct socket *)((m)->contact)) +#define mos_to_waitp(m) ((wait_queue_head_t *)(&(m)->wait_dist)) +#define mos_to_regs(m) ((struct pt_regs *)((m)->altregs)) + +struct proc_dir_entry; +extern struct proc_dir_entry *proc_mosix; +typedef ssize_t (proc_mosix_pid_writer)(struct file *, struct task_struct *, const char *, size_t); +extern 
proc_mosix_pid_writer proc_mosix_pid_set_migrate; +extern proc_mosix_pid_writer proc_mosix_pid_set_goto; +extern proc_mosix_pid_writer proc_mosix_pid_set_lock; +extern proc_mosix_pid_writer proc_mosix_pid_set_sigmig; +extern proc_mosix_pid_writer proc_mosix_pid_set_disclosure; +#ifdef CONFIG_MOSIX_FS +extern proc_mosix_pid_writer proc_mosix_pid_set_selected; +#endif /* CONFIG_MOSIX_FS */ + +#ifdef CONFIG_MOSIX_UDB +extern void udbinit(void); +extern int udb_booting; +extern int nmi_debugger; +#endif /* CONFIG_MOSIX_UDB */ + +#endif /*__KERNEL__*/ + +#endif /* CONFIG_MOSIX */ + +#endif diff -urN linux-2.4.17/include/linux/mount.h linux_umopenmosix/include/linux/mount.h --- linux-2.4.17/include/linux/mount.h Fri Oct 5 22:05:55 2001 +++ linux_umopenmosix/include/linux/mount.h Wed Jun 26 23:45:18 2002 @@ -29,6 +29,11 @@ int mnt_flags; char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; +#ifdef CONFIG_MOSIX_DFSA + short mnt_dfsa; /* what should be */ + short mnt_dfsano; /* what is */ + short mnt_dfsaprev; /* what was */ +#endif /* CONFIG_MOSIX_DFSA */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) diff -urN linux-2.4.17/include/linux/net.h linux_umopenmosix/include/linux/net.h --- linux-2.4.17/include/linux/net.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/net.h Sat Jun 29 16:49:28 2002 @@ -62,6 +62,12 @@ #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 +#ifdef CONFIG_MOSIX +#define SOCK_INTER_MOSIX 3 /* used internally in kernel-mode */ +#define SOCK_WAIT_MFSDATA 4 /* waiting for MFS data */ +#define SOCK_OOB_IN 5 /* OOB received on MOSIX socket */ +#endif /* CONFIG_MOSIX */ + struct socket { socket_state state; diff -urN linux-2.4.17/include/linux/personality.h linux_umopenmosix/include/linux/personality.h --- linux-2.4.17/include/linux/personality.h Mon Sep 10 23:04:33 2001 +++ linux_umopenmosix/include/linux/personality.h Sat Jun 29 16:49:44 2002 @@ -123,4 +123,8 @@ 
__MOD_DEC_USE_COUNT(ep->module); \ } while (0) +#ifdef CONFIG_MOSIX +extern struct exec_domain *lookup_exec_domain(unsigned long); +#endif /* CONFIG_MOSIX */ + #endif /* _LINUX_PERSONALITY_H */ diff -urN linux-2.4.17/include/linux/pipe_fs_i.h linux_umopenmosix/include/linux/pipe_fs_i.h --- linux-2.4.17/include/linux/pipe_fs_i.h Thu Apr 26 00:18:23 2001 +++ linux_umopenmosix/include/linux/pipe_fs_i.h Sat Jun 29 16:49:28 2002 @@ -9,6 +9,10 @@ unsigned int start; unsigned int readers; unsigned int writers; +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS + unsigned int exceptions; + unsigned long reading_bytes; +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ unsigned int waiting_readers; unsigned int waiting_writers; unsigned int r_counter; @@ -38,6 +42,14 @@ #define PIPE_MAX_RCHUNK(inode) (PIPE_SIZE - PIPE_START(inode)) #define PIPE_MAX_WCHUNK(inode) (PIPE_SIZE - PIPE_END(inode)) +#ifdef CONFIG_MOSIX_PIPE_EXCEPTIONS +#define PIPE_EXCEPTIONS(inode) ((inode).i_pipe->exceptions) +#define PIPE_READING_BYTES(inode) ((inode).i_pipe->reading_bytes) + +#define PIPE_EXCEPTION_INPUT 1 +#define PIPE_EXCEPTION_NOINPUT 2 +#endif /* CONFIG_MOSIX_PIPE_EXCEPTIONS */ + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct inode * inode); diff -urN linux-2.4.17/include/linux/proc_fs.h linux_umopenmosix/include/linux/proc_fs.h --- linux-2.4.17/include/linux/proc_fs.h Thu Nov 22 21:46:23 2001 +++ linux_umopenmosix/include/linux/proc_fs.h Sat Jun 29 16:50:02 2002 @@ -90,6 +90,9 @@ void proc_pid_delete_inode(struct inode *inode); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); +#ifdef CONFIG_MOSIX +extern int proc_register(struct proc_dir_entry *, struct proc_dir_entry *); +#endif /* CONFIG_MOSIX */ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); diff -urN linux-2.4.17/include/linux/remote_fs_i.h 
linux_umopenmosix/include/linux/remote_fs_i.h --- linux-2.4.17/include/linux/remote_fs_i.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/linux/remote_fs_i.h Sat Jun 29 16:49:28 2002 @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#ifndef _LINUX_REMOTE_FS_I_H +#define _LINUX_REMOTE_FS_I_H +#ifdef CONFIG_MOSIX + +struct vm_area_struct; +typedef struct page * (*nopage_t)(struct vm_area_struct *, unsigned long, int); + +struct remote_inode_info +{ + int origin; /* node of origin */ + struct dentry *dp; /* dentry pointer on origin */ + uint64_t unique; /* unique version on origin */ + nopage_t nopage; /* nopage routine */ +}; + +#define home_file(f) (*((struct file **)(&(f)->f_pos))) + +#endif /* CONFIG_MOSIX */ +#endif diff -urN linux-2.4.17/include/linux/sched.h linux_umopenmosix/include/linux/sched.h --- linux-2.4.17/include/linux/sched.h Fri Dec 21 19:42:03 2001 +++ linux_umopenmosix/include/linux/sched.h Sat Jun 29 16:49:28 2002 @@ -29,6 +29,12 @@ struct exec_domain; +#ifdef CONFIG_MOSIX +#include +#else +#define MOSIX_INIT_TASK(tsk) +#endif /* CONFIG_MOSIX */ + /* * cloning flags: */ @@ -88,6 +94,9 @@ #define TASK_UNINTERRUPTIBLE 2 #define TASK_ZOMBIE 4 #define TASK_STOPPED 8 +#ifdef CONFIG_MOSIX +#define TASK_SAME 16 +#endif /* CONFIG_MOSIX */ #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -181,6 +190,25 @@ fd_set close_on_exec_init; fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; +#ifdef CONFIG_MOSIX_DFSA + atomic_t users; /* the real count, excluding 
/proc grabs */ + int count_dropped_to_one; + int maxclosed; + fd_set *closed; + fd_set closed_fds_init; + int maxopened; + fd_set *opened; + fd_set opened_fds_init; + int maxmod; + fd_set *modified; + fd_set modified_fds_init; +#define INIT_FILES_DFSA , users : ATOMIC_INIT(1), \ + closed: &init_files.closed_fds_init, \ + opened: &init_files.opened_fds_init, \ + modified: &init_files.modified_fds_init, +#else +#define INIT_FILES_DFSA +#endif /* CONFIG_MOSIX_DFSA */ }; #define INIT_FILES \ @@ -196,6 +224,7 @@ close_on_exec_init: { { 0, } }, \ open_fds_init: { { 0, } }, \ fd_array: { NULL, } \ + INIT_FILES_DFSA \ } /* Maximum number of active map areas.. This is a random (large) number */ @@ -229,6 +258,20 @@ /* Architecture-specific MM context */ mm_context_t context; +#ifdef CONFIG_MOSIX + atomic_t mm_kiocount; + atomic_t mm_realusers; + unsigned long last_memsort; + int used, unused, swapped; /* break-down of memory pages */ + int private_unused; /* unused but not active/inactive */ +/* + int private_unused; +*/ + int mark; +#define INIT_MM_MOSIX mm_kiocount: ATOMIC_INIT(0), mm_realusers: ATOMIC_INIT(1), +#else +#define INIT_MM_MOSIX +#endif /* CONFIG_MOSIX */ }; extern int mmlist_nr; @@ -242,6 +285,7 @@ mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ page_table_lock: SPIN_LOCK_UNLOCKED, \ mmlist: LIST_HEAD_INIT(name.mmlist), \ + INIT_MM_MOSIX \ } struct signal_struct { @@ -340,9 +384,9 @@ pid_t tgid; /* boolean value for session group leader */ int leader; - /* + /* * pointers to (original) parent process, youngest child, younger sibling, - * older sibling, respectively. (p->father can be replaced with + * older sibling, respectively. 
(p->father can be replaced with * p->p_pptr->pid) */ struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; @@ -401,13 +445,15 @@ int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty */ spinlock_t alloc_lock; - +#ifdef CONFIG_MOSIX + struct mosix_task mosix; +#endif /* CONFIG_MOSIX */ /* journalling filesystem info */ void *journal_info; }; @@ -501,6 +547,7 @@ blocked: {{0}}, \ alloc_lock: SPIN_LOCK_UNLOCKED, \ journal_info: NULL, \ +MOSIX_INIT_TASK(tsk) \ } @@ -547,9 +594,23 @@ for(p = *htable; p && p->pid != pid; p = p->pidhash_next) ; +#ifdef CONFIG_MOSIX + if(p && (p->mosix.dflags & DREMOTE)) + p = NULL; +#endif /* CONFIG_MOSIX */ + return p; +} + +#ifdef CONFIG_MOSIX +static inline struct task_struct *find_any_task_by_pid(int pid) +{ + struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; + for(p = *htable; p && p->pid != pid; p = p->pidhash_next) + ; return p; } +#endif /* CONFIG_MOSIX */ #define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL) @@ -670,6 +731,14 @@ static inline void recalc_sigpending(struct task_struct *t) { +#ifdef CONFIG_MOSIX + t->mosix.ignoreoldsigs = 0; + if(t->mosix.dflags & DHEAVYSLEEP) + t->sigpending = 0; + else if(t->mosix.dflags & DFAKESIGNAL) + t->sigpending = 1; + else +#endif /* CONFIG_MOSIX */ t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked); } @@ -702,12 +771,12 @@ * fsuser(). This is done, along with moving fsuser() checks to be * last. * - * These will be removed, but in the mean time, when the SECURE_NOROOT + * These will be removed, but in the mean time, when the SECURE_NOROOT * flag is set, uids don't grant privilege. 
*/ static inline int suser(void) { - if (!issecure(SECURE_NOROOT) && current->euid == 0) { + if (!issecure(SECURE_NOROOT) && current->euid == 0) { current->flags |= PF_SUPERPRIV; return 1; } @@ -724,7 +793,7 @@ } /* - * capable() checks for a particular capability. + * capable() checks for a particular capability. * New privilege checks should use this interface, rather than suser() or * fsuser(). See include/linux/capability.h for defined capabilities. */ @@ -836,7 +905,7 @@ current->state = TASK_RUNNING; \ remove_wait_queue(&wq, &__wait); \ } while (0) - + #define wait_event_interruptible(wq, condition) \ ({ \ int __ret = 0; \ @@ -873,10 +942,37 @@ #define next_thread(p) \ list_entry((p)->thread_group.next, struct task_struct, thread_group) +#ifdef CONFIG_MOSIX +#define for_each_local_task(p) \ + for (p = &init_task ; (p = p->next_task) != &init_task ; ) \ + if(!(p->mosix.dflags & DREMOTE)) + +static inline long +LOGICAL_STATE(struct task_struct *p) +{ + int result; + unsigned long flags; + + read_lock_irqsave(&p->mosix.state_lock, flags); + result = (p->mosix.bstate == TASK_SAME) ? 
p->state : p->mosix.bstate; + read_unlock_irqrestore(&p->mosix.state_lock, flags); + return(result); +} + +extern void run_on(struct task_struct *); +extern void run_off(struct task_struct *); + +#define need_interim_while_asleep() mosix_need_while_asleep() +#define run_interim_while_asleep() mosix_run_while_asleep() +#endif /* CONFIG_MOSIX */ + static inline void del_from_runqueue(struct task_struct * p) { nr_running--; p->sleep_time = jiffies; +#ifdef CONFIG_MOSIX + run_off(p); +#endif /* CONFIG_MOSIX */ list_del(&p->run_list); p->run_list.next = NULL; } @@ -915,6 +1011,11 @@ char *res; struct vfsmount *rootmnt; struct dentry *root; +#if defined(CONFIG_MOSIX_DFSA) || defined(CONFIG_MOSIX_FS) + if(dentry->d_inode && dentry->d_inode->i_op && + dentry->d_inode->i_op->check_path) + dentry->d_inode->i_op->check_path(dentry); +#endif /* CONFIG_MOSIX_DFSA || CONFIG_MOSIX_FS */ read_lock(&current->fs->lock); rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); diff -urN linux-2.4.17/include/linux/smp_lock.h linux_umopenmosix/include/linux/smp_lock.h --- linux-2.4.17/include/linux/smp_lock.h Thu Nov 22 21:46:27 2001 +++ linux_umopenmosix/include/linux/smp_lock.h Sat Jun 29 16:49:30 2002 @@ -11,6 +11,11 @@ #define reacquire_kernel_lock(task) do { } while(0) #define kernel_locked() 1 +#ifdef CONFIG_MOSIX +#define lock_mosix() do {} while(0) +#define unlock_mosix() do {} while(0) +#endif /* CONFIG_MOSIX */ + #else #include diff -urN linux-2.4.17/include/linux/spinlock.h linux_umopenmosix/include/linux/spinlock.h --- linux-2.4.17/include/linux/spinlock.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/spinlock.h Sat Jun 29 16:49:28 2002 @@ -130,6 +130,10 @@ #define write_lock(lock) (void)(lock) /* Not "unused variable".
*/ #define write_unlock(lock) do { } while(0) +#ifdef CONFIG_MOSIX +#define can_read_lock(rw) (1) +#endif /* CONFIG_MOSIX */ + #endif /* !SMP */ /* "lock on reference count zero" */ diff -urN linux-2.4.17/include/linux/tty.h linux_umopenmosix/include/linux/tty.h --- linux-2.4.17/include/linux/tty.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/tty.h Sat Jun 29 16:49:28 2002 @@ -366,6 +366,7 @@ extern int specialix_init(void); extern int espserial_init(void); extern int macserial_init(void); +extern int stdio_init(void); extern int a2232board_init(void); extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device, @@ -421,5 +422,7 @@ extern int vt_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg); +extern void stdio_console_init(void); + #endif /* __KERNEL__ */ #endif diff -urN linux-2.4.17/include/linux/wait.h linux_umopenmosix/include/linux/wait.h --- linux-2.4.17/include/linux/wait.h Thu Nov 22 21:46:19 2001 +++ linux_umopenmosix/include/linux/wait.h Sat Jun 29 16:49:28 2002 @@ -190,6 +190,21 @@ return !list_empty(&q->task_list); } +#ifdef CONFIG_MOSIX +#if BITS_PER_LONG == 32 +#define KERNEL_ADDRESS_BIT 0x80000000 +#else +#error please fill in: use a bit that is always set in kernel addresses. 
+#endif +#define IN_MOSIX_CONTEXT(p) (!(((unsigned long) (p)) & KERNEL_ADDRESS_BIT)) +#define MOSIX_CONTEXT(p) ((struct task_struct *)\ + (((unsigned long) (p)) & ~KERNEL_ADDRESS_BIT)) +#define NORMAL_CONTEXT(p) ((struct task_struct *)\ + (((unsigned long) (p)) | KERNEL_ADDRESS_BIT)) + +extern void adjust_task_mosix_context(struct task_struct **); +#endif /* CONFIG_MOSIX */ + static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) { #if WAITQUEUE_DEBUG @@ -200,6 +215,9 @@ if (!head->task_list.next || !head->task_list.prev) WQ_BUG(); #endif +#ifdef CONFIG_MOSIX + adjust_task_mosix_context(&new->task); +#endif /* CONFIG_MOSIX */ list_add(&new->task_list, &head->task_list); } @@ -217,6 +235,9 @@ if (!head->task_list.next || !head->task_list.prev) WQ_BUG(); #endif +#ifdef CONFIG_MOSIX + adjust_task_mosix_context(&new->task); +#endif /* CONFIG_MOSIX */ list_add_tail(&new->task_list, &head->task_list); } diff -urN linux-2.4.17/include/mos/balance.h linux_umopenmosix/include/mos/balance.h --- linux-2.4.17/include/mos/balance.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/balance.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,129 @@ +/* Changes since Feb 12, 2002 by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#ifndef _MOSIX_BALANCE_H +#define _MOSIX_BALANCE_H + +#ifdef __KERNEL__ + +/* various tuning definitions: */ +#define MAXKNOWNUP 45 +#define INFO_WIN 8 +#define BALANCING_TIMEOUT 8 + +struct loadinfo { + unsigned short pe; + unsigned short speed; /* in SMP: of each CPU (not the total) */ + unsigned long load; + unsigned short ncpus; + unsigned short util; /* in SMP: max=MF*smp_num_cpus */ + unsigned short status; + unsigned short free_slots; + unsigned int mem; + unsigned int rmem; + unsigned int tmem; +#ifdef CONFIG_MOSIX_TOPOLOGY + struct opcost costs[MAX_MOSIX_TOPOLOGY]; + struct mfs_cost mfscosts[MAX_MOSIX_TOPOLOGY]; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_RESEARCH + unsigned int rio; /* read io */ + unsigned int wio; /* write io */ +#endif /* CONFIG_MOSIX_RESEARCH */ +}; + +struct miginfo { + int aload; + unsigned long pages; +}; + +struct infomsg { + long version; + int topology; + int serialno; + struct loadinfo load; +}; + + +/* following is a multiple to be used for statistical values - primarily the + * number of system-calls. Statistical values are counted in chunks of + * EVENT_COUNTER_VALUE, rather than simply 1, for the following two reasons: + * 1) to prevent being lost by decaying + * 2) making the contribution of a system-call to "iocounter" reflect its + * impact, which is considerably more relative to 1-byte of I/O: + * "iocounter" need not be accurate since it is only used to trigger + * considerations for IOBALANCE migrations, but it is good to maintain + * a power of 2 to prevent multiplications in "iocounter" computations.
+ */ +#define EVENT_COUNTER_VALUE 1024 +#define EVENT_COUNTER_SHIFT 10 + +/* MDP constants: */ +#define KEEP_FREE_PORTION 14 /* 1/16 of memory */ +#define MAX_PAGES_TO_KEEP_FREE (0x600000/PAGE_SIZE) /* but no more than 6MB */ +#define MIN_EXPECTED_PROC_SIZE (0x80000/PAGE_SIZE) /* when freeing memory, at least this */ +#define MIN_MCHOOSE_AGAIN 1000000 /* youngest (RT) to mconsider */ +#define MAX_MCHOOSE_AGAIN 4000000 /* no elder-advantage beyond this */ +#define UTIL_TOLLERANCE 8/10 /* under this, sure thrashing */ +#define OLD_SECONDS 60 /* making an untouched page old */ + +extern struct loadinfo loadinfo[INFO_WIN]; + +extern int export_load; /* load reported to other processors */ +extern int stable_export; /* machine dependent stabilizing factor */ +extern unsigned acpuse; /* accumulated cpu utilization */ +extern unsigned coming_in; /* number of arriving processes */ +extern unsigned came_lately4; /* processes that arrived lately (*4) */ +extern unsigned load_left; /* load of processes that just left */ +extern int standard_speed; /* yardstick */ +#ifdef CONFIG_MOSIX_RESEARCH +extern unsigned int io_read_rate; /* the current read rate from block devices */ +extern unsigned int io_write_rate; /* the current write rate from block devices */ +#endif /* CONFIG_MOSIX_RESEARCH */ +extern struct opcost deputy_here[MAX_MOSIX_TOPOLOGY]; /* DEPUTY I/O overheads here */ +extern struct opcost remote_here_adjusted[MAX_MOSIX_TOPOLOGY]; +#ifndef CONFIG_MOSIX_TOPOLOGY +extern struct opcost remote_here; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +extern unsigned load_ticks; + +extern int pages_to_keep_free; /* # of pages to attempt to keep free */ +extern int latest_free_mem; /* in pages */ + +extern rwlock_t loadinfo_lock; + +#define BUMP_HEAD do { register struct mosix_task *m = &current->mosix; \ + unsigned long oldcounter; int dobal; \ + lock_mosix(); \ + oldcounter = m->iocounter + +#define BUMP_TAIL dobal = ((m->iocounter ^ oldcounter) & 0xfff00000) != 0; \ + unlock_mosix(); \ +
if(dobal) { mosix_add_to_whereto(current, IOBALANCE); \ + m->iocounter = 0; } \ + } while(0) + +#define bump_statistics(s) BUMP_HEAD; \ + m->s += EVENT_COUNTER_VALUE; \ + m->iocounter += EVENT_COUNTER_VALUE; \ + BUMP_TAIL +#define bump_statistic_amount(type,b) BUMP_HEAD; \ + m->n##type##s += COPY_COUNTER_MULTIPLIER; \ + m->type##bytes += b; \ + m->iocounter += COPY_COUNTER_MULTIPLIER + b; \ + BUMP_TAIL + +#define bump_copyout(_n) bump_statistic_amount(copyout,_n) +#define bump_copyin(_n) bump_statistic_amount(copyin,_n) +#define bump_syscalls() bump_statistics(nsyscalls) +#define bump_demandpages() bump_statistics(ndemandpages) + +#endif /*__KERNEL__*/ + +#endif diff -urN linux-2.4.17/include/mos/comm.h linux_umopenmosix/include/mos/comm.h --- linux-2.4.17/include/mos/comm.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/comm.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan, Amnon Shiloh. + */ + +/* interfacing the MOSIX communication package: */ + +#ifndef _MOS_COMM_H +#define _MOS_COMM_H + +#ifdef __KERNEL__ + +struct mosix_link; /* defined in mos/comm.c */ +struct mosix_addr; /* defined in include/mos/protocol.h */ +struct mosix_task; +typedef struct mosix_link mosix_link; +typedef struct mosix_addr mosix_addr; + +/* + * values for mos# (e.g. 
in comm_open) + * 1..65535 -> valid mos# (mig) + * -1..-65535 -> valid mos# (info) + */ + +#define COMM_TOADDR (70001) +#define COMM_ACCEPT (70002) +#define COMM_MIGD (70003) +#define COMM_INFO (70004) +#define COMM_LOOSE (70005) + +/* + * message flags: + * lower 16 bits used for message types (see mos/protocol.h) + * next 8 bits used for message flags (see below) + * upper 8 bits used for mosix logging + */ +#define COMM_MFREGS 0x00800000 +#define COMM_MFDATA 0x00400000 +#define COMM_MFIDENT 0x00100000 +#ifdef CONFIG_MOSIX_DFSA +#define COMM_MFDFSA 0x00080000 +#define COMM_MFNODFSA 0x00040000 +#define COMM_MFDFSAOPTS (COMM_MFDFSA|COMM_MFNODFSA) +#endif /* CONFIG_MOSIX_DFSA */ + +#define COMM_MFOPTIONS 0x00ff0000 +#define COMM_MFHEADOPTS (COMM_MFREGS|COMM_MFIDENT) + +#define COMM_ZEROCOPYOK 0x40000000 + +/* + * other flags + */ +#define COMM_ALLDATA (-1) /* copy/flush all available data */ + + +/* + * message format + */ +struct comm_header { + unsigned short olen; + unsigned short hlen; + int type; + int dlen; + int regs; + int dfsalen; +}; + +#define COMM_HLEN (sizeof(struct comm_header)) + +/* + * exported data + */ +extern int comm_type; +extern unsigned long comm_remote_timo; +extern unsigned long comm_connect_timo; +extern unsigned long comm_reconn_timo; + +/* + * communication module interface + */ + +extern void comm_startup(void); +extern mosix_link *comm_open(int, mosix_addr *, unsigned long); +extern mosix_link *comm_use(struct task_struct *, mosix_link *); +extern void comm_close(mosix_link *); +extern int comm_accept(mosix_link *,mosix_link **, mosix_addr *,unsigned long); +extern int comm_send(int, void *, int, void *, int, int); +extern int comm_recv(void **, int *); +extern int comm_recvdata(void **); +extern int comm_copydata(void *, int, int); +extern int comm_hidedata(void); +extern void comm_flushdata(int); +extern void comm_free(void *); +extern int comm_peek(void); +extern int comm_wait(void); +extern int comm_send_urgent(void); +extern int 
comm_test_urgent(void); +extern void comm_take_urgent(void); +extern int comm_sendto(int, void *, int, mosix_link *, mosix_addr *); +extern int comm_recvfrom(void *,int, mosix_link *, mosix_addr *, unsigned long); +extern int comm_recvfrompe(void *,int, mosix_link *, int *, unsigned long); +extern int net_to_mos(mosix_addr *); + +extern inline void comm_invalidate_address(mosix_addr *); +extern void comm_migration_mode(int); +extern int comm_getpeer(mosix_link *); +extern void comm_init_linkpool(void); +extern void comm_free_linkpool(void); +extern mosix_link *comm_borrow_linkpool(void); +extern void comm_return_linkpool(mosix_link *); +extern void comm_age_linkpool(void); +extern void rinode_flush_files(int); + +extern spinlock_t skown_lock; + +#endif /* __KERNEL__ */ + +#endif diff -urN linux-2.4.17/include/mos/debug.h linux_umopenmosix/include/mos/debug.h --- linux-2.4.17/include/mos/debug.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/debug.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Oren Laadan. 
+ */ + +#ifndef _MOS_DEBUG_H +#define _MOS_DEBUG_H + +#ifdef CONFIG_MOSIX_DEBUG + +extern unsigned long ds_debug; + +#define DSDEB_ERROR 0x00000001 +#ifdef CONFIG_MOSIX_DFSA +#define DSDEB_DFSA 0x00000002 +#endif /* CONFIG_MOSIX_DFSA */ +#define DSDEB_MIG 0x00000010 +#define DSDEB_EXPEL 0x00000020 +#define DSDEB_HOLD 0x00000040 +#define DSDEB_MIGSTAGE 0x00000080 +#define DSDEB_WHERETO 0x00000100 +#define DSDEB_INFO 0x00000200 +#define DSDEB_PROC 0x00000400 +#define DSDEB_CACHE 0x00000800 +#define DSDEB_CACHESTOP 0x00001000 +#define DSDEB_CONFIG 0x00002000 +#define DSDEB_LOAD 0x00004000 +#define DSDEB_CONSIDER 0x00008000 +#define DSDEB_LOADS 0x00010000 +#define DSDEB_ANCESSTOR 0x00020000 +#define DSDEB_DECAY 0x00040000 +#define DSDEB_MEM 0x00080000 +#define DSDEB_NOTCACHED 0x00100000 +#define DSDEB_SUPERMEM 0x00200000 + +#define DSDEB_COMMOPEN 0x00400000 +#define DSDEB_COMMACPT 0x00800000 +#define DSDEB_COMMFLAGS 0x01000000 +#define DSDEB_COMMRECV 0x02000000 +#define DSDEB_COMMSEND 0x04000000 +#define DSDEB_COMMADDR 0x08000000 +#define DSDEB_COMMDORECV 0x10000000 +#define DSDEB_COMMDOSEND 0x20000000 +#define DSDEB_DEBUGSEND 0x40000000 + + +#define DSDEB_COMM (DSDEB_COMMOPEN|DSDEB_COMMACPT|DSDEB_COMMFLAGS| \ + DSDEB_COMMRECV|DSDEB_COMMSEND|DSDEB_COMMADDR) + +#endif /* CONFIG_MOSIX_DEBUG */ + +#ifdef CONFIG_MOSIX_UDB +extern void mosix_debugger(char *); +#define mosix_panic mosix_debugger +#else +#define mosix_panic panic +#endif /* CONFIG_MOSIX_UDB */ +#endif diff -urN linux-2.4.17/include/mos/defs.h linux_umopenmosix/include/mos/defs.h --- linux-2.4.17/include/mos/defs.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/defs.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ + +/* all manners of independent variables and definitions required by the + * MOSIX package and possibly also by the MOSIX module itself: + */ + +#ifndef _MOS_DEFS_H +#define _MOS_DEFS_H + +extern int mosix_running; +extern int active_cpus; + +extern spinlock_t whereto_lock; + +extern struct task_struct *mig_proc; +extern int mig_daemon_active; +extern struct task_struct *info_proc; +extern int info_daemon_active; + +extern int slow_alpha, fast_alpha; /* units over DECAY_QUOTIENT */ +extern int decay_interval; /* in seconds */ +extern int mosadmin_gateways; /* number of network segments (gateways) */ +extern int mosadmin_mode_stay; /* no automatic migration out */ +extern int mosadmin_mode_lstay; /* same just for local processes */ +extern int mosadmin_mode_block; /* do not accept guests */ +extern int mosadmin_mode_quiet; /* stop the load-chat (obsolete) */ +extern int mosadmin_mode_nomfs; /* disallow MFS access to this node */ +extern char bootexpel, expel_progress; +extern int latest_free_mem; + +extern struct task_struct *chosen_for_balance; +extern struct task_struct *chosen_for_mdp; + +#define STD_SPD 10000 /* speed units of a standard processor */ +extern int cpuspeed; /* this node's relative processor speed */ +extern int standard_speed; /* if sysadmin changes standard-processor */ +extern int PE; /* MOSIX number for this processor */ +extern int NPE; /* number of configured MOSIX nodes */ +extern int MAXPE; /* maximum MOSIX number in the cluster */ + +#ifdef CONFIG_MOSIX_TOPOLOGY +#if CONFIG_MOSIX_MAXTOPOLOGY > 10 +#warning MAXIMUM TOPOLOGY too big -- truncated to 10 +#define MAX_MOSIX_TOPOLOGY 10 +#elif CONFIG_MOSIX_MAXTOPOLOGY < 2 +#warning MAXIMUM TOPOLOGY < 2 -- so why use it at all? 
+#define MAX_MOSIX_TOPOLOGY 1 +#else +#define MAX_MOSIX_TOPOLOGY CONFIG_MOSIX_MAXTOPOLOGY +#endif /* CONFIG_MOSIX_MAXTOPOLOGY */ +#else +#define MAX_MOSIX_TOPOLOGY 1 +#endif /* CONFIG_MOSIX_TOPOLOGY */ + +extern struct mosix_cost +{ + int PAGE_D, PAGE_R; + int SYSCALL_D, SYSCALL_R; + int COPYOUT_BASE_D, COPYOUT_PER_KB_D; + int COPYOUT_BASE_R, COPYOUT_PER_KB_R; + int COPYIN_BASE_D, COPYIN_PER_KB_D; + int COPYIN_BASE_R, COPYIN_PER_KB_R; + int MIGRATION_BASIC, MIGRATION_PER_PAGE; +#ifdef CONFIG_MOSIX_TOPOLOGY + int first, last; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +} mosix_cost[MAX_MOSIX_TOPOLOGY]; + +extern struct mfs_cost +{ + int MFS_CONN_S, MFS_CONN_C; + int MFS_INKB_S, MFS_INKB_C; + int MFS_OUTKB_S, MFS_OUTKB_C; +#ifdef CONFIG_MOSIX_TOPOLOGY + int first, last; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +} mfs_cost[MAX_MOSIX_TOPOLOGY]; + +extern struct opcost deputy_here[MAX_MOSIX_TOPOLOGY]; + +#ifdef CONFIG_MOSIX_FS +#define MAX_MFS_STATNODES 10 + +struct mfs_stats +{ + int nnodes; + unsigned short hint; + unsigned short nodes[MAX_MFS_STATNODES]; + int64_t conns[MAX_MFS_STATNODES]; + int64_t inbytes[MAX_MFS_STATNODES]; + int64_t outbytes[MAX_MFS_STATNODES]; +}; +#else +#define MAX_MFS_STATNODES 0 +#endif /* CONFIG_MOSIX_FS */ + +/* node-disconnection timeout: */ +#ifdef CONFIG_MOSIX_UDB +#define MOSIX_CONNECTION_KEEPALIVE_INTERVAL 60 +#define MOSIX_CONNECTION_KEEPALIVE_MAXTRIES 10 +#define MOSIX_CONNECTION_KEEPALIVE_TOTAL 600 +#else +#define MOSIX_CONNECTION_KEEPALIVE_INTERVAL 30 +#define MOSIX_CONNECTION_KEEPALIVE_MAXTRIES 6 +#define MOSIX_CONNECTION_KEEPALIVE_TOTAL 180 +#endif /* CONFIG_MOSIX_UDB */ + +/* default slow decay brings process-statistics down to 10% in 3 minutes */ +/* default fast decay brings process-statistics down to 10% in 1 minute */ + +#define DEFAULT_DECAY_INTERVAL 2 /* seconds */ +#define DEFAULT_SLOW_ALPHA (DECAY_QUOTIENT * 975 / 1000) +#define DEFAULT_FAST_ALPHA (DECAY_QUOTIENT * 926 / 1000) + +#define MF 100 /* MOSIX frequency */ + +#define 
MILLION 1000000 +#define DMILLION 1000000.0 + +#endif diff -urN linux-2.4.17/include/mos/dfsa.h linux_umopenmosix/include/mos/dfsa.h --- linux-2.4.17/include/mos/dfsa.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/dfsa.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#ifndef _MOS_DFSA_H +#define _MOS_DFSA_H + +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DFSA + +#define MAXDFSAFS 8 +#define MAXDFSALINKS 8 +#define MAXDFSALINKLEN 128 + +struct dfsatab +{ + unsigned short len; + char ndefs; + struct dfsdef + { + char defno; /* mount=1-MAXDFSAFS, symlink>MAXDFSAFS */ + char mounted; + short offset; + } dfsdef[0]; +}; + +/* update flags (dupdates): */ + +int dfsa_addlink(char *); +int dfsa_dellink(char *); +int dfsa_clearlinks(void); +char *dfsa_showlinks(void); +void dfsa_adapt(struct dfsatab *); +int dfsa_sync(int); +void dfsa_everyone_was_updated(void); +void clear_dfsasync(void); +void remote_clear_dfsa(void); +void dfsa_tinit(void); +void deputy_resync_dfsa(void); +char *deputy_pack_dfsa_changes(int *); +void remote_unpack_dfsa_changes(char *); +char *remote_pack_dfsa_changes(int *); +void deputy_unpack_dfsa_changes(char *); +void dfsa_check_comm_send(int *, char **, int *); +void dfsa_comm_recv(int, char *); +int enter_remote_dfsa_mode(void); +void leave_remote_dfsa_mode(void); +void dfsa_exit(void); + +#endif +#endif diff -urN linux-2.4.17/include/mos/dscosts.h linux_umopenmosix/include/mos/dscosts.h --- linux-2.4.17/include/mos/dscosts.h Thu Jan 1 02:00:00 1970 +++ 
linux_umopenmosix/include/mos/dscosts.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,24 @@ +/* dscosts.h -- MOSIX */ +/* cost of remote operations (microseconds) */ + +/* measuring date: Mon Apr 30 20:08:13 IDT 2001 */ +/* between two Pentium-III/1GHz over Ethernet-100 */ + +#define PAGE_COST_D 81 +#define PAGE_COST_R 150 + +#define SYSCALL_COST_D 39 +#define SYSCALL_COST_R 38 + +#define COPYOUT_COST_BASE_D 0 +#define COPYOUT_COST_PER_KB_D 11 +#define COPYOUT_COST_BASE_R 0 +#define COPYOUT_COST_PER_KB_R 18 + +#define COPYIN_COST_BASE_D 0 +#define COPYIN_COST_PER_KB_D 18 +#define COPYIN_COST_BASE_R 0 +#define COPYIN_COST_PER_KB_R 12 + +#define MIGRATION_COST_BASIC 8141 +#define MIGRATION_COST_PER_PAGE 351 diff -urN linux-2.4.17/include/mos/log.h linux_umopenmosix/include/mos/log.h --- linux-2.4.17/include/mos/log.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/log.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan. 
+ */ + +#ifndef _MOS_LOG_H +#define _MOS_LOG_H + + +#define MOSIX_LOG_LEN 30 +#define MOSIX_LOG_POS(x) (x)[MOSIX_LOG_LEN] + +#define MOSIX_LOG_SENDTYPE 1 +#define MOSIX_LOG_RECVTYPE 2 +#define MOSIX_LOG_RECEIVE 3 +#define MOSIX_LOG_SENDHDR 4 +#define MOSIX_LOG_RECVHDR 5 +#define MOSIX_LOG_SENDDATA 6 +#define MOSIX_LOG_RECVDATA 7 +#define MOSIX_LOG_SENDURG 8 +#define MOSIX_LOG_RECVURG 9 +#define MOSIX_LOG_RETURNVAL 10 + + +#ifdef CONFIG_MOSIX_DEBUG +void add_mosix_log(struct task_struct *, int, int); +#else +#define add_mosix_log(x,y,z) do {} while (0) +#endif /* CONFIG_MOSIX_DEBUG */ + + +#define add_mosix_log_hdr(p, type, hlen, olen) \ + add_mosix_log((p), (type), \ + ( ((hlen) & 0xffff) + (((olen) & 0xff) << 16) ) & 0x00ffffff) + +#define add_mosix_log_ret(p, err) \ + add_mosix_log((p), MOSIX_LOG_RETURNVAL, (-(err)) & 0x00ffffff) + +#endif /* _MOS_LOG_H */ diff -urN linux-2.4.17/include/mos/mfscosts.h linux_umopenmosix/include/mos/mfscosts.h --- linux-2.4.17/include/mos/mfscosts.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/mfscosts.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,14 @@ +/* mfscosts.h -- MOSIX */ +/* cost of MFS operations (microseconds) */ + +/* measuring date: Mon Apr 30 20:12:28 IDT 2001 */ +/* between two Pentium-III/1GHz over Ethernet-100 */ + +#define MFS_COST_CONN_S 36 +#define MFS_COST_CONN_C 40 + +#define MFS_COST_INKB_S 14 +#define MFS_COST_INKB_C 19 + +#define MFS_COST_OUTKB_S 88 +#define MFS_COST_OUTKB_C 56 diff -urN linux-2.4.17/include/mos/mosctl.h linux_umopenmosix/include/mos/mosctl.h --- linux-2.4.17/include/mos/mosctl.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/mosctl.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,76 @@ +/* All changes since Feb 12, 2002 copyright by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation.
+ * + * Author(s): Oren Laadan, Amnon Shiloh, Moshe Bar + */ + +#ifndef _MOS_MOSCTL_H +#define _MOS_MOSCTL_H + +/* + * MOSIX API + */ + +/* migration requests via /proc/nnnnn/goto */ + +#define DM_GOBACKHOME 0 /* just go back home */ +#define DM_BALANCE (-1) /* perform load balancing */ + + +/* constants for status report (/proc/mosix/nodes/nnn/status) */ + +#define DS_MOSIX_DEF 0x001 +#define DS_MOSIX_UP 0x002 +#define DS_STAY 0x004 +#define DS_LSTAY 0x008 +#define DS_BLOCK 0x010 +#define DS_QUIET 0x020 +#define DS_NOMFS 0x040 + + +/* constants for statistics decay */ + +#define DADV_CPU 1 /* a pure CPU task */ +#define DADV_NOCPU 2 /* a non-computational task */ +#define DADV_NODECAY 3 /* task of a uniform mix of rapid changes */ +#define DADV_SLOWDECAY 4 /* task may change its nature over long run */ +#define DADV_FASTDECAY 5 /* task changes its nature frequently */ +#define DADV_OWNDECAY 6 /* task defined its own decay policy */ + +#define DADV_POLICY 0x00F /* policy mask */ + +#define DADV_CLEAR 0x010 /* clear statistics: nature is changing */ +#define DADV_INHERIT 0x020 /* children of same nature */ +#define DADV_EXEC 0x040 /* maintain advice after exec */ +#define DADV_EXECONCE 0x080 /* maintain advice after only the next exec */ +#define DADV_NOINHERIT 0x100 /* cancel inheritance */ +#define DADV_NOEXEC 0x200 /* cancel advice on exec */ +#define DADV_NOEXECONCE 0x400 /* cancel DADV_EXECONCE */ +#define DADV_ENQUIRE 0x1000 /* enquire decay policy */ + +#define DADV_DEFAULT DADV_SLOWDECAY + +#define DECAY_QUOTIENT 1000 /* quotient for the "decay" argument */ + + +/* load information */ + +struct mosix_info { + unsigned long load; + unsigned short speed; + unsigned short ncpus; + unsigned short util; + unsigned short status; + unsigned int mem; + unsigned int rmem; + unsigned int tmem; +#ifdef CONFIG_MOSIX_RESEARCH + unsigned int rio; /* read io */ + unsigned int wio; /* write io */ +#endif /* CONFIG_MOSIX_RESEARCH */ +}; + +#endif /* _MOS_MOSCTL_H */ diff -urN 
linux-2.4.17/include/mos/mosixtask.h linux_umopenmosix/include/mos/mosixtask.h --- linux-2.4.17/include/mos/mosixtask.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/mosixtask.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,331 @@ +/* Changes since Feb 12, 2002 by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * Author(s): Amnon Shiloh, Moshe Bar + */ + +#ifndef _LINUX_MOSIXTASK_H +#define _LINUX_MOSIXTASK_H + +#include +#include +#include + +typedef long long now_t; +#ifdef CONFIG_MOSIX_DFSA +struct dfsatab; +#endif /* CONFIG_MOSIX_DFSA */ + +#ifdef CONFIG_MOSIX_FS +struct dentry; +struct mfs_stats; +#endif /* CONFIG_MOSIX_FS */ + +struct opcost +{ + int page, syscall, out, outkb, in, inkb; +#ifdef CONFIG_MOSIX_TOPOLOGY + int first, last; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +}; + +#ifdef CONFIG_MOSIX +#define NCAPINTS 4 +#endif + + +struct mosix_task +{ + volatile uint32_t dflags; /* distributed (MOSIX) flags */ + atomic_t dreqs; /* bits that others may request */ + /* following chars/bits should be in dflags, but modified within + * interrupt code... 
if only the compiler generated single-instruction + * codes for "|=" and "&=~" (but it doesn't) */ + volatile char commpri; /* priority for MOSIX communication */ + volatile char ignoreoldsigs; /* deliberately cleared ->sigpending */ + volatile char catchme; /* catch for debugging */ + + char hsleep_count; /* count on DHEAVYSLEEP */ + volatile int whereto; /* migration request */ + int lock_depth; /* of the mosix_flag */ + volatile uint32_t stay; /* reasons why process must stay */ + struct prequest *prequest; /* requests to this task */ + int whereami; /* where the process is */ + int deppe; /* where the process came from */ + volatile long bstate; /* backed-up state while in MOSIX */ + rwlock_t state_lock; /* changes of bstate */ + kernel_cap_t remote_caps; /* effective capabilities on REMOTE */ + struct held_files + { + struct file *f; /* a file in use */ + char denywrite; /* whether holding i_writecount down */ + } *held_files; /* files held by remote VM */ + int held_allocated; /* # of entries in "held_files" */ + struct mosix_link *contact; /* DEPUTY <==> REMOTE connection */ + struct task_struct *ancesstor; /* nearest ancestor when dependent */ + uint32_t deputytime; /* ticks spent on DEPUTY */ + long last_sigxcpu; /* last [prof] time SIGXCPU was sent */ + int64_t passedtime; /* time already passed to DEPUTY */ + int64_t uttime; /* time to add(REMOTE)/subtract(LOCAL) to/from + * times->tms_utime for statistic purposes */ + int64_t dctime; /* decayed user-time (ms) */ + int64_t cutime; /* cumulative dependent-child user-time (ms) */ + int64_t pagetime; /* time waiting for free pages (ms) */ + int64_t ndemandpages; /* statistical # of demand pages */ + int64_t nsyscalls; /* statistical # of syscalls */ + int64_t ncopyouts; /* statistical # of copying kernel to user */ + int64_t copyoutbytes; /* statistical # of bytes to user */ + int64_t ncopyins; /* statistical # of copying user to kernel */ + int64_t copyinbytes; /* statistical # of bytes from user 
*/ + uint32_t iocounter; /* combination of above statistics */ + struct opcost depcost[MAX_MOSIX_TOPOLOGY]; /* costs on DEPUTY */ + int depspeed; /* speed of DEPUTY */ + int64_t last_consider; /* user time since last considered migration */ + now_t last_mconsider; /* when last selected for memory-balancing */ + unsigned short decsecs; /* seconds within decay cycle */ + unsigned short deccycle;/* length of decay cycle in seconds */ + short decay; /* decay factor (out of DECAY_QUOTIENT) */ + unsigned char dpolicy; /* statistic-collection policy */ + char disclosure; /* level of disclosure */ + uint32_t asig; /* signals arriving on REMOTE */ + siginfo_t *forced_sigs; /* REMOTE forced signals */ + int nforced_sigs; /* # of REMOTE forced signals */ + int pages_i_bring; /* # of pages still to be brought */ + int rpagecredit; /* # of expected remote page-faults */ + int rfreepages; /* # of mapped-pages to bring without penalty */ + int page_allocs; /* page allocation requests decaying counter */ + short ran; /* ticks running during last second */ + unsigned short runstart;/* "load_ticks" (+1) since running */ + int nmigs; /* number of [successful] migrations - ever */ + int load; /* estimated contribution to load */ + int sonpid; /* pid of son in remote-fork */ + int loadhere; /* original local load */ + int migpages; /* # of migrating pages */ + void *inexec; /* structure while in "exec" */ + wait_queue_head_t wait_dist;/* misc. 
wait for process */ + unsigned long exit_mem; /* memory on REMOTE when exited */ + unsigned short deputy_regs; /* bit map of regs in charge of DEPUTY */ + unsigned short pass_regs; /* bit map of regs to pass */ + __u32 features[NCAPINTS]; /* CPU features on original node */ + short mypid; /* original PID */ + short sigmig; /* signal to receive on migration */ + uint32_t *altregs; /* place of registers when not on stack-top */ + void *mosix_log; + struct data_cache *ucache; + char *ps; /* common "ps" information */ + int memused, memunused, memswapped; /* memory split-up */ + struct depinfo + { + pid_t pgrp; + pid_t session; + char comm[16]; + pid_t tgid; + } depinfo; + unsigned int dirty_bits; /* hidden bit-args to Linux routines */ + unsigned int dirty_arg; /* hidden argument to Linux routines */ +#ifdef CONFIG_MOSIX_DFSA + int copy_ins; + int bytes_in; + uint32_t dupdates; + struct dfsatab *ttab; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + int iget_mfs_pe; + unsigned long long iget_mfs_handle; + int selected; + int lastexec; + int lastmagic; + struct mfs_stats *mfs_stats; +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DIAG + int mig_page_count; +#endif /* CONFIG_MOSIX_DIAG */ +}; + +#ifdef CONFIG_MOSIX_DFSA +#define DFSA_INIT_TASK /* All fields are 0, nothing to declare */ +#else +#define DFSA_INIT_TASK +#endif /* CONFIG_MOSIX_DFSA */ + +#ifdef CONFIG_MOSIX_FS +#define MFS_INIT_TASK /* All fields are 0, nothing to declare */ +#else +#define MFS_INIT_TASK +#endif /* CONFIG_MOSIX_FS */ + +/* only non-zero fields are mentioned: */ +#define MOSIX_INIT_TASK(tsk) mosix: { \ + dreqs: ATOMIC_INIT(0), \ + lock_depth: -1, \ + bstate: TASK_SAME, \ + wait_dist: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.mosix.wait_dist), \ + state_lock: RW_LOCK_UNLOCKED, \ + disclosure: CONFIG_MOSIX_DISCLOSURE \ + DFSA_INIT_TASK \ + MFS_INIT_TASK \ +} + +/* + * distributed flags (dflags) that are set only by the process itself, + * (but may be read by others): + * the following 
locks (in correct order of locking) are required to + * modify those bits: + * + * DINSCHED: runqueue_lock + * DHEAVYSLEEP: runqueue_lock + * DREMOTEDFSA: runqueue_lock + * DPASSING: runqueue_lock + * DINCOMING: runqueue_lock + * DDEPUTY: lock_mosix, tasklist_lock, runqueue_lock, task_lock + * DFINISHED: lock_mosix, runqueue_lock, task_lock + * DPAGEIN: runqueue_lock + * DSENTURGENT: runqueue_lock + * DFAKESIGNAL: sometimes sigmask_lock + */ +#define DDEPUTY 0x00000001 /* process is a DEPUTY stub */ +#define DREMOTE 0x00000002 /* process is running remotely */ +#define DINSCHED 0x00000004 /* MOSIX task within "schedule" */ +#define DSYNC 0x00000008 /* remote process is synced and waits */ +#define DPSYNC DSYNC /* deputy must tell us to continue */ +#define DNESTED 0x00000010 /* nested requests from remote */ +#define DSENTURGENT 0x00000020 /* REMOTE has already sent OOB */ +#define DINCOMING 0x00000040 /* process coming here */ +#define DPASSING 0x00000080 /* process is in migration */ +#define DPAGEIN 0x00000100 /* in pagein, considered running */ +#define DFINISHED 0x00000200 /* wants to become zombie */ +#define DREMOTEDAEMON 0x00000400 /* set DREMOTE on "fork" */ +#define DFAKESIGNAL 0x00000800 /* produce a fake signal */ +#define DHEAVYSLEEP 0x00001000 /* prevent signals/events when asleep */ +#define DTRACESYS1 0x00002000 /* PT_TRACESYS done before syscall */ +#define DTRACESYS2 0x00004000 /* syscall done before 2nd PT_TRACESYS*/ +#define DMUSTBEBACK 0x00008000 /* MUST arrive back home */ +#define DDUMPABLE 0x00010000 /* copy of dumpable when DEPUTY */ +#define DDELAYHELD 0x00020000 /* rebuild held_files later */ +#ifdef CONFIG_MOSIX_DFSA +#define DSTATSDOWN 0x00800000 /* turn off local system-call stats */ +#define DREMOTEDFSA 0x01000000 /* on REMOTE: within a DFSA syscall */ +#endif /* CONFIG_MOSIX_DFSA */ + +/* + * bits in "dreqs" (anything that others can set): + */ + +#define DREQ_NICECNG 0x00000001 /* priority changed */ +#define DREQ_UPDOVERHEADS 
0x00000002 /* update overheads */ +#define DREQ_HOMEWAKE 0x00000004 /* wake when arrived home */ +#define DREQ_CHECKCONF 0x00000008 /* check MOSIX configuration */ +#define DREQ_CHECKSTAY 0x00000010 /* check whether still stay */ +#define DREQ_URGENT 0x00000020 /* something urgent (R=>D) */ +#define DREQ_CAPCNG 0x00000040 /* capabilities changed */ +#define DREQ_INFOCNG 0x00000080 /* disclosed info changed */ +#define DREQ_FILEUNMAP 0x00000100 /* file(s) were unmapped */ +#ifdef CONFIG_MOSIX_DFSA +#define DREQ_NOTUPTODATE 0x10000000 /* send it all again */ +#define DREQ_DFSASYNC 0x20000000 /* DFSA world changed */ +#define DREQ_EXITDFSA 0x40000000 /* call DEPUTY ASAP */ +#else +#define DREQ_DFSASYNC 0 +#define DREQ_EXITDFSA 0 +#endif /* CONFIG_MOSIX_DFSA */ + +#define tell_process(p,what) atomic_set_mask(what,&(p)->mosix.dreqs) +#define process_ack(p,what) atomic_clear_mask(what,&((p)->mosix.dreqs)) +#define process_told(p,what) (atomic_read(&((p)->mosix.dreqs)) & (what)) + +/* + * reasons to stay: + */ + +#define DSTAY_FOR_MONKEY 0x00000001 /* using monkey vnode */ +#define DSTAY_FOR_DEV 0x00000002 /* mapping a device */ +#define DSTAY_FOR_86 0x00000004 /* running in 86 mode */ +#define DSTAY_ITS_DAEMON 0x00000008 /* daemon process */ +#define DSTAY_FOR_PRIV 0x00000010 /* privileged inst. 
access (in/out) */ +#define DSTAY_FOR_MLOCK 0x00000020 /* has locked memory */ +#define DSTAY_FOR_CLONE 0x00000040 /* shared VM */ +#define DSTAY_FOR_RT 0x00000080 /* Real-Time scheduling */ +#define DSTAY_FOR_IOPL 0x00000100 /* direct I/O permission */ +#define DSTAY_ITS_INIT 0x00000200 /* init process */ +#define DSTAY_FOR_KIOBUF 0x00000400 /* using kiobuf */ +#define DSTAY_OTHER1 0x01000000 /* external reason for stay (1) */ +#define DSTAY_OTHER2 0x02000000 /* external reason for stay (2) */ +#define DSTAY_OTHER3 0x04000000 /* external reason for stay (3) */ +#define DSTAY_OTHER4 0x08000000 /* external reason for stay (4) */ +#define DNOMIGRATE 0x80000000 /* user requested no auto-migrations */ + +#define DSTAY (~DNOMIGRATE) +#define DSTAY_PER_MM (DSTAY_FOR_MONKEY|DSTAY_FOR_DEV|DSTAY_FOR_MLOCK|DSTAY_FOR_KIOBUF) + +/* + * where to go (whereto) + */ +#define GOBACKHOME (-1) /* just go back home */ +#define BALANCE (-2) /* perform load balancing */ +#define IOBALANCE (-3) /* perform load balancing for I/O */ +#define MEMBALANCE (-4) /* perform balancing for memory */ +#define MFSBALANCE (-5) /* must go back home -- NOTE(review): same text as MUSTGOHOME below, possibly a copy-paste; confirm the intended meaning */ +#define MUSTGOHOME (-6) /* must go back home */ + +#define evaluate_pending_signals_in_mosix_context() do { \ + struct task_struct *p = current; \ + unsigned long flags; \ + spin_lock_irqsave(&p->sigmask_lock,flags);\ + if(p->mosix.ignoreoldsigs) \ + p->sigpending = (p->mosix.dflags & (DFAKESIGNAL|DHEAVYSLEEP)) == DFAKESIGNAL; \ + else recalc_sigpending(p); \ + spin_unlock_irqrestore(&p->sigmask_lock,flags); \ + } while(0) + +#define deeper_sleep() do { \ + struct task_struct *p = current; \ + if(!p->mosix.hsleep_count++) { \ + unsigned long flags; \ + spin_lock_irq(&runqueue_lock); \ + p->mosix.dflags |= DHEAVYSLEEP; \ + spin_unlock_irq(&runqueue_lock); \ + spin_lock_irqsave(&p->sigmask_lock,flags); \ + p->sigpending = 0; \ + spin_unlock_irqrestore(&p->sigmask_lock,flags); 
\ + } } while(0) + +#define lighter_sleep() do { \ + struct task_struct *p = 
current; \ + if(!--p->mosix.hsleep_count) { \ + spin_lock_irq(&runqueue_lock); \ + p->mosix.dflags &= ~DHEAVYSLEEP; \ + spin_unlock_irq(&runqueue_lock); \ + evaluate_pending_signals_in_mosix_context(); \ + } } while(0) + +#define set_me_dumpable(on) do { \ + if(current->mm) current->mm->dumpable = (on); \ + else if((on)) current->mosix.dflags |= DDUMPABLE; \ + else current->mosix.dflags &= ~DDUMPABLE; \ + } while(0) + +#define i_am_dumpable() ((current->mosix.dflags & DDEPUTY) ? \ + (current->mosix.dflags & DDUMPABLE) != 0 : \ + current->mm ? current->mm->dumpable : 0) + +/* dirty parameters: */ +/* sorry we are not allowed to add parameters to standard Linux routines */ +/* because they can be called by who knows what - even modules, so instead: */ +#define MMAP_MMDOWNED 0x01 /* MM was downed prior to mmap() */ +#define MMAP_MAYSHARE 0x02 /* ornament with VM_MAYSHARE */ +#define MAPS_NOUSER 0x04 /* do not copy maps to user */ +#ifdef CONFIG_MOSIX_FS +#define MFSARG_OLDREADDIR 0x08 /* this is "old_readdir" calling */ +#define MFSARG_GETDENTS 0x10 /* this is "getdents" calling */ +#define MFSARG_GETDENTS64 0x20 /* this is "getdents64" calling */ +#define MFSARG_EMPTYF_PRI 0x40 /* obtain empty file even beyond limit*/ +#define MFSARG_RONLY 0x80 /* no write-permission on anything */ +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA +#define FILP_OPEN_SYSCALL 0x100 /* filp_open called from sys_open */ +#endif /* CONFIG_MOSIX_DFSA */ + +#endif /* _LINUX_MOSIXTASK_H */ diff -urN linux-2.4.17/include/mos/protocol.h linux_umopenmosix/include/mos/protocol.h --- linux-2.4.17/include/mos/protocol.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/protocol.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,553 @@ +/* changes since Feb 12, 2002 copyright by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * Author(s): Amnon Shiloh, Oren Laadan, Moshe Bar + */ +#ifndef _MOS_PROTOCOL_H +#define _MOS_PROTOCOL_H +#ifdef CONFIG_MOSIX +#ifdef __KERNEL__ + +#include +#include +#include + +#define EDIST EDOM /* MOSIX general error-code, that is otherwise */ + /* rarely-if-ever returned by kernel */ + +struct mosix_addr { + struct sockaddr saddr; +}; + + +/* structures passed between DEPUTY and REMOTE: */ + +struct mig_request_h +{ + long version; + short reason; + short topology; + int personality; + int wp_works_ok; + int pages_sent; + int request_type; + int has_dfsa; +}; + +enum contact_type +{ + FROM_DEPUTY, DEPUTY_PROBE, FROM_REMOTE +}; + +struct please_migrate_h +{ + int reason; + int to; + struct mosix_addr ma; +}; + +struct mmap_parameters_h +{ + unsigned long addr; + int fixed; + unsigned long len; + unsigned long flags; + unsigned long pgoff; + int origin; + struct file *fp; + struct dentry *dp; + uint64_t uniq; + off_t isize; + nopage_t nopage; +}; + +struct brk_parameters_h +{ + unsigned long addr; + unsigned long len; +}; + +struct munmap_parameters_h +{ + unsigned long addr; + size_t len; +}; + +struct mprotect_parameters_h +{ + unsigned long addr; + size_t len; + unsigned long prot; +}; + +struct setupframe_parameters_h +{ + unsigned long sig; + unsigned long flags; + __sighandler_t handler; + void (*restorer)(void); + sigset_t set; + struct siginfo info; + unsigned long ss_sp; + size_t ss_size; +}; + +struct restore_sigcontext_ret_h +{ + int result; + int eax; +}; + +struct prequest_h +{ + char command; + int param; + int param2; + int len; +}; + +struct prequest_reply_h +{ + int reply; + int error; + int datalen; +}; + +struct user_copy_h +{ + void *addr; + unsigned int size; + char verify; +}; + +struct strnlen_user_h +{ + char *addr; + long len; +}; + +struct user_csum_copy_h +{ + void *addr; + unsigned int len; + int sum; +}; + +struct user_csum_copy_ret_h +{ + unsigned int newsum; + unsigned int error; +}; + +struct asig_h +{ + unsigned int sigs; + 
int nforced; +}; + +struct syscall_h +{ + int n; + unsigned int args[6]; + int simple_data_type; + unsigned long simple_data_addr; + int simple_data_len; + int simple_data_actual; +}; + +struct syscall_ret_h +{ + long ret; + unsigned long deputytime; + unsigned long simple_data_addr; + int simple_data_len; /* -1 = complex data */ +}; + +struct bring_page_h +{ + struct file *fp; + unsigned long offset; + nopage_t nopage; +}; + +struct page_ret_h +{ + int ret; + unsigned long deputytime; +}; + +struct rlimit_h +{ + int resource; + struct rlimit limit; +}; + +struct execve_counts_h +{ + char **argp; + char **envp; +}; + +struct execve_counts_ret_h +{ + int argc; + int envc; +}; + +struct execve_bring_strings_h +{ + unsigned long p; + char *filename; + char **envp; + int envc; + char **argp; + int argc; +}; + +struct execve_bring_strings_ret_h +{ + unsigned long p; + unsigned long exec; + int pgno; + int len; + int result; +}; + +struct execve_more_strings_h +{ + int pgno; + int len; +}; + +struct execve_setup_args_h +{ + int create; + int pgno; + int len; + int how; + int p; + int loader; + int exec; + int argc; + int envc; + unsigned long personality; +}; + +struct execve_setup_args_ret_h +{ + unsigned long p; + unsigned long loader; + unsigned long exec; + int reply; + unsigned long start_stack; +}; + +struct execve_exec_mmap_ret_h +{ + int result; + char comm[16]; +}; + +struct execve_elf_setup_h +{ + char *p; + int argc; + int envc; + int hasexec; + unsigned long exec_e_phoff; + unsigned long exec_e_phnum; + unsigned long exec_e_entry; + unsigned long addr; + unsigned long load_bias; + unsigned long interp_load_addr; + int ibcs; + struct elf_tables_extras extras; + int add_arg_start; + unsigned long elf_brk; + unsigned long end_code; + unsigned long start_code; + unsigned long start_data; + unsigned long end_data; + unsigned long elf_bss; + unsigned long personality; +}; + +struct execve_fix_elf_aout_h +{ + unsigned bss; + unsigned data; + unsigned text; +}; + 
+struct list_vmas_ret_h +{ + int n; + unsigned long argstart; + unsigned long argend; +}; + +struct fork_h +{ + unsigned long usp; + int do_forkmigrate; + int pid; +}; + +struct mm_stats_h +{ + unsigned long start_code, end_code, start_data, end_data; + unsigned long start_brk, brk, start_stack; + unsigned long arg_start, arg_end, env_start, env_end; +}; + +struct disclosure_h +{ + uid_t uid; + gid_t gid; + pid_t pgrp; + pid_t session; + char disclosure; + char comm[16]; + pid_t tgid; +}; + +struct mig_misc_h +{ + unsigned long ptrace; + unsigned long dflags; + long debugreg[8]; + long nice; + kernel_cap_t caps; + unsigned long it_prof_value, it_prof_incr, it_virt_value, it_virt_incr; + struct pt_regs regs; + struct rlimit rlim_cpu, rlim_data, rlim_stack, rlim_rss, rlim_as; +#ifdef CONFIG_MOSIX_DFSA + struct rlimit rlim_nofile, rlim_fsz; +#endif /* CONFIG_MOSIX_DFSA */ + char stay; + unsigned short deppe; + unsigned short deputy_regs; + u64 passedtime; + u16 deccycle; + s16 decay; + u8 dpolicy; + pid_t mypid; + struct asig_h asig; + struct disclosure_h info; + int nmigs; + int pagecredit; + int lastxcpu; + __u32 features[NCAPINTS]; + u64 tscval; + struct opcost depcost[MAX_MOSIX_TOPOLOGY]; + int depspeed; +}; + +struct decay_h +{ + int policy; + int deccycle; + int decay; +}; + +struct aload_h +{ + int aload; + int freepages; +#ifdef CONFIG_MOSIX_TOPOLOGY + struct mfs_cost mfscosts[MAX_MOSIX_TOPOLOGY]; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +}; + +#ifdef CONFIG_MOSIX_DFSA +struct read_yourself_h +{ + int dfsano; + loff_t off; + unsigned long addr; + unsigned long len; + int infolen; +}; +#endif /* CONFIG_MOSIX_DFSA */ + +/* COMMAND CLASSES: */ + +#define DEP_FLG 0x100 +#define MIG_FLG 0x200 +#define REPLY 0x400 +#define ANYTIME 0x800 +#define USERMODE 0x1000 +#ifdef CONFIG_MOSIX_DFSA +#define DFSA_NOUPDATE 0x2000 +#else +#define DFSA_NOUPDATE 0 +#endif /* CONFIG_MOSIX_DFSA */ + +/* COMMANDS: */ + +#define DEP_USERMODE (DEP_FLG|0x00) +#define DEP_SYNC (DEP_FLG|0x01) 
+#define DEP_NOTHING (DEP_FLG|0x02) +#define DEP_MMAP (DEP_FLG|0x03) +#define DEP_MUNMAP (DEP_FLG|0x04) +#define DEP_LISTHOLD (DEP_FLG|0x05) +#define DEP_SETUPFRAME (DEP_FLG|0x06) +#define DEP_NICE (DEP_FLG|0x07) +#define DEP_CPAS (DEP_FLG|0x08) +#define DEP_RESTORESIGCONTEXT (DEP_FLG|0x09) +#define DEP_PREQUEST (DEP_FLG|0x10) +#define DEP_COPY_FROM_USER (DEP_FLG|0x11) +#define DEP_COPY_TO_USER (DEP_FLG|0x12) +#define DEP_CLEAR_USER (DEP_FLG|0x13) +#define DEP_STRNCPY_FROM_USER (DEP_FLG|0x14) +#define DEP_STRNLEN_USER (DEP_FLG|0x15) +#define DEP_VERIFY_WRITE (DEP_FLG|0x16) +#define DEP_RLIMIT (DEP_FLG|0x17) +#define DEP_TAKEURGENT (DEP_FLG|0x18) +#define DEP_RUSAGE (DEP_FLG|0x19) +#define DEP_EXECVE_COUNTS (DEP_FLG|0x20) +#define DEP_BRING_STRINGS (DEP_FLG|0x21) +#define DEP_SETUP_ARGS (DEP_FLG|0x22) +#define DEP_EXEC_MMAP (DEP_FLG|0x23) +#define DEP_DUMP_THREAD (DEP_FLG|0x24) +#define DEP_INIT_AOUT_MM (DEP_FLG|0x25) +#define DEP_ELF_SETUP (DEP_FLG|0x26) +#define DEP_FIX_ELF_AOUT (DEP_FLG|0x27) +#define DEP_LIST_VMAS (DEP_FLG|0x28) +#define DEP_PLEASE_FORK (DEP_FLG|0x29) +#define DEP_CONSIDER (DEP_FLG|0x30) +#define DEP_PERSONALITY (DEP_FLG|0x31) +#define DEP_BRING_ME_REGS (DEP_FLG|0x32) +#define DEP_CSUM_COPY_FROM_USER (DEP_FLG|0x33) +#define DEP_DUMP_FPU (DEP_FLG|0x34) +#define DEP_COME_BACK (DEP_FLG|0x35) +#define DEP_PLEASE_MIGRATE (DEP_FLG|0x36) +#define DEP_CACHE_READ_DATA (DEP_FLG|0x37) +#define DEP_DATA_TO_USER (DEP_FLG|0x38) +#define DEP_UPDATE_DECAY (DEP_FLG|0x39) +#define DEP_UPDATE_LOCK (DEP_FLG|0x40) +#define DEP_PSINFO (DEP_FLG|0x41) +#define DEP_OPCOSTS (DEP_FLG|0x42) +#define DEP_MPROTECT (DEP_FLG|0x43) +#define DEP_BRK (DEP_FLG|0x44) +#define DEP_CAPS (DEP_FLG|0x45) +#define DEP_INFO (DEP_FLG|0x46) +#ifdef CONFIG_MOSIX_DFSA +#define DEP_DFSA_CHANGES (DEP_FLG|0x80) +#define DEP_DFSA_CLEAR (DEP_FLG|0x81) +#define DEP_READ_YOURSELF (DEP_FLG|0x82) +#endif /* CONFIG_MOSIX_DFSA */ + +#define REM_NOTHING (ANYTIME|0x01) +#define REM_PAGE 
(ANYTIME|DFSA_NOUPDATE|0x02) +#define REM_MORESTRINGS (ANYTIME|0x03) +#define REM_BRING_ME_REGS (ANYTIME|0x04) +#define REM_GETALOAD (ANYTIME|0x05) +#define REM_GETTSC (ANYTIME|0x06) + +#define REM_NULLMSG 0x01 +#define REM_ASIG 0x02 +#define REM_SYSCALL_TRACE 0x03 +#define REM_SYSCALL 0x04 +#define REM_BRING_ME_HOME 0x05 +#define REM_BRING_ME_TO 0x06 +#define REM_CONSIDERING 0x07 +#define REM_MUST_COME_HOME 0x08 +#define REM_CONNECT_TO 0x09 + +#define MIG_REQUEST (MIG_FLG|0x01) + +#define MIG_MM_STATS (MIG_FLG|0x10) +#define MIG_MM_AREA (MIG_FLG|0x11) +#define MIG_PAGE (MIG_FLG|0x12) +#define MIG_FP (MIG_FLG|0x13) +#define MIG_XFP (MIG_FLG|0x14) +#define MIG_LDT (MIG_FLG|0x15) +#define MIG_MISC (MIG_FLG|0x16) +#define MIG_NOT_COMING (MIG_FLG|0x17) + +/* routines on the DEPUTY side of the protocol: */ + +extern int deputy_request(int, void *, int, void *, int, int, void **, int); +extern int deputy_wait(int, void **, int *); +extern int deputy_reply(int, void *, int, void *, int, int, int); +extern int deputy_handle_interim_request(int, void *, int); +extern void deputy_syscall(struct syscall_h *, int); +extern void deputy_rusage(int); +extern void deputy_add_rusage(struct rusage *); +extern void deputy_analyse_remote_signals(struct asig_h *); +extern int deputy_bring_page(struct bring_page_h *); +extern int deputy_tsc(void); +extern int deputy_more_strings(struct execve_more_strings_h *); +extern int deputy_bring_me_regs(unsigned long *); +extern void deputy_inform_remote_of_overheads(void); + +/* routines on the REMOTE side of the protocol: */ + +extern int remote_request(int, void *, int, void *, int, int, void **, int); +extern void wait_for_permission_to_continue(void); +extern int remote_deputy_has_something_for_us(struct task_struct *); +extern void inform_deputy_of_urgent(void); +extern void transfer_signals_to_deputy(unsigned int, siginfo_t *, int); +extern void absorb_deptime(unsigned long); +extern int remote_rusage(int *); +extern int 
remote_execve_counts(struct execve_counts_h *); +extern int remote_bring_strings(struct execve_bring_strings_h *); +extern int remote_setup_args(struct execve_setup_args_h *); +extern int remote_exec_mmap(void); +extern int remote_urgent(void); +extern int remote_readpage(struct file *, struct page *); +extern int remote_dump_thread(void); +extern int remote_init_aout_mm(struct exec *); +extern int remote_elf_setup(struct execve_elf_setup_h *); +extern int remote_fix_elf_aout(struct execve_fix_elf_aout_h *); +extern int remote_list_vmas(void); +extern int remote_fork(struct fork_h *); +extern int remote_personality(unsigned long *); +extern int remote_prequest(struct prequest_h *); +extern int remote_mmap(struct mmap_parameters_h *, int); +extern int remote_brk(struct brk_parameters_h *); +extern int remote_munmap(struct munmap_parameters_h *); +extern int remote_mprotect(struct mprotect_parameters_h *); +extern int remote_come_back(void *); +extern int remote_goto_remote(void *); +extern int remote_copy_from_user(struct user_copy_h *); +extern int remote_copy_to_user(struct user_copy_h *); +extern int remote_data_to_user(struct user_copy_h *); +extern int remote_clear_user(struct user_copy_h *); +extern int remote_strncpy_from_user(struct user_copy_h *); +extern int remote_strnlen_user(struct strnlen_user_h *); +extern int remote_verify_write(struct user_copy_h *); +extern int remote_csum_copy_from_user(struct user_csum_copy_h *); +extern int remote_csum_copy_to_user(struct user_csum_copy_h *); +extern int remote_report_files(void); +extern int remote_setup_frame(struct setupframe_parameters_h *); +extern int remote_nice(long *); +extern int remote_caps(kernel_cap_t *); +extern int remote_depcosts(void *); +extern int remote_restore_sigcontext(struct sigcontext **); +extern int remote_bring_me_regs(unsigned long *); +extern int remote_dump_fpu(void); +extern int remote_consider(int *); +extern int remote_setdecay(struct decay_h *); +extern int remote_set_lock(int 
*); +extern int remote_psinfo(void); +#ifdef CONFIG_MOSIX_DFSA +extern int remote_receive_dfsachanges(int *); +extern int remote_read_yourself(struct read_yourself_h *); +#endif /* CONFIG_MOSIX_DFSA */ +extern int remote_rlimit(struct rlimit_h *); +extern int remote_updinfo(struct disclosure_h *); +extern void remote_do_updinfo(struct disclosure_h *); +extern void remote_fill_rusage(struct rusage *, int); + +/* routines used by both sides of the protocol: */ + +extern unsigned int which_regs_to_send(void); +extern void regs_were_sent(void); + +#endif /*__KERNEL__*/ +#endif /* CONFIG_MOSIX */ +#endif diff -urN linux-2.4.17/include/mos/request.h linux_umopenmosix/include/mos/request.h --- linux-2.4.17/include/mos/request.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/request.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#ifndef _MOS_REQUEST_H +#define _MOS_REQUEST_H +#ifdef CONFIG_MOSIX + +struct prequest +{ + struct prequest *rnext; /* next in request chain */ + struct prequest *cnext; /* caller chain */ + volatile char rflags; /* coordination flags - see below */ + char command; /* ... 
*/ + int param; /* optional parameter */ + int param2; /* optional parameter */ + caddr_t ereply; /* extended reply address */ + int len; /* optional extended reply length */ + long reply; /* reply, if only one longword */ + wait_queue_head_t waitq; /* who is waiting for the reply */ +}; + +/* flags: */ + +#define PR_WAITING 1 /* caller still waiting for the request */ +#define PR_DONE 2 /* request complete */ +#define PR_ERROR 4 /* request failed, reply contains errno */ + +/* commands: */ + +enum +{ +PR_PROCFS_TASK_MEM = 1, +PR_PROCFS_GET_ENV, +PR_PROCFS_GET_ARG, +PR_PROCFS_GET_STAT_PARTS, +PR_PROCFS_GET_STATM, +PR_PROCFS_MAP_INFO, +PR_PROCFS_MEM_READ, +PR_PROCFS_MEM_WRITE, +PR_PROCFS_UPDATE_TIMES, +PR_PROCFS_GET_EXE, +PR_PTRACE_GET_STACK_LONG, +PR_PTRACE_PUT_STACK_LONG, +PR_PTRACE_GETREGS, +PR_PTRACE_SETREGS, +PR_PTRACE_PEEKUSER, +PR_PTRACE_POKEUSER, +PR_PTRACE_CONT, +PR_PTRACE_SINGLE_STEP, +PR_PTRACE_GETFPREGS, +PR_PTRACE_SETFPREGS, +PR_PTRACE_GETFPXREGS, +PR_PTRACE_SETFPXREGS, +PR_PTRACE_NOT_TRACED, +#ifdef CONFIG_MOSIX_DFSA +PR_DFSA_SYNCHRONIZE, +#endif /* CONFIG_MOSIX_DFSA */ +}; + +#define SHOULD_ASK_PROCESS(p) ((p) && ((p)->mosix.dflags & DDEPUTY) && ((p) != current || !((p)->mosix.dflags & DINSCHED))) /* nonzero when p is non-NULL, has DDEPUTY set, and is either not "current" or does not have DINSCHED set; p is evaluated more than once, so pass an expression without side effects */ + +extern int request_process_arg2(struct task_struct *, void *, int, int, int); +#define request_process(_t, _b, _c, _a) request_process_arg2(_t, _b, _c, _a, 0) +extern char *request_process_to_buf_arg(struct task_struct *, char *, int, int); +#define request_process_to_buf(_p,_b,_c) request_process_to_buf_arg(_p,_b,_c,0) + +extern char *fill_common_ps_info(int *); +extern int pick_ps(struct task_struct *, struct prequest *); +extern void store_common_ps_info(void); +extern void stop_storing_common_ps_info(void); +extern void process_requests(void); +extern void discard_requests(void); 
+extern void process_only_easy_requests(void); + +struct proc_remote_stat_parts +{ + unsigned long vsize, eip, esp; + long priority; + unsigned long rss, start_code, end_code, 
start_stack; + long processor; +}; + +/* routines in other parts of the kernel that are called by MOSIX: */ + +extern int proc_pid_environ(struct task_struct *, char *); +extern int proc_pid_cmdline(struct task_struct *, char *); +extern char *task_mem(struct mm_struct *, char *); +extern void proc_get_stat_parts(struct task_struct *, struct mm_struct *, struct proc_remote_stat_parts *); +extern int proc_pid_statm(struct task_struct *, char *); +struct vmamaps; +extern int proc_list_maps(struct vmamaps *, int); +extern void ref_mapped_files(struct vmamaps *, int); +extern int get_stack_long(struct task_struct *, int); +extern int put_stack_long(struct task_struct *, int, unsigned long); +extern void ptrace_putregs(unsigned long *); +extern void ptrace_getregs(unsigned long *); +extern unsigned long ptrace_peekuser(long); +extern void ptrace_pokeuser(long, long); +extern void ptrace_cont(int); +extern void ptrace_single_step(void); +struct user_i387_struct; +extern void ptrace_getfpregs(struct user_i387_struct *); +extern void ptrace_setfpregs(struct user_i387_struct *); +struct user_fxsr_struct; +extern void ptrace_getfpxregs(struct user_fxsr_struct *); +extern void ptrace_setfpxregs(struct user_fxsr_struct *); +extern struct file *first_executable(void); + +#endif /* CONFIG_MOSIX */ +#endif diff -urN linux-2.4.17/include/mos/routines.h linux_umopenmosix/include/mos/routines.h --- linux-2.4.17/include/mos/routines.h Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/include/mos/routines.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. 
+ */ +/* + * Author(s): Amnon Shiloh. + */ +#ifndef _MOS_ROUTINES_H +#define _MOS_ROUTINES_H +#ifdef CONFIG_MOSIX + +#include + +/* + * a collection of routines in mos/?*.c that are neither: + * 1) used by the rest of the kernel --> include/linux/mosix.h + * 2) in the communication package --> include/mos/comm.h + * 3) part of the DEPUTY/REMOTE protocol --> include/mos/protocol.h + * 4) related to prequests --> include/mos/request.h + * 5) related to DFSA --> include/mos/dfsa.h + */ + +struct task_struct; +struct mosixnet; + +/* task management: */ + +extern int remote_need_while_asleep(struct task_struct *); +extern int other_needs_while_asleep(void); +extern void remote_run_while_asleep(void); +extern void deputy_run_while_asleep(void); +extern int remote_pre_usermode_actions(void); +extern int local_pre_usermode_actions(void); +extern void per_task_ack_mosix_state(void); +extern char *desc_mostask(struct mosix_task *); +extern void mosix_do_add_to_whereto(struct task_struct *, int); +extern void common_daemon_setup(char *, int); +extern void neutralize_my_load(int); +extern void wait_for_mosix_configuration(int *); +extern int obtain_mm(void); +extern void sync_caps(void); + +/* load balancing: */ + +extern void mosix_load_init(void); +extern void load_balance(void); +extern void mosix_clear_statistics(void); +struct sonstats; +extern void consider(int, struct sonstats *); +extern int send_local_aload(int); +extern int send_with_miginfo(int, void *, int); +extern void mosix_local_syscall(void); +extern void release_migrations(int); +extern void changed_my_mind_and_staying(void); +extern int balance_commit_mig(int, int); +extern void info_someone_came_in(void); +extern void end_coming_in(int); +extern void add_statistics_to_ancesstor(struct task_struct *, struct task_struct *); +extern void info_update_costs(void); +extern void info_update_mfscosts(void); +extern void load_balance(void); +extern void memory_balance(void); +extern void age_balancing(void); +extern 
void unchoose_me(void); + +/* memory: */ + +extern void init_mdp(void); +extern void compute_freemem(void); +extern int export_mem(void); +extern int memory_badly_required(void); +extern int memory_relief_quality(struct task_struct *, int); +extern void memory_balance(void); + +/* information dissemination: */ + +extern void init_mosconfig(void); +extern int mosix_info_daemon(void *); +extern void info_init(void); +extern void info_startup(void); +extern void info_reconfig(void); +struct mosix_info; +struct loadinfo; +extern int balance_get_load(int, struct loadinfo *); +extern int balance_get_infos(int, int, struct mosix_info *, int); +extern int balance_get_info(int, struct mosix_info *); +extern void mosinfo_update_gateways(void); +extern void this_machine_is_favourite(int); +extern int mosix_mem_daemon(void *); + +/* migration: */ + +struct mig_request_h; +extern int mosix_mig_daemon(void *); +extern int passto(int, int); +extern int mosix_do_send_back_home(struct task_struct *); +extern int mosix_do_go_home(int); +extern void follow_whereto(void); +extern int mig_migrate(int); +extern void mig_set_lock(int); +extern int count_migrating_pages(void); +extern void kickstart(void); +extern int run_over_dirty_pages(int (*func)(unsigned long, int), int); +extern void deputy_startup(void); +extern int mig_send_request(int, int); +extern int mig_recv_request(struct mig_request_h **); +extern int mig_do_send(void); +extern int mig_do_receive(void); + +/* deputy: */ + +extern void deputy_main_loop(void); +extern void undeputy(struct task_struct *); +extern void deputy_async_requests(void); +extern void mosix_clear_all_held_files(struct task_struct *); +extern int fork_mosix_remote_files(struct task_struct *); +extern void coordinate(int, int); +extern long call_with_regs(void *, struct pt_regs *, struct pt_regs *); +extern int task_maps_ip(struct task_struct *, struct inode *); +extern void deputy_communication_failed(void); +extern void 
deputy_die_on_communication(void) ATTRIB_NORET; + +/* remote: */ + +extern int remote_wait(int, void **, int *); +extern void wait_for_permission_to_continue(void); +extern int remote_request(int, void *, int, void *, int, int, void **, int); +extern long remote_standard_system_call(int, struct pt_regs *); +extern struct file *get_remote_file(int, struct file *, struct dentry *, unsigned long long, off_t, nopage_t); +extern void remote_disappear(void) __attribute__((noreturn)); + +/* data cache: */ + +struct syscall_h; +struct syscall_ret_h; +extern int copy_from_cache(char *, unsigned long, int, int *); +extern int copy_to_cache(unsigned long, char *, int, int *); +extern int zero_cache(char *, int, int *); +extern int strlen_cache(char *, int *); +extern int all_in_cache(unsigned long, unsigned int, int); +extern int any_in_cache(unsigned long, unsigned int); +extern int alloc_ucache(void); +extern void free_ucache(void); +extern void flush_ucache(void); +extern void flush_read_cache(void); +extern int ucache_ok(unsigned long, unsigned long, int); +extern int remote_unpack_read_cache_data(struct syscall_ret_h *); +extern void set_read_region(unsigned long, unsigned int); +extern void set_write_region(unsigned long, unsigned int); +extern char *construct_ucache_envelope(int *, int *, struct syscall_h *, char **); +extern int open_ucache_envelope(struct syscall_h *); +extern char *deputy_pack_read_cache_data(int *, struct syscall_ret_h *, char **); + +/* configuration: */ + +extern int count_mosix_nodes(void); +extern int nth_node(int); +extern int mos_to_ascii(int, char *, int); +extern int mos_to_net(int, void *); +extern int mymos_to_net(void *); +extern int mosix_config_get_table(struct mosixnet **, int, int); +extern int mosix_config_set_table(struct mosixnet *, int, int); +extern int mosix_config_get_pe(void); +extern int mosix_config_set_pe(int); +extern int mosix_config_get_tentative_pe(void); +extern int mosix_config_get_limit(void); +extern int 
count_mosix_nodes(void); +extern int config_get_status(int); +extern int config_set_status(int); +extern int i_am_in_a_wrong_place(void); +extern void done_checking_conf(void); +#ifdef CONFIG_MOSIX_FS +extern int scan_mosix_nodes(int, int *, int *); +#endif /* CONFIG_MOSIX_FS */ + +/* statistics decay: */ + +extern int decay_inherit(int); +extern int decay_exec(int); +extern int decay_execonce(int); +extern int decay_set(int, int, int); +extern int decay_get(int); +extern void decay_clear(void); +extern void deputy_update_remote_decay(void); +extern void do_decay(void); +extern void inc_decays(void); +extern now_t time_now(void); +#define time_since(_when_) (time_now() - (_when_)) + +/* MOSIX administration: */ + +extern int admin_get_mode(int); +extern int admin_set_mode(int, int); +extern int expel(int); +extern int bring(void); +extern void proc_update_costs(void); +extern int my_mosix_status(void); +extern void set_my_cpuspeed(void); + +/* debugging: */ + +#ifdef CONFIG_MOSIX_DEBUG +extern void init_mosix_log(struct task_struct *); +extern void clear_mosix_log(struct task_struct *); +#else +#define init_mosix_log(m) do {} while(0) +#define clear_mosix_log(m) do {} while(0) +#endif /* CONFIG_MOSIX_DEBUG */ + +/* macros: */ + +#define MOSIX_TO_TASK(m) ((struct task_struct *)(((char *)(m))-offsetof(struct task_struct,mosix))) + +#ifdef CONFIG_MOSIX_DFSA +#define URGENT_REMOTE_CONDITIONS(p) ((p)->mosix.whereto || \ + process_told((p), DREQ_DFSASYNC)) +#else /* !CONFIG_MOSIX_DFSA */ +#define URGENT_REMOTE_CONDITIONS(p) ((p)->mosix.whereto) +#endif /* CONFIG_MOSIX_DFSA */ +#endif /* CONFIG_MOSIX */ + +#if MILLION % HZ +#define ticks_to_ms(ticks) (((int64_t)(ticks)) * MILLION / HZ) +#define ms_to_ticks(ms) ((ms) * HZ / MILLION) +#else +#define ticks_to_ms(ticks) (((int64_t)(ticks)) * (MILLION / HZ)) +#define ms_to_ticks(ms) ((ms) / (MILLION/HZ)) +#endif + +#endif diff -urN linux-2.4.17/include/mos/version.h linux_umopenmosix/include/mos/version.h --- linux-2.4.17/include/mos/version.h Thu Jan 1
02:00:00 1970 +++ linux_umopenmosix/include/mos/version.h Wed Jun 26 23:45:18 2002 @@ -0,0 +1,24 @@ +/* Changes since Feb 12, 2002 by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * Author(s): Moshe Bar, Amnon Shiloh, Oren Laadan. + */ + +#ifndef _MOS_VERSION_H +#define _MOS_VERSION_H + +/* The following are the EXTERNAL MOSIX versions, used to enforce compatibility + * between different MOSIX nodes when migrating and exchanging information - + * as opposed to the later INTERNAL kernel version, being used for reference + */ +#define MOSIX_MIGRATION_VERSION 0x000037L +#define MOSIX_BALANCE_VERSION 0x000031L + +#define MOSIX_KERNEL_VERSION 10508 /* version 1.5.8 */ + +#define MOSIX_VERSION_PARTS(x) (x)/10000, ((x)%10000)/100, (x)%100 + +#endif /* _MOS_VERSION_H */ diff -urN linux-2.4.17/include/net/sock.h linux_umopenmosix/include/net/sock.h --- linux-2.4.17/include/net/sock.h Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/include/net/sock.h Sat Jun 29 16:49:30 2002 @@ -546,6 +546,9 @@ /* Hole of 3 bytes. Try to pack. */ int route_caps; int proc; +#ifdef CONFIG_MOSIX + struct task_struct *owner; +#endif /* CONFIG_MOSIX */ unsigned long lingertime; int hashent; diff -urN linux-2.4.17/init/main.c linux_umopenmosix/init/main.c --- linux-2.4.17/init/main.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/init/main.c Wed Jun 26 23:45:18 2002 @@ -69,6 +69,10 @@ #include #endif +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* * Versions of gcc older than that listed below may actually compile * and link okay, but the end product can have subtle run time bugs. 
@@ -210,6 +214,7 @@ { "pf", 0x2f00 }, { "apblock", APBLOCK_MAJOR << 8}, { "ddv", DDV_MAJOR << 8}, + { "ubd", UBD_MAJOR << 8 }, { "jsfd", JSFD_MAJOR << 8}, #if defined(CONFIG_ARCH_S390) { "dasda", (DASD_MAJOR << MINORBITS) }, @@ -550,6 +555,9 @@ */ lock_kernel(); printk(linux_banner); +#ifdef CONFIG_MOSIX_UDB + udbinit(); +#endif /* CONFIG_MOSIX_UDB */ setup_arch(&command_line); printk("Kernel command line: %s\n", saved_command_line); parse_options(command_line); @@ -809,6 +817,9 @@ * initmem segments and start the user-mode stuff.. */ free_initmem(); +#ifdef CONFIG_MOSIX + init_mosix(); +#endif /* CONFIG_MOSIX */ unlock_kernel(); if (open("/dev/console", O_RDWR, 0) < 0) diff -urN linux-2.4.17/init/main.c.orig linux_umopenmosix/init/main.c.orig --- linux-2.4.17/init/main.c.orig Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/init/main.c.orig Wed Jun 26 23:45:18 2002 @@ -0,0 +1,835 @@ +/* + * linux/init/main.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * GK 2/5/95 - Changed to support mounting root fs via NFS + * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 + * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 + * Simplified starting of init: Michael A. 
Griffith + */ + +#define __KERNEL_SYSCALLS__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(CONFIG_ARCH_S390) +#include +#include +#endif + +#ifdef CONFIG_PCI +#include +#endif + +#ifdef CONFIG_DIO +#include +#endif + +#ifdef CONFIG_ZORRO +#include +#endif + +#ifdef CONFIG_MTRR +# include +#endif + +#ifdef CONFIG_NUBUS +#include +#endif + +#ifdef CONFIG_ISAPNP +#include +#endif + +#ifdef CONFIG_IRDA +extern int irda_proto_init(void); +extern int irda_device_init(void); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#endif + +/* + * Versions of gcc older than that listed below may actually compile + * and link okay, but the end product can have subtle run time bugs. + * To avoid associated bogus bug reports, we flatly refuse to compile + * with a gcc that is known to be too old from the very beginning. + */ +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91) +#error Sorry, your GCC is too old. It builds incorrect kernels. 
+#endif + +extern char _stext, _etext; +extern char *linux_banner; + +static int init(void *); + +extern void init_IRQ(void); +extern void init_modules(void); +extern void sock_init(void); +extern void fork_init(unsigned long); +extern void mca_init(void); +extern void sbus_init(void); +extern void ppc_init(void); +extern void sysctl_init(void); +extern void signals_init(void); +extern int init_pcmcia_ds(void); + +extern void free_initmem(void); + +#ifdef CONFIG_TC +extern void tc_init(void); +#endif + +extern void ecard_init(void); + +#if defined(CONFIG_SYSVIPC) +extern void ipc_init(void); +#endif + +/* + * Boot command-line arguments + */ +#define MAX_INIT_ARGS 8 +#define MAX_INIT_ENVS 8 + +extern void time_init(void); +extern void softirq_init(void); + +int rows, cols; + +#ifdef CONFIG_BLK_DEV_INITRD +unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ +#endif + +int root_mountflags = MS_RDONLY; +char *execute_command; +char root_device_name[64]; + + +static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +static char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; + +static int __init profile_setup(char *str) +{ + int par; + if (get_option(&str,&par)) prof_shift = par; + return 1; +} + +__setup("profile=", profile_setup); + + +static struct dev_name_struct { + const char *name; + const int num; +} root_dev_names[] __initdata = { + { "nfs", 0x00ff }, + { "hda", 0x0300 }, + { "hdb", 0x0340 }, + { "loop", 0x0700 }, + { "hdc", 0x1600 }, + { "hdd", 0x1640 }, + { "hde", 0x2100 }, + { "hdf", 0x2140 }, + { "hdg", 0x2200 }, + { "hdh", 0x2240 }, + { "hdi", 0x3800 }, + { "hdj", 0x3840 }, + { "hdk", 0x3900 }, + { "hdl", 0x3940 }, + { "hdm", 0x5800 }, + { "hdn", 0x5840 }, + { "hdo", 0x5900 }, + { "hdp", 0x5940 }, + { "hdq", 0x5A00 }, + { "hdr", 0x5A40 }, + { "hds", 0x5B00 }, + { "hdt", 0x5B40 }, + { "sda", 0x0800 }, + { "sdb", 0x0810 }, + { "sdc", 0x0820 }, + { "sdd", 0x0830 }, + { "sde", 0x0840 }, + { "sdf", 0x0850 }, + { 
"sdg", 0x0860 }, + { "sdh", 0x0870 }, + { "sdi", 0x0880 }, + { "sdj", 0x0890 }, + { "sdk", 0x08a0 }, + { "sdl", 0x08b0 }, + { "sdm", 0x08c0 }, + { "sdn", 0x08d0 }, + { "sdo", 0x08e0 }, + { "sdp", 0x08f0 }, + { "ada", 0x1c00 }, + { "adb", 0x1c10 }, + { "adc", 0x1c20 }, + { "add", 0x1c30 }, + { "ade", 0x1c40 }, + { "fd", 0x0200 }, + { "md", 0x0900 }, + { "xda", 0x0d00 }, + { "xdb", 0x0d40 }, + { "ram", 0x0100 }, + { "scd", 0x0b00 }, + { "mcd", 0x1700 }, + { "cdu535", 0x1800 }, + { "sonycd", 0x1800 }, + { "aztcd", 0x1d00 }, + { "cm206cd", 0x2000 }, + { "gscd", 0x1000 }, + { "sbpcd", 0x1900 }, + { "eda", 0x2400 }, + { "edb", 0x2440 }, + { "pda", 0x2d00 }, + { "pdb", 0x2d10 }, + { "pdc", 0x2d20 }, + { "pdd", 0x2d30 }, + { "pcd", 0x2e00 }, + { "pf", 0x2f00 }, + { "apblock", APBLOCK_MAJOR << 8}, + { "ddv", DDV_MAJOR << 8}, + { "ubd", UBD_MAJOR << 8 }, + { "jsfd", JSFD_MAJOR << 8}, +#if defined(CONFIG_ARCH_S390) + { "dasda", (DASD_MAJOR << MINORBITS) }, + { "dasdb", (DASD_MAJOR << MINORBITS) + (1 << 2) }, + { "dasdc", (DASD_MAJOR << MINORBITS) + (2 << 2) }, + { "dasdd", (DASD_MAJOR << MINORBITS) + (3 << 2) }, + { "dasde", (DASD_MAJOR << MINORBITS) + (4 << 2) }, + { "dasdf", (DASD_MAJOR << MINORBITS) + (5 << 2) }, + { "dasdg", (DASD_MAJOR << MINORBITS) + (6 << 2) }, + { "dasdh", (DASD_MAJOR << MINORBITS) + (7 << 2) }, +#endif +#if defined(CONFIG_BLK_CPQ_DA) || defined(CONFIG_BLK_CPQ_DA_MODULE) + { "ida/c0d0p",0x4800 }, + { "ida/c0d1p",0x4810 }, + { "ida/c0d2p",0x4820 }, + { "ida/c0d3p",0x4830 }, + { "ida/c0d4p",0x4840 }, + { "ida/c0d5p",0x4850 }, + { "ida/c0d6p",0x4860 }, + { "ida/c0d7p",0x4870 }, + { "ida/c0d8p",0x4880 }, + { "ida/c0d9p",0x4890 }, + { "ida/c0d10p",0x48A0 }, + { "ida/c0d11p",0x48B0 }, + { "ida/c0d12p",0x48C0 }, + { "ida/c0d13p",0x48D0 }, + { "ida/c0d14p",0x48E0 }, + { "ida/c0d15p",0x48F0 }, +#endif +#if defined(CONFIG_BLK_CPQ_CISS_DA) || defined(CONFIG_BLK_CPQ_CISS_DA_MODULE) + { "cciss/c0d0p",0x6800 }, + { "cciss/c0d1p",0x6810 }, + { "cciss/c0d2p",0x6820 
}, + { "cciss/c0d3p",0x6830 }, + { "cciss/c0d4p",0x6840 }, + { "cciss/c0d5p",0x6850 }, + { "cciss/c0d6p",0x6860 }, + { "cciss/c0d7p",0x6870 }, + { "cciss/c0d8p",0x6880 }, + { "cciss/c0d9p",0x6890 }, + { "cciss/c0d10p",0x68A0 }, + { "cciss/c0d11p",0x68B0 }, + { "cciss/c0d12p",0x68C0 }, + { "cciss/c0d13p",0x68D0 }, + { "cciss/c0d14p",0x68E0 }, + { "cciss/c0d15p",0x68F0 }, +#endif + { "nftla", 0x5d00 }, + { "nftlb", 0x5d10 }, + { "nftlc", 0x5d20 }, + { "nftld", 0x5d30 }, + { "ftla", 0x2c00 }, + { "ftlb", 0x2c08 }, + { "ftlc", 0x2c10 }, + { "ftld", 0x2c18 }, + { "mtdblock", 0x1f00 }, + { NULL, 0 } +}; + +kdev_t __init name_to_kdev_t(char *line) +{ + int base = 0; + + if (strncmp(line,"/dev/",5) == 0) { + struct dev_name_struct *dev = root_dev_names; + line += 5; + do { + int len = strlen(dev->name); + if (strncmp(line,dev->name,len) == 0) { + line += len; + base = dev->num; + break; + } + dev++; + } while (dev->name); + } + return to_kdev_t(base + simple_strtoul(line,NULL,base?10:16)); +} + +static int __init root_dev_setup(char *line) +{ + int i; + char ch; + + ROOT_DEV = name_to_kdev_t(line); + memset (root_device_name, 0, sizeof root_device_name); + if (strncmp (line, "/dev/", 5) == 0) line += 5; + for (i = 0; i < sizeof root_device_name - 1; ++i) + { + ch = line[i]; + if ( isspace (ch) || (ch == ',') || (ch == '\0') ) break; + root_device_name[i] = ch; + } + return 1; +} + +__setup("root=", root_dev_setup); + +static int __init checksetup(char *line) +{ + struct kernel_param *p; + + p = &__setup_start; + do { + int n = strlen(p->str); + if (!strncmp(line,p->str,n)) { + if (p->setup_func(line+n)) + return 1; + } + p++; + } while (p < &__setup_end); + return 0; +} + +/* this should be approx 2 Bo*oMips to start (note initial shift), and will + still work even if initially too large, it will just take slightly longer */ +unsigned long loops_per_jiffy = (1<<12); + +/* This is the number of bits of precision for the loops_per_jiffy. 
Each + bit takes on average 1.5/HZ seconds. This (like the original) is a little + better than 1% */ +#define LPS_PREC 8 + +void __init calibrate_delay(void) +{ + unsigned long ticks, loopbit; + int lps_precision = LPS_PREC; + + loops_per_jiffy = (1<<12); + + printk("Calibrating delay loop... "); + while (loops_per_jiffy <<= 1) { + /* wait for "start of" clock tick */ + ticks = jiffies; + while (ticks == jiffies) + /* nothing */; + /* Go .. */ + ticks = jiffies; + __delay(loops_per_jiffy); + ticks = jiffies - ticks; + if (ticks) + break; + } + +/* Do a binary approximation to get loops_per_jiffy set to equal one clock + (up to lps_precision bits) */ + loops_per_jiffy >>= 1; + loopbit = loops_per_jiffy; + while ( lps_precision-- && (loopbit >>= 1) ) { + loops_per_jiffy |= loopbit; + ticks = jiffies; + while (ticks == jiffies); + ticks = jiffies; + __delay(loops_per_jiffy); + if (jiffies != ticks) /* longer than 1 tick */ + loops_per_jiffy &= ~loopbit; + } + +/* Round the value and print it */ + printk("%lu.%02lu BogoMIPS\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} + +static int __init readonly(char *str) +{ + if (*str) + return 0; + root_mountflags |= MS_RDONLY; + return 1; +} + +static int __init readwrite(char *str) +{ + if (*str) + return 0; + root_mountflags &= ~MS_RDONLY; + return 1; +} + +static int __init debug_kernel(char *str) +{ + if (*str) + return 0; + console_loglevel = 10; + return 1; +} + +static int __init quiet_kernel(char *str) +{ + if (*str) + return 0; + console_loglevel = 4; + return 1; +} + +__setup("ro", readonly); +__setup("rw", readwrite); +__setup("debug", debug_kernel); +__setup("quiet", quiet_kernel); + +/* + * This is a simple kernel command line parsing function: it parses + * the command line, and fills in the arguments/environment to init + * as appropriate. Any cmd-line option is taken to be an environment + * variable if it contains the character '='. 
+ * + * This routine also checks for options meant for the kernel. + * These options are not given to init - they are for internal kernel use only. + */ +static void __init parse_options(char *line) +{ + char *next,*quote; + int args, envs; + + if (!*line) + return; + args = 0; + envs = 1; /* TERM is set to 'linux' by default */ + next = line; + while ((line = next) != NULL) { + quote = strchr(line,'"'); + next = strchr(line, ' '); + while (next != NULL && quote != NULL && quote < next) { + /* we found a left quote before the next blank + * now we have to find the matching right quote + */ + next = strchr(quote+1, '"'); + if (next != NULL) { + quote = strchr(next+1, '"'); + next = strchr(next+1, ' '); + } + } + if (next != NULL) + *next++ = 0; + if (!strncmp(line,"init=",5)) { + line += 5; + execute_command = line; + /* In case LILO is going to boot us with default command line, + * it prepends "auto" before the whole cmdline which makes + * the shell think it should execute a script with such name. + * So we ignore all arguments entered _before_ init=... [MJ] + */ + args = 0; + continue; + } + if (checksetup(line)) + continue; + + /* + * Then check if it's an environment variable or + * an option. + */ + if (strchr(line,'=')) { + if (envs >= MAX_INIT_ENVS) + break; + envp_init[++envs] = line; + } else { + if (args >= MAX_INIT_ARGS) + break; + if (*line) + argv_init[++args] = line; + } + } + argv_init[args+1] = NULL; + envp_init[envs+1] = NULL; +} + + +extern void setup_arch(char **); +extern void cpu_idle(void); + +unsigned long wait_init_idle; + +#ifndef CONFIG_SMP + +#ifdef CONFIG_X86_LOCAL_APIC +static void __init smp_init(void) +{ + APIC_init_uniprocessor(); +} +#else +#define smp_init() do { } while (0) +#endif + +#else + + +/* Called by boot processor to activate the rest. */ +static void __init smp_init(void) +{ + /* Get other processors into their bootup holding patterns. 
*/ + smp_boot_cpus(); + wait_init_idle = cpu_online_map; + clear_bit(current->processor, &wait_init_idle); /* Don't wait on me! */ + + smp_threads_ready=1; + smp_commence(); + + /* Wait for the other cpus to set up their idle processes */ + printk("Waiting on wait_init_idle (map = 0x%lx)\n", wait_init_idle); + while (wait_init_idle) { + cpu_relax(); + barrier(); + } + printk("All processors have done init_idle\n"); +} + +#endif + +/* + * We need to finalize in a non-__init function or else race conditions + * between the root thread and the init thread may cause start_kernel to + * be reaped by free_initmem before the root thread has proceeded to + * cpu_idle. + */ + +static void rest_init(void) +{ + kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + unlock_kernel(); + current->need_resched = 1; + cpu_idle(); +} + +/* + * Activate the first processor. + */ + +asmlinkage void __init start_kernel(void) +{ + char * command_line; + unsigned long mempages; + extern char saved_command_line[]; +/* + * Interrupts are still disabled. Do necessary setups, then + * enable them + */ + lock_kernel(); + printk(linux_banner); + setup_arch(&command_line); + printk("Kernel command line: %s\n", saved_command_line); + parse_options(command_line); + trap_init(); + init_IRQ(); + sched_init(); + softirq_init(); + time_init(); + + /* + * HACK ALERT! This is early. We're enabling the console before + * we've done PCI setups etc, and console_init() must be aware of + * this. But we do want output early, in case something goes wrong. 
+ */ + console_init(); +#ifdef CONFIG_MODULES + init_modules(); +#endif + if (prof_shift) { + unsigned int size; + /* only text is profiled */ + prof_len = (unsigned long) &_etext - (unsigned long) &_stext; + prof_len >>= prof_shift; + + size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1; + prof_buffer = (unsigned int *) alloc_bootmem(size); + } + + kmem_cache_init(); + sti(); + calibrate_delay(); +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start && !initrd_below_start_ok && + initrd_start < min_low_pfn << PAGE_SHIFT) { + printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " + "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } +#endif + mem_init(); + kmem_cache_sizes_init(); + pgtable_cache_init(); + + mempages = num_physpages; + + fork_init(mempages); + proc_caches_init(); + vfs_caches_init(mempages); + buffer_init(mempages); + page_cache_init(mempages); +#if defined(CONFIG_ARCH_S390) + ccwcache_init(); +#endif + signals_init(); +#ifdef CONFIG_PROC_FS + proc_root_init(); +#endif +#if defined(CONFIG_SYSVIPC) + ipc_init(); +#endif + check_bugs(); + printk("POSIX conformance testing by UNIFIX\n"); + + /* + * We count on the initial thread going ok + * Like idlers init is an unlocked kernel thread, which will + * make syscalls (and thus be locked). + */ + smp_init(); + rest_init(); +} + +#ifdef CONFIG_BLK_DEV_INITRD +static int do_linuxrc(void * shell) +{ + static char *argv[] = { "linuxrc", NULL, }; + + close(0);close(1);close(2); + setsid(); + (void) open("/dev/console",O_RDWR,0); + (void) dup(0); + (void) dup(0); + return execve(shell, argv, envp_init); +} + +#endif + +struct task_struct *child_reaper = &init_task; + +static void __init do_initcalls(void) +{ + initcall_t *call; + + call = &__initcall_start; + do { + (*call)(); + call++; + } while (call < &__initcall_end); + + /* Make sure there is no pending stuff from the initcall sequence */ + flush_scheduled_tasks(); +} + +/* + * Ok, the machine is now initialized. 
None of the devices + * have been touched yet, but the CPU subsystem is up and + * running, and memory and process management works. + * + * Now we can finally start doing some real work.. + */ +static void __init do_basic_setup(void) +{ + + /* + * Tell the world that we're going to be the grim + * reaper of innocent orphaned children. + * + * We don't want people to have to make incorrect + * assumptions about where in the task array this + * can be found. + */ + child_reaper = current; + +#if defined(CONFIG_MTRR) /* Do this after SMP initialization */ +/* + * We should probably create some architecture-dependent "fixup after + * everything is up" style function where this would belong better + * than in init/main.c.. + */ + mtrr_init(); +#endif + +#ifdef CONFIG_SYSCTL + sysctl_init(); +#endif + + /* + * Ok, at this point all CPU's should be initialized, so + * we can start looking into devices.. + */ +#if defined(CONFIG_ARCH_S390) + s390_init_machine_check(); +#endif + +#ifdef CONFIG_PCI + pci_init(); +#endif +#ifdef CONFIG_SBUS + sbus_init(); +#endif +#if defined(CONFIG_PPC) + ppc_init(); +#endif +#ifdef CONFIG_MCA + mca_init(); +#endif +#ifdef CONFIG_ARCH_ACORN + ecard_init(); +#endif +#ifdef CONFIG_ZORRO + zorro_init(); +#endif +#ifdef CONFIG_DIO + dio_init(); +#endif +#ifdef CONFIG_NUBUS + nubus_init(); +#endif +#ifdef CONFIG_ISAPNP + isapnp_init(); +#endif +#ifdef CONFIG_TC + tc_init(); +#endif + + /* Networking initialization needs a process context */ + sock_init(); + + start_context_thread(); + do_initcalls(); + +#ifdef CONFIG_IRDA + irda_proto_init(); + irda_device_init(); /* Must be done after protocol initialization */ +#endif +#ifdef CONFIG_PCMCIA + init_pcmcia_ds(); /* Do this last */ +#endif +} + +extern void rd_load(void); +extern void initrd_load(void); + +/* + * Prepare the namespace - decide what/where to mount, load ramdisks, etc. 
+ */ +static void prepare_namespace(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + int real_root_mountflags = root_mountflags; + if (!initrd_start) + mount_initrd = 0; + if (mount_initrd) + root_mountflags &= ~MS_RDONLY; + real_root_dev = ROOT_DEV; +#endif + +#ifdef CONFIG_BLK_DEV_RAM +#ifdef CONFIG_BLK_DEV_INITRD + if (mount_initrd) + initrd_load(); + else +#endif + rd_load(); +#endif + + /* Mount the root filesystem.. */ + mount_root(); + + mount_devfs_fs (); + +#ifdef CONFIG_BLK_DEV_INITRD + root_mountflags = real_root_mountflags; + if (mount_initrd && ROOT_DEV != real_root_dev + && MAJOR(ROOT_DEV) == RAMDISK_MAJOR && MINOR(ROOT_DEV) == 0) { + int error; + int i, pid; + + pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); + if (pid > 0) { + while (pid != wait(&i)) { + current->policy |= SCHED_YIELD; + schedule(); + } + } + if (MAJOR(real_root_dev) != RAMDISK_MAJOR + || MINOR(real_root_dev) != 0) { + error = change_root(real_root_dev,"/initrd"); + if (error) + printk(KERN_ERR "Change root to /initrd: " + "error %d\n",error); + } + } +#endif +} + +static int init(void * unused) +{ + lock_kernel(); + do_basic_setup(); + + prepare_namespace(); + + /* + * Ok, we have completed the initial bootup, and + * we're essentially up and running. Get rid of the + * initmem segments and start the user-mode stuff.. + */ + free_initmem(); + unlock_kernel(); + + if (open("/dev/console", O_RDWR, 0) < 0) + printk("Warning: unable to open an initial console.\n"); + + (void) dup(0); + (void) dup(0); + + /* + * We try each of these until one succeeds. + * + * The Bourne shell can be used instead of init if we are + * trying to recover a really broken machine. + */ + + if (execute_command) + execve(execute_command,argv_init,envp_init); + execve("/sbin/init",argv_init,envp_init); + execve("/etc/init",argv_init,envp_init); + execve("/bin/init",argv_init,envp_init); + execve("/bin/sh",argv_init,envp_init); + panic("No init found. 
Try passing init= option to kernel."); +} diff -urN linux-2.4.17/ipc/shm.c linux_umopenmosix/ipc/shm.c --- linux-2.4.17/ipc/shm.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/ipc/shm.c Wed Jun 26 23:45:18 2002 @@ -27,7 +27,7 @@ #include "util.h" struct shmid_kernel /* private to the kernel */ -{ +{ struct kern_ipc_perm shm_perm; struct file * shm_file; int id; @@ -199,7 +199,7 @@ error = -ENOSPC; id = shm_addid(shp); - if(id == -1) + if(id == -1) goto no_id; shp->shm_perm.key = key; shp->shm_flags = (shmflg & S_IRWXUGO); @@ -284,7 +284,7 @@ uid_t uid; gid_t gid; mode_t mode; -}; +}; static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void *buf, int version) { @@ -337,7 +337,7 @@ out.shmmin = in->shmmin; out.shmmni = in->shmmni; out.shmseg = in->shmseg; - out.shmall = in->shmall; + out.shmall = in->shmall; return copy_to_user(buf, &out, sizeof(out)); } @@ -346,7 +346,7 @@ } } -static void shm_get_stat (unsigned long *rss, unsigned long *swp) +static void shm_get_stat (unsigned long *rss, unsigned long *swp) { struct shmem_inode_info *info; int i; @@ -487,7 +487,7 @@ * We cannot simply remove the file. The SVID states * that the block remains until the last person * detaches from it, then is deleted. A shmat() on - * an RMID segment is legal in older Linux and if + * an RMID segment is legal in older Linux and if * we change it apps break... 
* * Instead we set a destroyed flag, and then blow @@ -496,13 +496,13 @@ down(&shm_ids.sem); shp = shm_lock(shmid); err = -EINVAL; - if (shp == NULL) + if (shp == NULL) goto out_up; err = shm_checkid(shp, shmid); if(err) goto out_unlock_up; if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && + current->euid != shp->shm_perm.cuid && !capable(CAP_SYS_ADMIN)) { err=-EPERM; goto out_unlock_up; @@ -534,7 +534,7 @@ goto out_unlock_up; err=-EPERM; if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && + current->euid != shp->shm_perm.cuid && !capable(CAP_SYS_ADMIN)) { goto out_unlock_up; } @@ -627,6 +627,9 @@ shp->shm_nattch++; shm_unlock(shmid); +#ifdef CONFIG_MOSIX + user_addr = (void *) do_mmap_down (file, addr, file->f_dentry->d_inode->i_size, prot, flags, 0); +#else down_write(&current->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { user_addr = ERR_PTR(-EINVAL); @@ -640,11 +643,12 @@ addr > current->mm->start_stack - size - PAGE_SIZE * 5) goto invalid; } - + user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0); invalid: up_write(&current->mm->mmap_sem); +#endif /* CONFIG_MOSIX */ down (&shm_ids.sem); if(!(shp = shm_lock(shmid))) @@ -673,6 +677,10 @@ struct mm_struct *mm = current->mm; struct vm_area_struct *shmd, *shmdnext; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + return(0); +#endif /* CONFIG_MOSIX */ down_write(&mm->mmap_sem); for (shmd = mm->mmap; shmd; shmd = shmdnext) { shmdnext = shmd->vm_next; diff -urN linux-2.4.17/kernel/acct.c linux_umopenmosix/kernel/acct.c --- linux-2.4.17/kernel/acct.c Mon Mar 19 22:35:08 2001 +++ linux_umopenmosix/kernel/acct.c Wed Jun 26 23:45:18 2002 @@ -57,6 +57,9 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ /* * These constants control the amount of freespace that suspend and * resume the process accounting system, and the time delay between @@ -203,6 +206,10 @@ } unlock_kernel(); if (old_acct) { +#ifdef CONFIG_MOSIX +
if(current->mosix.dflags & DDEPUTY) + mosix_deputy_rusage(2); +#endif /* CONFIG_MOSIX */ do_acct_process(0,old_acct); filp_close(old_acct, NULL); } @@ -278,6 +285,10 @@ mm_segment_t fs; unsigned long vsize; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DREMOTE) + return; +#endif /* CONFIG_MOSIX */ /* * First check to see if there is enough free_space to continue * the process accounting system. @@ -323,6 +334,10 @@ } up_read(&current->mm->mmap_sem); } +#ifdef CONFIG_MOSIX + else /* were we a DEPUTY? otherwise, "mosix.exit_mem" is 0 anyway */ + vsize = current->mosix.exit_mem; +#endif /* CONFIG_MOSIX */ vsize = vsize / 1024; ac.ac_mem = encode_comp_t(vsize); ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ diff -urN linux-2.4.17/kernel/capability.c linux_umopenmosix/kernel/capability.c --- linux-2.4.17/kernel/capability.c Sat Jun 24 07:06:37 2000 +++ linux_umopenmosix/kernel/capability.c Wed Jun 26 23:45:18 2002 @@ -7,6 +7,9 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ kernel_cap_t cap_bset = CAP_INIT_EFF_SET; @@ -85,7 +88,11 @@ /* FIXME: do we need to have a write lock here..? */ read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(target) { +#else for_each_task(target) { +#endif /* CONFIG_MOSIX */ if (target->pgrp != pgrp) continue; target->cap_effective = *effective; @@ -106,7 +113,11 @@ /* FIXME: do we need to have a write lock here..?
*/ read_lock(&tasklist_lock); /* ALL means everyone other than self or 'init' */ +#ifdef CONFIG_MOSIX + for_each_local_task(target) { +#else for_each_task(target) { +#endif /* CONFIG_MOSIX */ if (target == current || target->pid == 1) continue; target->cap_effective = *effective; @@ -212,5 +223,8 @@ } spin_out: spin_unlock(&task_capability_lock); +#ifdef CONFIG_MOSIX + mosix_sync_caps(effective); +#endif /* CONFIG_MOSIX */ return error; } diff -urN linux-2.4.17/kernel/exec_domain.c linux_umopenmosix/kernel/exec_domain.c --- linux-2.4.17/kernel/exec_domain.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/kernel/exec_domain.c Wed Jun 26 23:45:18 2002 @@ -16,6 +16,10 @@ #include #include #include +#ifdef CONFIG_MOSIX +#include +#include +#endif /* CONFIG_MOSIX */ static void default_handler(int, struct pt_regs *); @@ -73,7 +77,11 @@ send_sig(SIGSEGV, current, 1); } +#ifdef CONFIG_MOSIX +struct exec_domain * +#else static struct exec_domain * +#endif /* CONFIG_MOSIX */ lookup_exec_domain(u_long personality) { struct exec_domain * ep; @@ -167,7 +175,11 @@ return 0; } +#ifdef CONFIG_MOSIX_DFSA + if (atomic_read(&current->fs->users) != 1) { +#else if (atomic_read(&current->fs->count) != 1) { +#endif /* CONFIG_MOSIX_DFSA */ struct fs_struct *fsp, *ofsp; fsp = copy_fs_struct(current->fs); @@ -181,7 +193,11 @@ current->fs = fsp; task_unlock(current); +#ifdef CONFIG_MOSIX_DFSA + put_used_fs_struct(ofsp); +#else put_fs_struct(ofsp); +#endif /* CONFIG_MOSIX_DFSA */ } /* @@ -189,6 +205,38 @@ * current->fs.
*/ +#ifdef CONFIG_MOSIX + if((current->mosix.dflags & DDEPUTY) && + personality != current->personality && + mosix_deputy_personality(personality)) + { + if(!current->mosix.inexec) + mosix_go_home(1); + if(current->mosix.dflags & DDEPUTY) + { + put_exec_domain(ep); + printk("%d(%s)-killed because it could not go back home " + "and\nits new personality (%ld) is not" + " supported where it ran (%d)\n", current->pid, + current->comm, personality, + current->mosix.whereami); + send_sig(SIGKILL, current, 1); + return(-EINVAL); + } + } + else if(current->mosix.dflags & DREMOTE) + { + unsigned long prev = current->personality; + + current->personality = personality; + if(__emul_prefix()) + { + current->personality = prev; + put_exec_domain(ep); /* drop the reference, as the DDEPUTY failure path above does */ + return(-EINVAL); + } + } +#endif /* CONFIG_MOSIX */ current->personality = personality; oep = current->exec_domain; current->exec_domain = ep; diff -urN linux-2.4.17/kernel/exit.c linux_umopenmosix/kernel/exit.c --- linux-2.4.17/kernel/exit.c Thu Nov 22 00:42:27 2001 +++ linux_umopenmosix/kernel/exit.c Wed Jun 26 23:45:18 2002 @@ -15,6 +15,9 @@ #ifdef CONFIG_BSD_PROCESS_ACCT #include #endif +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ #include #include @@ -84,7 +87,11 @@ fallback = -1; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->session <= 0) continue; if (p->pgrp == pgrp) { @@ -111,9 +118,17 @@ struct task_struct *p; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if ((p == ignored_task) || (p->pgrp != pgrp) || +#ifdef CONFIG_MOSIX + (LOGICAL_STATE(p) == TASK_ZOMBIE) || +#else (p->state == TASK_ZOMBIE) || +#endif /* CONFIG_MOSIX */ (p->p_pptr->pid == 1)) continue; if ((p->p_pptr->pgrp != pgrp) && @@ -137,10 +152,18 @@ struct task_struct * p; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if
(p->pgrp != pgrp) continue; +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(p) != TASK_STOPPED) +#else if (p->state != TASK_STOPPED) +#endif /* CONFIG_MOSIX */ continue; retval = 1; break; @@ -171,7 +194,11 @@ read_unlock(&tasklist_lock); } +#ifdef CONFIG_MOSIX_DFSA +void close_files(struct files_struct * files) +#else static inline void close_files(struct files_struct * files) +#endif /* CONFIG_MOSIX_DFSA */ { int i, j; @@ -206,6 +233,11 @@ if (files->max_fdset > __FD_SETSIZE) { free_fdset(files->open_fds, files->max_fdset); free_fdset(files->close_on_exec, files->max_fdset); +#ifdef CONFIG_MOSIX_DFSA + free_fdset(files->closed, files->max_fdset); + free_fdset(files->opened, files->max_fdset); + free_fdset(files->modified, files->max_fdset); +#endif /* CONFIG_MOSIX_DFSA */ } kmem_cache_free(files_cachep, files); } @@ -219,6 +251,11 @@ task_lock(tsk); tsk->files = NULL; task_unlock(tsk); +#ifdef CONFIG_MOSIX_DFSA + atomic_dec(&files->users); + if(atomic_read(&files->users) == 1) + files->count_dropped_to_one = 1; +#endif /* CONFIG_MOSIX_DFSA */ put_files_struct(files); } } @@ -249,6 +286,17 @@ __put_fs_struct(fs); } +#ifdef CONFIG_MOSIX_DFSA +inline void +put_used_fs_struct(struct fs_struct *fs) +{ + atomic_dec(&fs->users); + if(atomic_read(&fs->users) == 1) + fs->count_dropped_to_one = 1; + __put_fs_struct(fs); +} +#endif /* CONFIG_MOSIX_DFSA */ + static inline void __exit_fs(struct task_struct *tsk) { struct fs_struct * fs = tsk->fs; @@ -257,7 +305,11 @@ task_lock(tsk); tsk->fs = NULL; task_unlock(tsk); +#ifdef CONFIG_MOSIX_DFSA + put_used_fs_struct(fs); +#else __put_fs_struct(fs); +#endif /* CONFIG_MOSIX_DFSA */ } } @@ -301,6 +353,9 @@ struct mm_struct * mm = tsk->mm; mm_release(); +#ifdef CONFIG_MOSIX + mosix_exit_mm(tsk); +#endif /* CONFIG_MOSIX */ if (mm) { atomic_inc(&mm->mm_count); if (mm != tsk->active_mm) BUG(); @@ -327,6 +382,14 @@ struct task_struct * p, *t; forget_original_parent(current); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DREMOTE) + { + 
current->state = TASK_ZOMBIE; + notify_parent(current, SIGCHLD); + return; + } +#endif /* CONFIG_MOSIX */ /* * Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped @@ -380,10 +443,24 @@ */ write_lock_irq(&tasklist_lock); +#ifndef CONFIG_MOSIX current->state = TASK_ZOMBIE; do_notify_parent(current, current->exit_signal); +#endif /* CONFIG_MOSIX */ while (current->p_cptr != NULL) { p = current->p_cptr; +#ifdef CONFIG_MOSIX + if(p->ptrace && (p->mosix.dflags & DDEPUTY)) + { + get_task_struct(p); + write_unlock_irq(&tasklist_lock); + request_process(p, NULL, PR_PTRACE_NOT_TRACED, 0); + p->ptrace = 0; + free_task_struct(p); + write_lock_irq(&tasklist_lock); + continue; + } +#endif /* CONFIG_MOSIX */ current->p_cptr = p->p_osptr; p->p_ysptr = NULL; p->ptrace = 0; @@ -393,7 +470,11 @@ if (p->p_osptr) p->p_osptr->p_ysptr = p; p->p_pptr->p_cptr = p; +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(p) == TASK_ZOMBIE) +#else if (p->state == TASK_ZOMBIE) +#endif /* CONFIG_MOSIX */ do_notify_parent(p, p->exit_signal); /* * process group orphan check @@ -413,6 +494,10 @@ write_lock_irq(&tasklist_lock); } } +#ifdef CONFIG_MOSIX + current->state = TASK_ZOMBIE; + do_notify_parent(current, current->exit_signal); +#endif /* CONFIG_MOSIX */ write_unlock_irq(&tasklist_lock); } @@ -430,6 +515,9 @@ del_timer_sync(&tsk->real_timer); fake_volatile: +#ifdef CONFIG_MOSIX + mosix_exit(); +#endif /* CONFIG_MOSIX */ #ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); #endif @@ -440,6 +528,9 @@ __exit_files(tsk); __exit_fs(tsk); exit_sighand(tsk); +#ifdef CONFIG_MOSIX + mosix_very_exit(); +#endif /* CONFIG_MOSIX */ exit_thread(); if (current->leader) @@ -519,7 +610,11 @@ && !(options & __WALL)) continue; flag = 1; +#ifdef CONFIG_MOSIX + switch (LOGICAL_STATE(p)) { +#else switch (p->state) { +#endif /* CONFIG_MOSIX */ case TASK_STOPPED: if (!p->exit_code) continue; diff -urN linux-2.4.17/kernel/fork.c linux_umopenmosix/kernel/fork.c --- 
linux-2.4.17/kernel/fork.c Wed Nov 21 20:18:42 2001 +++ linux_umopenmosix/kernel/fork.c Wed Jun 26 23:45:18 2002 @@ -26,6 +26,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* The idle threads do not count.. */ int nr_threads; int nr_running; @@ -74,8 +78,14 @@ */ max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; +#ifdef CONFIG_MOSIX + /* 1/6th of "max_threads" goes for guest tasks */ + init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/3; + init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/3; +#else init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; +#endif } /* Protects next_safe and last_pid. */ @@ -125,6 +135,16 @@ return last_pid; } +#ifdef CONFIG_MOSIX +/* NOTE: "dup_mmap" is called beyond the point of no migration: + * this means we are no longer allowed to sleep interruptibly here without + * taking special measures to prevent migration (such as raising a new flag + * in "current->mosix.stay"), + * Fortunately, vm_ops->open(tmp) is rarely non-NULL and even then, it never + * sleeps interruptibly. BUT if it ever does, then we must take such measures + * to prevent migration here. 
+ */ +#endif /* CONFIG_MOSIX */ static inline int dup_mmap(struct mm_struct * mm) { struct vm_area_struct * mpnt, *tmp, **pprev; @@ -215,6 +235,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm) { atomic_set(&mm->mm_users, 1); +#ifdef CONFIG_MOSIX + atomic_set(&mm->mm_realusers, 1); + atomic_set(&mm->mm_kiocount, 0); + mm->last_memsort = 0; + mm->used = mm->unused = mm->swapped = mm->private_unused = 0; + mm->mark = 0; +#endif /* CONFIG_MOSIX */ atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); mm->page_table_lock = SPIN_LOCK_UNLOCKED; @@ -319,6 +346,9 @@ if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); +#ifdef CONFIG_MOSIX + atomic_inc(&oldmm->mm_realusers); +#endif /* CONFIG_MOSIX */ mm = oldmm; goto good_mm; } @@ -347,6 +377,9 @@ if (init_new_context(tsk,mm)) goto free_pt; +#ifdef CONFIG_MOSIX + tsk->mosix.stay &= ~DSTAY_FOR_CLONE; +#endif /* CONFIG_MOSIX */ good_mm: tsk->mm = mm; @@ -365,13 +398,28 @@ /* We don't need to lock fs - think why ;-) */ if (fs) { atomic_set(&fs->count, 1); +#ifdef CONFIG_MOSIX_DFSA + atomic_set(&fs->users, 1); +#endif /* CONFIG_MOSIX_DFSA */ fs->lock = RW_LOCK_UNLOCKED; fs->umask = old->umask; read_lock(&old->lock); fs->rootmnt = mntget(old->rootmnt); fs->root = dget(old->root); +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & (DREMOTEDAEMON|DREMOTE)) + { + fs->pwdmnt = NULL; + fs->pwd = NULL; + } + else + { +#endif /* CONFIG_MOSIX */ fs->pwdmnt = mntget(old->pwdmnt); fs->pwd = dget(old->pwd); +#ifdef CONFIG_MOSIX + } +#endif /* CONFIG_MOSIX */ if (old->altroot) { fs->altrootmnt = mntget(old->altrootmnt); fs->altroot = dget(old->altroot); @@ -393,6 +441,9 @@ { if (clone_flags & CLONE_FS) { atomic_inc(&current->fs->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(&current->fs->users); +#endif /* CONFIG_MOSIX_DFSA */ return 0; } tsk->fs = __copy_fs_struct(current->fs); @@ -429,6 +480,9 @@ if (clone_flags & CLONE_FILES) { atomic_inc(&oldf->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(&oldf->users); +#endif /* CONFIG_MOSIX_DFSA */ goto out; } @@ -439,6 +493,9 @@ goto out; atomic_set(&newf->count, 1); +#ifdef CONFIG_MOSIX_DFSA + atomic_set(&newf->users, 1); +#endif /*
CONFIG_MOSIX_DFSA */ goto out; } @@ -439,6 +493,9 @@ goto out; atomic_set(&newf->count, 1); +#ifdef CONFIG_MOSIX_DFSA + atomic_set(&newf->users, 1); +#endif /* CONFIG_MOSIX_DFSA */ newf->file_lock = RW_LOCK_UNLOCKED; newf->next_fd = 0; @@ -447,6 +504,15 @@ newf->close_on_exec = &newf->close_on_exec_init; newf->open_fds = &newf->open_fds_init; newf->fd = &newf->fd_array[0]; +#ifdef CONFIG_MOSIX_DFSA + newf->closed = &newf->closed_fds_init; + newf->opened = &newf->opened_fds_init; + newf->modified = &newf->modified_fds_init; + newf->maxclosed = 0; + newf->maxopened = 0; + newf->maxmod = 0; + newf->count_dropped_to_one = 0; +#endif /* CONFIG_MOSIX_DFSA */ /* We don't yet have the oldf readlock, but even if the old fdset gets grown now, we'll only copy up to "size" fds */ @@ -486,6 +552,11 @@ memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); +#ifdef CONFIG_MOSIX_DFSA + memcpy(newf->closed->fds_bits, oldf->closed->fds_bits, open_files/8); + memcpy(newf->opened->fds_bits, oldf->opened->fds_bits, open_files/8); + memcpy(newf->modified->fds_bits, oldf->modified->fds_bits, open_files/8); +#endif /* CONFIG_MOSIX_DFSA */ for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -507,6 +578,11 @@ memset(&newf->open_fds->fds_bits[start], 0, left); memset(&newf->close_on_exec->fds_bits[start], 0, left); +#ifdef CONFIG_MOSIX_DFSA + memset(&newf->closed->fds_bits[start], 0, left); + memset(&newf->opened->fds_bits[start], 0, left); + memset(&newf->modified->fds_bits[start], 0, left); +#endif /* CONFIG_MOSIX_DFSA */ } tsk->files = newf; @@ -517,6 +593,11 @@ out_release: free_fdset (newf->close_on_exec, newf->max_fdset); free_fdset (newf->open_fds, newf->max_fdset); +#ifdef CONFIG_MOSIX_DFSA + free_fdset (newf->closed, newf->max_fdset); + free_fdset (newf->opened, newf->max_fdset); + free_fdset (newf->modified, newf->max_fdset); +#endif /* CONFIG_MOSIX_DFSA */ 
kmem_cache_free(files_cachep, newf); goto out; } @@ -565,6 +646,9 @@ int retval; struct task_struct *p; struct completion vfork; +#ifdef CONFIG_MOSIX + int migrated = 0; +#endif /* CONFIG_MOSIX */ retval = -EPERM; @@ -576,6 +660,11 @@ if (current->pid) goto fork_out; } +#ifdef CONFIG_MOSIX + if(!(current->mosix.stay || + (current->mosix.dflags & (DDEPUTY|DREMOTE|DREMOTEDAEMON)))) + migrated = mosix_forkmigrate(); +#endif /* CONFIG_MOSIX */ retval = -ENOMEM; p = alloc_task_struct(); @@ -652,6 +741,10 @@ INIT_LIST_HEAD(&p->local_pages); retval = -ENOMEM; +#ifdef CONFIG_MOSIX + if(mosix_fork_init_fields(p)) + goto bad_fork_cleanup; +#endif /* CONFIG_MOSIX */ /* copy all the process information */ if (copy_files(clone_flags, p)) goto bad_fork_cleanup; @@ -659,6 +752,12 @@ goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; +#ifdef CONFIG_MOSIX + if((current->mosix.dflags & DDEPUTY) && + (retval = mosix_deputy_fork(p, migrated, stack_start))) + goto bad_fork_cleanup_sighand; + /* No migrations allowed beyond this point! 
*/ +#endif /* CONFIG_MOSIX */ if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); @@ -703,6 +802,14 @@ /* CLONE_PARENT and CLONE_THREAD re-use the old parent */ p->p_opptr = current->p_opptr; p->p_pptr = current->p_pptr; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & (DREMOTE|DREMOTEDAEMON)) + { + extern struct task_struct *child_reaper; + p->p_pptr = p->p_opptr = child_reaper; + } + else +#endif /* CONFIG_MOSIX */ if (!(clone_flags & (CLONE_PARENT | CLONE_THREAD))) { p->p_opptr = current; if (!(p->ptrace & PT_PTRACED)) @@ -720,12 +827,19 @@ write_unlock_irq(&tasklist_lock); if (p->ptrace & PT_PTRACED) +#ifdef CONFIG_MOSIX + if(!(p->mosix.dflags & DREMOTE)) +#endif /* CONFIG_MOSIX */ send_sig(SIGSTOP, p, 1); wake_up_process(p); /* do this last */ ++total_forks; if (clone_flags & CLONE_VFORK) +#ifdef CONFIG_MOSIX + wait_for_completion_half_interruptible(&vfork); +#else wait_for_completion(&vfork); +#endif /* CONFIG_MOSIX */ fork_out: return retval; @@ -739,6 +853,9 @@ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup: +#ifdef CONFIG_MOSIX + mosix_fork_free_fields(p); +#endif /* CONFIG_MOSIX */ put_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) __MOD_DEC_USE_COUNT(p->binfmt->module); diff -urN linux-2.4.17/kernel/module.c linux_umopenmosix/kernel/module.c --- linux-2.4.17/kernel/module.c Sun Nov 11 21:23:14 2001 +++ linux_umopenmosix/kernel/module.c Wed Jun 26 23:45:18 2002 @@ -1066,6 +1066,15 @@ } spin_unlock_irqrestore(&modlist_lock, flags); +#ifdef CONFIG_MOSIX_UDB + /* And from the udb symbols list */ + { + extern void udb_proc_rmmod(char *name, struct module *); + + udb_proc_rmmod((char *)mod->name, mod); + } +#endif /* CONFIG_MOSIX_UDB */ + /* And free the memory. 
*/ module_unmap(mod); diff -urN linux-2.4.17/kernel/panic.c linux_umopenmosix/kernel/panic.c --- linux-2.4.17/kernel/panic.c Sun Sep 30 21:26:08 2001 +++ linux_umopenmosix/kernel/panic.c Wed Jun 26 23:45:18 2002 @@ -97,6 +97,12 @@ sti(); for(;;) { CHECK_EMERGENCY_SYNC +#ifdef CONFIG_MOSIX_UDB + { + extern void mosix_debugger(char *); + mosix_debugger("panic"); + } +#endif /* CONFIG_MOSIX_UDB */ } } diff -urN linux-2.4.17/kernel/ptrace.c linux_umopenmosix/kernel/ptrace.c --- linux-2.4.17/kernel/ptrace.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/kernel/ptrace.c Wed Jun 26 23:45:18 2002 @@ -28,7 +28,11 @@ return -ESRCH; if (!kill) { +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(child) != TASK_STOPPED) +#else if (child->state != TASK_STOPPED) +#endif /* CONFIG_MOSIX */ return -ESRCH; #ifdef CONFIG_SMP /* Make sure the child gets off its CPU.. */ @@ -38,20 +42,28 @@ break; task_unlock(child); do { +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(child) != TASK_STOPPED) +#else if (child->state != TASK_STOPPED) +#endif /* CONFIG_MOSIX */ return -ESRCH; barrier(); cpu_relax(); } while (task_has_cpu(child)); } task_unlock(child); -#endif +#endif } /* All systems go.. */ return 0; } +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + int ptrace_attach(struct task_struct *task) { task_lock(task); @@ -59,6 +71,9 @@ goto bad; if (task == current) goto bad; +#ifdef CONFIG_MOSIX + if(!(task->mosix.dflags & DDEPUTY)) +#endif /* CONFIG_MOSIX */ if (!task->mm) goto bad; if(((current->uid != task->euid) || @@ -70,7 +85,12 @@ (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) goto bad; rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) +#ifdef CONFIG_MOSIX + if (!(task->mm ? 
task->mm->dumpable : (task->mosix.dflags & DDUMPABLE)) + && !capable(CAP_SYS_PTRACE)) +#else + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) +#endif /* CONFIG_MOSIX */ goto bad; /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) @@ -81,6 +101,9 @@ if (capable(CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; task_unlock(task); +#ifdef CONFIG_MOSIX_DFSA + tell_process(task, DREQ_NOTUPTODATE); +#endif /* CONFIG_MOSIX_DFSA */ write_lock_irq(&tasklist_lock); if (task->p_pptr != current) { @@ -104,6 +127,14 @@ return -EIO; /* Architecture-specific hardware disable .. */ +#ifdef CONFIG_MOSIX + if(child->mosix.dflags & DDEPUTY) + { + if(request_process(child, NULL, PR_PTRACE_CONT, PTRACE_DETACH)) + return(-ESRCH); + } + else +#endif /* CONFIG_MOSIX */ ptrace_disable(child); /* .. re-parent .. */ @@ -122,7 +153,7 @@ /* * Access another process' address space. - * Source/target buffer must be kernel space, + * Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ @@ -133,6 +164,13 @@ struct page *page; void *old_buf = buf; +#ifdef CONFIG_MOSIX + if(tsk != current || (tsk->mosix.dflags & DDEPUTY)) + return(request_process_arg2(tsk, buf, + write ? 
PR_PROCFS_MEM_WRITE : PR_PROCFS_MEM_READ, + (long)addr, len)); +#endif /* CONFIG_MOSIX */ + /* Worry about races with exit() */ task_lock(tsk); mm = tsk->mm; @@ -176,7 +214,7 @@ } up_read(&mm->mmap_sem); mmput(mm); - + return buf - old_buf; } @@ -200,7 +238,7 @@ copied += retval; src += retval; dst += retval; - len -= retval; + len -= retval; } return copied; } @@ -225,7 +263,7 @@ copied += retval; src += retval; dst += retval; - len -= retval; + len -= retval; } return copied; } diff -urN linux-2.4.17/kernel/sched.c linux_umopenmosix/kernel/sched.c --- linux-2.4.17/kernel/sched.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/kernel/sched.c Wed Jun 26 23:45:18 2002 @@ -37,6 +37,10 @@ extern void tqueue_bh(void); extern void immediate_bh(void); +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* * scheduler variables */ @@ -75,7 +79,7 @@ * Init task must be ok at boot for the ix86 as we will check its signals * via the SMP irq return path. */ - + struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; /* @@ -92,7 +96,11 @@ spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ +#ifdef CONFIG_MOSIX +LIST_HEAD(runqueue_head); +#else static LIST_HEAD(runqueue_head); +#endif /* CONFIG_MOSIX */ /* * We align per-CPU scheduling data on cacheline boundaries, @@ -166,9 +174,19 @@ * over.. */ weight = p->counter; +#ifdef CONFIG_MOSIX + /* give a generous advantage to processes in MOSIX context: */ + /* (they do not hog the CPU anyway) */ + if (p->mosix.dflags & DINSCHED) + weight += MOSIX_ASLEEP_PRIORITY; + if (p->mosix.dflags & DDEPUTY) + weight += MOSIX_DEPUTY_PRIORITY; + if (p->mosix.commpri) + weight += MOSIX_RESPOND_PRIORITY; +#endif /* CONFIG_MOSIX */ if (!weight) goto out; - + #ifdef CONFIG_SMP /* Give a largish advantage to the same processor... 
*/ /* (this is equivalent to penalizing other processors) */ @@ -303,7 +321,7 @@ smp_send_reschedule(tsk->processor); } return; - + #else /* UP */ int this_cpu = smp_processor_id(); @@ -326,6 +344,9 @@ { list_add(&p->run_list, &runqueue_head); nr_running++; +#ifdef CONFIG_MOSIX + run_on(p); +#endif /* CONFIG_MOSIX */ } static inline void move_last_runqueue(struct task_struct * p) @@ -352,12 +373,35 @@ { unsigned long flags; int success = 0; +#ifdef CONFIG_MOSIX + struct task_struct *origp = p; +#endif /* CONFIG_MOSIX */ /* * We want the common case fall through straight, thus the goto. */ spin_lock_irqsave(&runqueue_lock, flags); +#ifdef CONFIG_MOSIX + p = NORMAL_CONTEXT(p); + write_lock(&p->mosix.state_lock); + if(origp != p) + { + if(p->mosix.bstate != TASK_SAME) + p->state = TASK_RUNNING; + } + else if(p->mosix.bstate == TASK_SAME) + p->state = TASK_RUNNING; + else + { + p->mosix.bstate = TASK_RUNNING; + success = 1; + write_unlock(&p->mosix.state_lock); + goto out; + } + write_unlock(&p->mosix.state_lock); +#else p->state = TASK_RUNNING; +#endif /* CONFIG_MOSIX */ if (task_on_runqueue(p)) goto out; add_to_runqueue(p); @@ -374,6 +418,30 @@ return try_to_wake_up(p, 0); } +#ifdef CONFIG_MOSIX +/* wake up a process only if it is interested in MOSIX events */ +void +wake_up_mosix(struct task_struct *p) +{ + unsigned long flags = 0; + + spin_lock_irqsave(&runqueue_lock, flags); + if(mosix_wakeable(p)) + { + write_lock(&p->mosix.state_lock); /* not irq! */ + if(p->mosix.bstate != TASK_SAME) + p->state = TASK_RUNNING; + write_unlock(&p->mosix.state_lock); + if (!task_on_runqueue(p)) + { + add_to_runqueue(p); + reschedule_idle(p); + } + } + spin_unlock_irqrestore(&runqueue_lock, flags); +} +#endif /* CONFIG_MOSIX */ + static void process_timeout(unsigned long __data) { struct task_struct * p = (struct task_struct *) __data; @@ -398,7 +466,7 @@ * delivered to the current task. 
In this case the remaining time * in jiffies will be returned, or 0 if the timer expired in time * - * The current task state is guaranteed to be TASK_RUNNING when this + * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule @@ -446,6 +514,11 @@ init_timer(&timer); timer.expires = expire; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DINSCHED) + timer.data = (unsigned long)MOSIX_CONTEXT(current); + else +#endif /* CONFIG_MOSIX */ timer.data = (unsigned long) current; timer.function = process_timeout; @@ -552,6 +625,13 @@ struct task_struct *prev, *next, *p; struct list_head *tmp; int this_cpu, c; +#ifdef CONFIG_MOSIX + auto volatile int handle_interim = + (current->state & (TASK_INTERRUPTIBLE|TASK_STOPPED)) && + !(current->mosix.dflags & DINSCHED); + + restart: +#endif /* CONFIG_MOSIX */ spin_lock_prefetch(&runqueue_lock); @@ -561,6 +641,9 @@ prev = current; this_cpu = prev->processor; +#ifdef CONFIG_MOSIX_UDB + if(!udb_booting) +#endif /* CONFIG_MOSIX_UDB */ if (unlikely(in_interrupt())) { printk("Scheduling in interrupt\n"); BUG(); @@ -583,8 +666,42 @@ move_last_runqueue(prev); } +#ifdef CONFIG_MOSIX + if(handle_interim && need_interim_while_asleep()) + { + int prev_kernel_lock_depth, prev_mosix_lock_depth; + + do_interim_first: + write_lock(&prev->mosix.state_lock); /* not irq! 
*/ + prev->mosix.bstate = prev->state; + prev->state = TASK_RUNNING; + write_unlock(&prev->mosix.state_lock); + prev->mosix.dflags |= DINSCHED; + spin_unlock_irq(&runqueue_lock); + prev_kernel_lock_depth = prev->lock_depth; + prev_mosix_lock_depth = prev->mosix.lock_depth; + prev->lock_depth = -1; + prev->mosix.lock_depth = -1; + run_interim_while_asleep(); + prev->lock_depth = prev_kernel_lock_depth; + prev->mosix.lock_depth = prev_mosix_lock_depth; + if(prev_kernel_lock_depth != -1 || prev_mosix_lock_depth != -1) + reacquire_kernel_lock(prev); + spin_lock_irq(&runqueue_lock); + write_lock(&prev->mosix.state_lock); + prev->state = prev->mosix.bstate; + prev->mosix.bstate = TASK_SAME; + write_unlock(&prev->mosix.state_lock); + prev->mosix.dflags &= ~DINSCHED; + spin_unlock_irq(&runqueue_lock); + goto restart; + } +#endif /* CONFIG_MOSIX */ switch (prev->state) { case TASK_INTERRUPTIBLE: +#ifdef CONFIG_MOSIX + if (!(prev->mosix.dflags & (DREMOTE|DINSCHED))) +#endif /* CONFIG_MOSIX */ if (signal_pending(prev)) { prev->state = TASK_RUNNING; break; @@ -634,6 +751,14 @@ */ sched_data->curr = next; task_set_cpu(next, this_cpu); +#ifdef CONFIG_MOSIX + if(prev != next) + active_cpus += (next->pid != 0) - (prev->pid != 0); +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DIAG + if(active_cpus < 0 || active_cpus > smp_num_cpus) + mosix_panic("active_cpus got out of range"); +#endif /* CONFIG_MOSIX_DIAG */ spin_unlock_irq(&runqueue_lock); if (unlikely(prev == next)) { @@ -696,8 +821,32 @@ */ switch_to(prev, next, prev); __schedule_tail(prev); +#ifdef CONFIG_MOSIX + prev = current; /* we still use it */ +#endif /* CONFIG_MOSIX */ same_process: + +#ifdef CONFIG_MOSIX + if(handle_interim) + { + spin_lock_irq(&runqueue_lock); + if(need_interim_while_asleep()) + goto do_interim_first; + spin_unlock_irq(&runqueue_lock); + } + if(prev->state != TASK_RUNNING) + { + reacquire_kernel_lock(prev); + mosix_panic("Sched-Race: MOSIX awakened too late.\n"); + goto restart; + } +#endif /* 
CONFIG_MOSIX */ + +#ifdef CONFIG_MOSIX_UDB + if(prev->mosix.catchme) + mosix_debugger("catch"); +#endif /* CONFIG_MOSIX_UDB */ reacquire_kernel_lock(current); if (current->need_resched) goto need_resched_back; @@ -718,17 +867,38 @@ { struct list_head *tmp; struct task_struct *p; +#ifdef CONFIG_MOSIX + struct task_struct *realp; +#endif /* CONFIG_MOSIX */ CHECK_MAGIC_WQHEAD(q); WQ_CHECK_LIST_HEAD(&q->task_list); - + list_for_each(tmp,&q->task_list) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); CHECK_MAGIC(curr->__magic); p = curr->task; +#ifdef CONFIG_MOSIX + realp = NORMAL_CONTEXT(p); + if(realp != p) + { + read_lock(&realp->mosix.state_lock); + if(realp->mosix.bstate == TASK_SAME) + { + /* no longer in MOSIX */ + read_unlock(&realp->mosix.state_lock); + continue; + } + state = realp->state; + read_unlock(&realp->mosix.state_lock); + } + else + state = LOGICAL_STATE(p); +#else state = p->state; +#endif /* CONFIG_MOSIX */ if (state & mode) { WQ_NOTE_WAKER(curr); if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) @@ -787,6 +957,35 @@ spin_unlock_irq(&x->wait.lock); } +#ifdef CONFIG_MOSIX +void wait_for_completion_half_interruptible(struct completion *x) +{ + struct task_struct *p = current; + + spin_lock_irq(&x->wait.lock); + if (!x->done) { + DECLARE_WAITQUEUE(wait, current); + + wait.flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_tail(&x->wait, &wait); + do { + __set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irq(&x->wait.lock); + spin_lock_irq(&p->sigmask_lock); + p->mosix.ignoreoldsigs = 1; + p->sigpending = 0; + spin_unlock_irq(&p->sigmask_lock); + schedule(); + spin_lock_irq(&x->wait.lock); + } while (!x->done); + __remove_wait_queue(&x->wait, &wait); + } + x->done--; + spin_unlock_irq(&x->wait.lock); + recalc_sigpending(p); +} +#endif /* CONFIG_MOSIX */ + #define SLEEP_ON_VAR \ unsigned long flags; \ wait_queue_t wait; \ @@ -829,7 +1028,7 @@ void sleep_on(wait_queue_head_t *q) { 
SLEEP_ON_VAR - + current->state = TASK_UNINTERRUPTIBLE; SLEEP_ON_HEAD @@ -840,7 +1039,7 @@ long sleep_on_timeout(wait_queue_head_t *q, long timeout) { SLEEP_ON_VAR - + current->state = TASK_UNINTERRUPTIBLE; SLEEP_ON_HEAD @@ -884,6 +1083,10 @@ if (newprio > 19) newprio = 19; current->nice = newprio; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_inform_remote_of_nice(); +#endif /* CONFIG_MOSIX */ return 0; } @@ -898,7 +1101,7 @@ return tsk; } -static int setscheduler(pid_t pid, int policy, +static int setscheduler(pid_t pid, int policy, struct sched_param *param) { struct sched_param lp; @@ -924,7 +1127,7 @@ retval = -ESRCH; if (!p) goto out_unlock; - + if (policy < 0) policy = p->policy; else { @@ -933,7 +1136,7 @@ policy != SCHED_OTHER) goto out_unlock; } - + /* * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid * priority for SCHED_OTHER is 0. @@ -945,13 +1148,38 @@ goto out_unlock; retval = -EPERM; - if ((policy == SCHED_FIFO || policy == SCHED_RR) && + if ((policy == SCHED_FIFO || policy == SCHED_RR) && !capable(CAP_SYS_NICE)) goto out_unlock; if ((current->euid != p->euid) && (current->euid != p->uid) && !capable(CAP_SYS_NICE)) goto out_unlock; +#ifdef CONFIG_MOSIX + spin_unlock(&runqueue_lock); + if(policy != SCHED_OTHER) + { + while(p->mosix.dflags & DDEPUTY) + { + get_task_struct(p); + read_unlock_irq(&tasklist_lock); + if((retval = mosix_send_back_home(p))) + return(retval); + read_lock(&tasklist_lock); + free_task_struct(p); + } + task_lock(p); + p->mosix.stay |= DSTAY_FOR_RT; + task_unlock(p); + } + else + { + task_lock(p); + p->mosix.stay &= ~DSTAY_FOR_RT; + task_unlock(p); + } + spin_lock(&runqueue_lock); +#endif /* CONFIG_MOSIX */ retval = 0; p->policy = policy; p->rt_priority = lp.sched_priority; @@ -968,7 +1196,7 @@ return retval; } -asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param) { return setscheduler(pid, policy, 
param); @@ -1033,7 +1261,7 @@ asmlinkage long sys_sched_yield(void) { /* - * Trick. sched_yield() first counts the number of truly + * Trick. sched_yield() first counts the number of truly * 'pending' runnable processes, then returns if it's * only the current processes. (This test does not have * to be atomic.) In threaded applications this optimization @@ -1295,6 +1523,10 @@ exit_files(current); current->files = init_task.files; atomic_inc(&current->files->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(&current->files->users); + atomic_inc(&fs->users); +#endif /* CONFIG_MOSIX_DFSA */ } extern unsigned long wait_init_idle; diff -urN linux-2.4.17/kernel/signal.c linux_umopenmosix/kernel/signal.c --- linux-2.4.17/kernel/signal.c Thu Nov 22 02:26:27 2001 +++ linux_umopenmosix/kernel/signal.c Wed Jun 26 23:45:18 2002 @@ -14,6 +14,11 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#include +#endif /* CONFIG_MOSIX */ + #include /* @@ -105,6 +110,9 @@ void flush_signals(struct task_struct *t) { +#ifdef CONFIG_MOSIX + if(!(t->mosix.dflags & DFAKESIGNAL)) +#endif /* CONFIG_MOSIX */ t->sigpending = 0; flush_sigqueue(&t->pending); } @@ -119,6 +127,9 @@ if (atomic_dec_and_test(&sig->count)) kmem_cache_free(sigact_cachep, sig); } +#ifdef CONFIG_MOSIX + if(!(tsk->mosix.dflags & DFAKESIGNAL)) +#endif /* CONFIG_MOSIX */ tsk->sigpending = 0; flush_sigqueue(&tsk->pending); spin_unlock_irq(&tsk->sigmask_lock); @@ -246,6 +257,9 @@ if (current->notifier) { if (sigismember(current->notifier_mask, sig)) { if (!(current->notifier)(current->notifier_data)) { +#ifdef CONFIG_MOSIX + if(!(current->mosix.dflags&DFAKESIGNAL)) +#endif /* CONFIG_MOSIX */ current->sigpending = 0; return 0; } @@ -382,7 +396,11 @@ switch (sig) { case SIGKILL: case SIGCONT: /* Wake up the process if stopped.
*/ +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(t) == TASK_STOPPED) +#else if (t->state == TASK_STOPPED) +#endif /* CONFIG_MOSIX */ wake_up_process(t); t->exit_code = 0; rm_sig_from_queue(SIGSTOP, t); @@ -465,6 +483,11 @@ */ static inline void signal_wake_up(struct task_struct *t) { +#ifdef CONFIG_MOSIX + t->mosix.ignoreoldsigs = 0; + if(t->mosix.dflags & DHEAVYSLEEP) + return; +#endif /* CONFIG_MOSIX */ t->sigpending = 1; #ifdef CONFIG_SMP @@ -484,7 +507,11 @@ spin_unlock(&runqueue_lock); #endif /* CONFIG_SMP */ +#ifdef CONFIG_MOSIX + if (LOGICAL_STATE(t) & TASK_INTERRUPTIBLE) { +#else if (t->state & TASK_INTERRUPTIBLE) { +#endif /* CONFIG_MOSIX */ wake_up_process(t); return; } @@ -562,6 +589,42 @@ { unsigned long int flags; +#ifdef CONFIG_MOSIX + if(t->mosix.dflags & DREMOTE) + { + int n; + siginfo_t *x; + + if(!info || ((long)info) == 1) + panic("remote force_sig_info"); + info->si_signo = sig; + /* (should be anyway, but some callers are mindless about it) */ + if(in_interrupt()) + panic("remote force_sig_info in interrupt"); + repeat: + n = t->mosix.nforced_sigs; + if(!(x = kmalloc((n + 1) * sizeof(siginfo_t), GFP_KERNEL))) + { + /* just do our best */ + send_sig(sig, t, 0); + return(0); + } + spin_lock_irqsave(&t->sigmask_lock, flags); + if(t->mosix.nforced_sigs != n) + { + spin_unlock_irqrestore(&t->sigmask_lock, flags); + kfree(x); + goto repeat; + } + if(n) + memcpy(x, t->mosix.forced_sigs, sizeof(siginfo_t) * n); + x[n] = *info; + t->mosix.forced_sigs = x; + t->mosix.nforced_sigs++; + spin_unlock_irqrestore(&t->sigmask_lock, flags); + return(0); + } +#endif /* CONFIG_MOSIX */ spin_lock_irqsave(&t->sigmask_lock, flags); if (t->sig == NULL) { spin_unlock_irqrestore(&t->sigmask_lock, flags); @@ -591,7 +654,11 @@ retval = -ESRCH; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->pgrp == pgrp) { int err = send_sig_info(sig, info, p); if (retval) @@ -618,7 +685,11 @@ retval = 
-ESRCH; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->leader && p->session == sess) { int err = send_sig_info(sig, info, p); if (retval) @@ -662,7 +733,11 @@ struct task_struct * p; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->pid > 1 && p != current) { int err = send_sig_info(sig, info, p); ++count; @@ -686,12 +761,34 @@ int send_sig(int sig, struct task_struct *p, int priv) { +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + { + unsigned long flags; + + spin_lock_irqsave(&p->sigmask_lock, flags); + p->mosix.asig |= (1 << (sig-1)); + spin_unlock_irqrestore(&p->sigmask_lock, flags); + if(p == current) + current->need_resched = 1; + return(0); + } +#endif /* CONFIG_MOSIX */ return send_sig_info(sig, (void*)(long)(priv != 0), p); } void force_sig(int sig, struct task_struct *p) { +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + { + if(sig == SIGSEGV) + sig = FATAL_SIGSEGV; + send_sig(sig, p, 0); + return; + } +#endif /* CONFIG_MOSIX */ force_sig_info(sig, (void*)1L, p); } @@ -747,7 +844,11 @@ status = tsk->exit_code & 0x7f; why = SI_KERNEL; /* shouldn't happen */ +#ifdef CONFIG_MOSIX + switch (LOGICAL_STATE(tsk)) { +#else switch (tsk->state) { +#endif /* CONFIG_MOSIX */ case TASK_STOPPED: /* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? 
*/ if (tsk->ptrace & PT_PTRACED) diff -urN linux-2.4.17/kernel/sys.c linux_umopenmosix/kernel/sys.c --- linux-2.4.17/kernel/sys.c Wed Sep 19 00:10:43 2001 +++ linux_umopenmosix/kernel/sys.c Wed Jun 26 23:45:18 2002 @@ -18,6 +18,13 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + /* * this is where the system-wide overflow UID and GID are defined, for * architectures that now have 32-bit UID/GID but didn't in the past @@ -174,6 +181,10 @@ static int proc_sel(struct task_struct *p, int which, int who) { +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + return(0); +#endif /* CONFIG_MOSIX */ if(p->pid) { switch (which) { @@ -210,7 +221,11 @@ niceval = 19; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (!proc_sel(p, which, who)) continue; if (p->uid != current->euid && @@ -223,7 +238,18 @@ if (niceval < p->nice && !capable(CAP_SYS_NICE)) error = -EACCES; else +#ifdef CONFIG_MOSIX + { +#endif /* CONFIG_MOSIX */ p->nice = niceval; +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DDEPUTY) + { + tell_process(p, DREQ_NICECNG); + wake_up_mosix(p); + } + } +#endif /* CONFIG_MOSIX */ } read_unlock(&tasklist_lock); @@ -400,7 +426,11 @@ } if (new_egid != old_egid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } if (rgid != (gid_t) -1 || @@ -409,6 +439,12 @@ current->fsgid = new_egid; current->egid = new_egid; current->gid = new_rgid; +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ return 0; } @@ -421,11 +457,21 @@ { int old_egid = current->egid; +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= 
DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if (capable(CAP_SETGID)) { if(old_egid != gid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable=0; +#endif /* CONFIG_MOSIX */ wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; @@ -434,7 +480,11 @@ { if(old_egid != gid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable=0; +#endif /* CONFIG_MOSIX */ wmb(); } current->egid = current->fsgid = gid; @@ -508,12 +558,19 @@ if(dumpclear) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->uid = new_ruid; current->user = new_user; free_uid(old_user); +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ return 0; } @@ -548,6 +605,9 @@ return -EPERM; } +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if (euid != (uid_t) -1) { new_euid = euid; if ((old_ruid != euid) && @@ -562,7 +622,11 @@ if (new_euid != old_euid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable=0; +#endif /* CONFIG_MOSIX */ wmb(); } current->fsuid = current->euid = new_euid; @@ -596,6 +660,9 @@ int old_euid = current->euid; int old_ruid, old_suid, new_ruid, new_suid; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ old_ruid = new_ruid = current->uid; old_suid = current->suid; new_suid = old_suid; @@ -609,7 +676,11 @@ if (old_euid != uid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->fsuid = current->euid = uid; @@ -644,6 +715,9 @@ (suid != current->euid) && (suid != current->suid)) return -EPERM; } +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if (ruid != (uid_t) -1) { if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) return -EAGAIN; @@ -651,7 +725,11 @@ if 
(euid != (uid_t) -1) { if (euid != current->euid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->euid = euid; @@ -694,10 +772,17 @@ (sgid != current->egid) && (sgid != current->sgid)) return -EPERM; } +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if (egid != (gid_t) -1) { if (egid != current->egid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->egid = egid; @@ -707,6 +792,9 @@ current->gid = rgid; if (sgid != (gid_t) -1) current->sgid = sgid; +#ifdef CONFIG_MOSIX + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ return 0; } @@ -732,6 +820,9 @@ { int old_fsuid; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ old_fsuid = current->fsuid; if (uid == current->uid || uid == current->euid || uid == current->suid || uid == current->fsuid || @@ -739,7 +830,11 @@ { if (uid != old_fsuid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->fsuid = uid; @@ -774,6 +869,9 @@ { int old_fsgid; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ old_fsgid = current->fsgid; if (gid == current->gid || gid == current->egid || gid == current->sgid || gid == current->fsgid || @@ -781,7 +879,11 @@ { if (gid != old_fsgid) { +#ifdef CONFIG_MOSIX + set_me_dumpable(0); +#else current->mm->dumpable = 0; +#endif /* CONFIG_MOSIX */ wmb(); } current->fsgid = gid; @@ -797,6 +899,10 @@ * atomically safe type this is just fine. Conceptually its * as if the syscall took an instant longer to occur. 
*/ +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + mosix_deputy_rusage(0); +#endif /* CONFIG_MOSIX */ if (tbuf) if (copy_to_user(tbuf, &current->times, sizeof(struct tms))) return -EFAULT; @@ -852,7 +958,11 @@ goto out; if (pgid != pid) { struct task_struct * tmp; +#ifdef CONFIG_MOSIX + for_each_local_task (tmp) { +#else for_each_task (tmp) { +#endif /* CONFIG_MOSIX */ if (tmp->pgrp == pgid && tmp->session == current->session) goto ok_pgid; @@ -866,6 +976,13 @@ out: /* All paths lead to here, thus we are safe. -DaveM */ read_unlock(&tasklist_lock); +#ifdef CONFIG_MOSIX + if(!err) + { + tell_process(p, DREQ_INFOCNG); + wake_up_mosix(p); + } +#endif /* CONFIG_MOSIX */ return err; } @@ -919,7 +1036,11 @@ int err = -EPERM; read_lock(&tasklist_lock); +#ifdef CONFIG_MOSIX + for_each_local_task(p) { +#else for_each_task(p) { +#endif /* CONFIG_MOSIX */ if (p->pgrp == current->pid) goto out; } @@ -931,6 +1052,10 @@ err = current->pgrp; out: read_unlock(&tasklist_lock); +#ifdef CONFIG_MOSIX + if(err == current->pid) + tell_process(current, DREQ_INFOCNG); +#endif /* CONFIG_MOSIX */ return err; } @@ -969,6 +1094,9 @@ return -EPERM; if ((unsigned) gidsetsize > NGROUPS) return -EINVAL; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDCRED; +#endif /* CONFIG_MOSIX_DFSA */ if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t))) return -EFAULT; current->ngroups = gidsetsize; @@ -1130,6 +1258,22 @@ return -EPERM; } *old_rlim = new_rlim; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + switch(resource) + { + case RLIMIT_CPU: + case RLIMIT_DATA: + case RLIMIT_STACK: + case RLIMIT_RSS: + case RLIMIT_AS: +#ifdef CONFIG_MOSIX_DFSA + case RLIMIT_NOFILE: + case RLIMIT_FSIZE: +#endif /* CONFIG_MOSIX_DFSA */ + mosix_deputy_rlimit(resource, new_rlim); + } +#endif /* CONFIG_MOSIX */ return 0; } @@ -1191,12 +1335,19 @@ { if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) return -EINVAL; +#ifdef CONFIG_MOSIX + if(who == RUSAGE_SELF &&
(current->mosix.dflags & DDEPUTY)) + mosix_deputy_rusage(0); +#endif /* CONFIG_MOSIX */ return getrusage(current, who, ru); } asmlinkage long sys_umask(int mask) { mask = xchg(&current->fs->umask, mask & S_IRWXUGO); +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDUMASK; +#endif /* CONFIG_MOSIX_DFSA */ return mask; } @@ -1219,7 +1370,11 @@ error = put_user(current->pdeath_signal, (int *)arg2); break; case PR_GET_DUMPABLE: +#ifdef CONFIG_MOSIX + if (i_am_dumpable()) +#else if (current->mm->dumpable) +#endif /* CONFIG_MOSIX */ error = 1; break; case PR_SET_DUMPABLE: @@ -1227,7 +1382,11 @@ error = -EINVAL; break; } +#ifdef CONFIG_MOSIX + set_me_dumpable(arg2 != 0); +#else current->mm->dumpable = arg2; +#endif /* CONFIG_MOSIX */ break; case PR_SET_UNALIGN: #ifdef SET_UNALIGN_CTL diff -urN linux-2.4.17/kernel/sysctl.c linux_umopenmosix/kernel/sysctl.c --- linux-2.4.17/kernel/sysctl.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/kernel/sysctl.c Wed Jun 26 23:45:18 2002 @@ -399,11 +399,23 @@ ctl_table *table, void **context) { int n; +#ifdef CONFIG_MOSIX + /* single "get_user" calls are expensive: */ + int udata[CTL_MAXNAME]; + int *nextu = udata; + + if(copy_from_user(udata, name, nlen * sizeof(int))) + return(-EFAULT); +#endif /* CONFIG_MOSIX */ repeat: if (!nlen) return -ENOTDIR; +#ifdef CONFIG_MOSIX + n = *nextu++; +#else if (get_user(n, name)) return -EFAULT; +#endif /* CONFIG_MOSIX */ for ( ; table->ctl_name; table++) { if (n == table->ctl_name || table->ctl_name == CTL_ANY) { int error; @@ -833,10 +845,27 @@ for (; left && vleft--; i++, first=0) { if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need better optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, buffer, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; +#else if(get_user(c,(char *) buffer)) return -EFAULT;
+#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; @@ -898,11 +927,29 @@ left--, buffer++; } if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need higher optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ p = (char *) buffer; while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, p, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; + p++; +#else if(get_user(c, p++)) return -EFAULT; +#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; @@ -986,10 +1033,27 @@ for (; left && vleft--; i++, first=0) { if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need higher optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, buffer, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; +#else if(get_user(c, (char *) buffer)) return -EFAULT; +#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; @@ -1046,11 +1110,29 @@ left--, buffer++; } if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need higher optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ p = (char *) buffer; while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, p, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; + p++; +#else if(get_user(c, p++)) return -EFAULT; +#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; @@ -1089,10 +1171,27 @@ for (; left && vleft--; i++, first=0) { if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need better optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + 
if(copy_from_user(buf, buffer, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; +#else if(get_user(c, (char *) buffer)) return -EFAULT; +#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; @@ -1151,11 +1250,29 @@ left--, buffer++; } if (write) { +#ifdef CONFIG_MOSIX + /* "get_user" is expensive - need higher optimization */ + int nex = TMPBUFLEN; +#endif /* CONFIG_MOSIX */ p = (char *) buffer; while (left) { char c; +#ifdef CONFIG_MOSIX + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, p, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; + p++; +#else if(get_user(c, p++)) return -EFAULT; +#endif /* CONFIG_MOSIX */ if (!isspace(c)) break; left--; diff -urN linux-2.4.17/kernel/timer.c linux_umopenmosix/kernel/timer.c --- linux-2.4.17/kernel/timer.c Mon Oct 8 19:41:41 2001 +++ linux_umopenmosix/kernel/timer.c Wed Jun 26 23:45:18 2002 @@ -25,6 +25,10 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* * Timekeeping variables */ @@ -522,12 +526,26 @@ unsigned long user, unsigned long system) { unsigned long psecs; +#ifdef CONFIG_MOSIX + int i; +#endif /* CONFIG_MOSIX */ psecs = (p->times.tms_utime += user); psecs += (p->times.tms_stime += system); +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + psecs += p->mosix.passedtime; + if(!(p->mosix.dflags & DDEPUTY)) +#endif /* CONFIG_MOSIX */ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) { /* Send SIGXCPU every second.. */ +#ifdef CONFIG_MOSIX + i = p->mosix.last_sigxcpu ? : p->rlim[RLIMIT_CPU].rlim_cur; + p->mosix.last_sigxcpu = psecs / HZ; + for(; i < p->mosix.last_sigxcpu ; i++) +#else if (!(psecs % HZ)) +#endif /* CONFIG_MOSIX */ send_sig(SIGXCPU, p, 1); /* and SIGKILL when we go over max.. 
*/ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max) @@ -535,7 +553,11 @@ } } +#ifdef CONFIG_MOSIX +inline void do_it_virt(struct task_struct * p, unsigned long ticks) +#else static inline void do_it_virt(struct task_struct * p, unsigned long ticks) +#endif /* CONFIG_MOSIX */ { unsigned long it_virt = p->it_virt_value; @@ -553,6 +575,11 @@ { unsigned long it_prof = p->it_prof_value; +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DDEPUTY) + p->mosix.deputytime++; + else +#endif /* CONFIG_MOSIX */ if (it_prof) { if (--it_prof == 0) { it_prof = p->it_prof_incr; @@ -627,6 +654,9 @@ unsigned long active_tasks; /* fixed-point */ static int count = LOAD_FREQ; +#ifdef CONFIG_MOSIX + mosix_snap_load(ticks); +#endif /* CONFIG_MOSIX */ count -= ticks; if (count < 0) { count += LOAD_FREQ; diff -urN linux-2.4.17/kernel/user.c linux_umopenmosix/kernel/user.c --- linux-2.4.17/kernel/user.c Wed Nov 29 08:43:39 2000 +++ linux_umopenmosix/kernel/user.c Wed Jun 26 23:45:18 2002 @@ -87,6 +87,10 @@ struct user_struct **hashent = uidhashentry(uid); struct user_struct *up; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DREMOTE) + panic("REMOTE allocating uid"); +#endif /* CONFIG_MOSIX_DIAG */ spin_lock(&uidhash_lock); up = uid_hash_find(uid, hashent); spin_unlock(&uidhash_lock); @@ -134,4 +138,46 @@ return 0; } +#ifdef CONFIG_MOSIX + +extern int max_threads; + +struct user_struct remote_guests = +{ + __count: ATOMIC_INIT(1), + processes: ATOMIC_INIT(1), + files: ATOMIC_INIT(0) +}; + +void +init_guest_user_struct(void) +{ + struct task_struct *p = current; + + free_uid(p->user); + p->user = &remote_guests; + p->rlim[RLIMIT_NPROC].rlim_cur = max_threads / 6; +} + +int +get_free_guest_slots(void) +{ + int a, b; + + write_lock_irq(&tasklist_lock); + a = max_threads/6 - atomic_read(&remote_guests.__count); + b = max_threads - nr_threads; + write_unlock_irq(&tasklist_lock); + if(a < 0) /* a little paranoidic */ + a = 0; + return(a < b ? 
a : b); +} + +int +count_guests(void) +{ + return(atomic_read(&remote_guests.__count) - 1); +} +#endif /* CONFIG_MOSIX */ + module_init(uid_cache_init); diff -urN linux-2.4.17/lib/rwsem-spinlock.c linux_umopenmosix/lib/rwsem-spinlock.c --- linux-2.4.17/lib/rwsem-spinlock.c Wed Apr 25 23:31:03 2001 +++ linux_umopenmosix/lib/rwsem-spinlock.c Wed Jun 26 23:45:18 2002 @@ -127,6 +127,9 @@ /* set up my own style of waitqueue */ waiter.task = tsk; +#ifdef CONFIG_MOSIX + adjust_task_mosix_context(&waiter.task); +#endif /* CONFIG_MOSIX */ waiter.flags = RWSEM_WAITING_FOR_READ; list_add_tail(&waiter.list,&sem->wait_list); @@ -173,6 +176,9 @@ /* set up my own style of waitqueue */ waiter.task = tsk; +#ifdef CONFIG_MOSIX + adjust_task_mosix_context(&waiter.task); +#endif /* CONFIG_MOSIX */ waiter.flags = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list,&sem->wait_list); diff -urN linux-2.4.17/lib/rwsem.c linux_umopenmosix/lib/rwsem.c --- linux-2.4.17/lib/rwsem.c Wed Jul 11 06:08:51 2001 +++ linux_umopenmosix/lib/rwsem.c Wed Jun 26 23:45:18 2002 @@ -122,6 +122,9 @@ /* set up my own style of waitqueue */ spin_lock(&sem->wait_lock); waiter->task = tsk; +#ifdef CONFIG_MOSIX + adjust_task_mosix_context(&waiter->task); +#endif /* CONFIG_MOSIX */ list_add_tail(&waiter->list,&sem->wait_list); diff -urN linux-2.4.17/mm/filemap.c linux_umopenmosix/mm/filemap.c --- linux-2.4.17/mm/filemap.c Fri Dec 21 19:42:04 2001 +++ linux_umopenmosix/mm/filemap.c Wed Jun 26 23:45:18 2002 @@ -31,6 +31,13 @@ #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + /* * Shared mappings implemented 30.11.1994. It's not fully working yet, * though. @@ -55,7 +62,7 @@ spinlock_t pagecache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; /* - * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock + * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock * with the pagecache_lock held. 
* * Ordering: @@ -213,6 +220,9 @@ spin_unlock(&pagecache_lock); spin_unlock(&pagemap_lru_lock); +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ } static int do_flushpage(struct page *page, unsigned long offset) @@ -242,7 +252,7 @@ * destroyed all buffer-cache references to it. Otherwise some * other process might think this inode page is not in the * page cache and creates a buffer-cache alias to it causing - * all sorts of fun problems ... + * all sorts of fun problems ... */ ClearPageDirty(page); ClearPageUptodate(page); @@ -287,7 +297,7 @@ if (*partial && (offset + 1) == start) { truncate_partial_page(page, *partial); *partial = 0; - } else + } else truncate_complete_page(page); UnlockPage(page); @@ -319,7 +329,7 @@ * that are beyond that offset (and zeroing out partial pages). * If any page is locked we wait for it to become unlocked. */ -void truncate_inode_pages(struct address_space * mapping, loff_t lstart) +void truncate_inode_pages(struct address_space * mapping, loff_t lstart) { unsigned long start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); @@ -470,7 +480,7 @@ continue; bh->b_flushtime = jiffies; - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(WRITE, 1, &bh); } while ((bh = bh->b_this_page) != head); return 0; } @@ -578,7 +588,7 @@ /** * filemap_fdatasync - walk the list of dirty pages of the given address space * and writepage() all of them. - * + * * @mapping: address space structure to write * */ @@ -617,7 +627,7 @@ /** * filemap_fdatawait - walk the list of locked pages of the given address space * and wait for all of them. - * + * * @mapping: address space structure to wait for * */ @@ -648,7 +658,7 @@ /* * Add a page to the inode page cache. * - * The caller must have locked the page and + * The caller must have locked the page and * set all the page flags correctly.. 
*/ void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index) @@ -723,7 +733,7 @@ { struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct page **hash = page_hash(mapping, offset); - struct page *page; + struct page *page; spin_lock(&pagecache_lock); page = __find_page_nolock(mapping, offset, *hash); @@ -741,7 +751,7 @@ return error; } /* - * We arrive here in the unlikely event that someone + * We arrive here in the unlikely event that someone * raced with us and added our page to the cache first. */ page_cache_release(page); @@ -770,7 +780,7 @@ return 0; } -/* +/* * Wait for a page to get unlocked. * * This must be called with the caller "holding" the page, @@ -800,7 +810,7 @@ smp_mb__before_clear_bit(); if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); - smp_mb__after_clear_bit(); + smp_mb__after_clear_bit(); if (waitqueue_active(&(page)->wait)) wake_up(&(page)->wait); } @@ -827,7 +837,7 @@ tsk->state = TASK_RUNNING; remove_wait_queue(&page->wait, &wait); } - + /* * Get an exclusive lock on the page, optimistically @@ -953,11 +963,11 @@ spin_unlock(&pagecache_lock); if (newpage == NULL) lru_cache_add(page); - else + else page_cache_release(newpage); } } - return page; + return page; } /* @@ -1022,11 +1032,11 @@ /* * Read-ahead profiling information * -------------------------------- - * Every PROFILE_MAXREADCOUNT, the following information is written + * Every PROFILE_MAXREADCOUNT, the following information is written * to the syslog: * Percentage of asynchronous read-ahead. * Average of read-ahead fields context value. - * If DEBUG_READAHEAD is defined, a snapshot of these fields is written + * If DEBUG_READAHEAD is defined, a snapshot of these fields is written * to the syslog. 
*/ @@ -1102,7 +1112,7 @@ * * Synchronous read-ahead benefits: * -------------------------------- - * Using reasonable IO xfer length from peripheral devices increase system + * Using reasonable IO xfer length from peripheral devices increase system * performances. * Reasonable means, in this context, not too large but not too small. * The actual maximum value is: @@ -1111,22 +1121,22 @@ * * Asynchronous read-ahead benefits: * --------------------------------- - * Overlapping next read request and user process execution increase system + * Overlapping next read request and user process execution increase system * performance. * * Read-ahead risks: * ----------------- * We have to guess which further data are needed by the user process. - * If these data are often not really needed, it's bad for system + * If these data are often not really needed, it's bad for system * performances. - * However, we know that files are often accessed sequentially by - * application programs and it seems that it is possible to have some good + * However, we know that files are often accessed sequentially by + * application programs and it seems that it is possible to have some good * strategy in that guessing. * We only try to read-ahead files that seems to be read sequentially. * * Asynchronous read-ahead risks: * ------------------------------ - * In order to maximize overlapping, we must start some asynchronous read + * In order to maximize overlapping, we must start some asynchronous read * request from the device, as soon as possible. * We must be very careful about: * - The number of effective pending IO read requests. @@ -1164,7 +1174,7 @@ * If the current position is inside the previous read IO request, do not * try to reread previously read ahead pages. * Otherwise decide or not to read ahead some pages synchronously. 
- * If we are not going to read ahead, set the read ahead context for this + * If we are not going to read ahead, set the read ahead context for this * page only. */ if (PageLocked(page)) { @@ -1193,7 +1203,7 @@ /* * Add ONE page to max_ahead in order to try to have about the same IO max size * as synchronous read-ahead (MAX_READAHEAD + 1)*PAGE_CACHE_SIZE. - * Compute the position of the last page we have tried to read in order to + * Compute the position of the last page we have tried to read in order to * begin to read ahead just at the next page. */ raend -= 1; @@ -1291,7 +1301,7 @@ offset = *ppos & ~PAGE_CACHE_MASK; /* - * If the current position is outside the previous read-ahead window, + * If the current position is outside the previous read-ahead window, * we reset the current read-ahead context and set read ahead max to zero * (will be set to just needed value later), * otherwise, we assume that the file accesses are sequential enough to @@ -1334,7 +1344,7 @@ unsigned long end_index, nr, ret; end_index = inode->i_size >> PAGE_CACHE_SHIFT; - + if (index > end_index) break; nr = PAGE_CACHE_SIZE; @@ -1474,7 +1484,7 @@ page = cached_page; __add_to_page_cache(page, mapping, index, hash); spin_unlock(&pagecache_lock); - lru_cache_add(page); + lru_cache_add(page); cached_page = NULL; goto readpage; @@ -1543,7 +1553,7 @@ if (rw == READ && retval > 0) mark_dirty_kiobuf(iobuf, retval); - + if (retval >= 0) { count -= retval; buf += retval; @@ -1564,7 +1574,7 @@ clear_bit(0, &filp->f_iobuf_lock); else free_kiovec(1, &iobuf); - out: + out: return retval; } @@ -1579,7 +1589,7 @@ kaddr = kmap(page); left = __copy_to_user(desc->buf, kaddr + offset, size); kunmap(page); - + if (left) { size -= left; desc->error = -EFAULT; @@ -1687,6 +1697,10 @@ struct file * in_file, * out_file; struct inode * in_inode, * out_inode; +#ifdef CONFIG_MOSIX_DFSA + dfsa_touch_file(in_fd); + dfsa_touch_file(out_fd); +#endif /* CONFIG_MOSIX_DFSA */ /* * Get input file, and verify that it is ok..
*/ @@ -1925,7 +1939,7 @@ no_cached_page: /* - * If the requested offset is within our file, try to read a whole + * If the requested offset is within our file, try to read a whole * cluster of pages at once. * * Otherwise, we're off the end of a privately mapped file, @@ -2029,7 +2043,7 @@ } static inline int filemap_sync_pte_range(pmd_t * pmd, - unsigned long address, unsigned long size, + unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned long offset, unsigned int flags) { pte_t * pte; @@ -2059,7 +2073,7 @@ } static inline int filemap_sync_pmd_range(pgd_t * pgd, - unsigned long address, unsigned long size, + unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned int flags) { pmd_t * pmd; @@ -2170,6 +2184,10 @@ struct vm_area_struct * vma; int unmapped_error, error = -EINVAL; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + panic("sys_msync: DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -2471,7 +2489,7 @@ error = -EINVAL; break; } - + return error; } @@ -2832,7 +2850,7 @@ } /* - * Write to a file through the page cache. + * Write to a file through the page cache. * * We currently put everything into the page cache prior to writing it. * This is not a problem when writing full pages. With partial pages, @@ -2890,7 +2908,7 @@ * Check whether we've reached the file size limit. */ err = -EFBIG; - + if (limit != RLIM_INFINITY) { if (pos >= limit) { send_sig(SIGXFSZ, current, 0); @@ -2903,7 +2921,7 @@ } /* - * LFS rule + * LFS rule */ if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { if (pos >= MAX_NON_LFS) { @@ -2925,7 +2943,7 @@ * * Linus frestrict idea will clean these up nicely.. 
*/ - + if (!S_ISBLK(inode->i_mode)) { if (pos >= inode->i_sb->s_maxbytes) { @@ -3009,6 +3027,9 @@ page_fault = __copy_from_user(kaddr+offset, buf, bytes); flush_dcache_page(page); status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ if (page_fault) goto fail_write; if (!status) @@ -3041,8 +3062,8 @@ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA); } - -out_status: + +out_status: err = written ? written : status; out: diff -urN linux-2.4.17/mm/memory.c linux_umopenmosix/mm/memory.c --- linux-2.4.17/mm/memory.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/mm/memory.c Wed Jun 26 23:45:18 2002 @@ -49,6 +49,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + unsigned long max_mapnr; unsigned long num_physpages; void * high_memory; @@ -71,7 +75,7 @@ mem_map_t * mem_map; /* - * Called by TLB shootdown + * Called by TLB shootdown */ void __free_pte(pte_t pte) { @@ -79,7 +83,7 @@ if ((!VALID_PAGE(page)) || PageReserved(page)) return; if (pte_dirty(pte)) - set_page_dirty(page); + set_page_dirty(page); free_page_and_swap_cache(page); } @@ -186,9 +190,9 @@ pmd_t * src_pmd, * dst_pmd; src_pgd++; dst_pgd++; - + /* copy_pmd_range */ - + if (pgd_none(*src_pgd)) goto skip_copy_pmd_range; if (pgd_bad(*src_pgd)) { @@ -207,9 +211,9 @@ do { pte_t * src_pte, * dst_pte; - + /* copy_pte_range */ - + if (pmd_none(*src_pmd)) goto skip_copy_pte_range; if (pmd_bad(*src_pmd)) { @@ -226,11 +230,11 @@ if (!dst_pte) goto nomem; - spin_lock(&src->page_table_lock); + spin_lock(&src->page_table_lock); do { pte_t pte = *src_pte; struct page *ptepage; - + /* copy_one_pte */ if (pte_none(pte)) @@ -240,7 +244,7 @@ goto cont_copy_pte_range; } ptepage = pte_page(pte); - if ((!VALID_PAGE(ptepage)) || + if ((!VALID_PAGE(ptepage)) || PageReserved(ptepage)) goto cont_copy_pte_range; @@ -265,7 +269,7 @@ dst_pte++; } while 
((unsigned long)src_pte & PTE_TABLE_MASK); spin_unlock(&src->page_table_lock); - + cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); @@ -346,7 +350,7 @@ freed = 0; do { freed += zap_pte_range(tlb, pmd, address, end - address); - address = (address + PMD_SIZE) & PMD_MASK; + address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); return freed; @@ -398,9 +402,9 @@ } /* - * Do a quick page-table lookup for a single page. + * Do a quick page-table lookup for a single page. */ -static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) +static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) { pgd_t *pgd; pmd_t *pmd; @@ -429,7 +433,7 @@ return 0; } -/* +/* * Given a physical address, is there a useful struct page pointing to * it? This may become more complex in the future if we start dealing * with IO-aperture pages in kiobufs. @@ -502,7 +506,7 @@ /* * Force in an entire range of pages from the current process's user VA, - * and pin them in physical memory. + * and pin them in physical memory. */ #define dprintk(x...) @@ -510,14 +514,18 @@ { int pgcount, err; struct mm_struct * mm; - + /* Make sure the iobuf is not already mapped somewhere. 
*/ if (iobuf->nr_pages) return -EINVAL; +#ifdef CONFIG_MOSIX + if(stay_me_and_my_clones(DSTAY_FOR_KIOBUF)) + return(-ENOMEM); +#endif /* CONFIG_MOSIX */ mm = current->mm; dprintk ("map_user_kiobuf: begin\n"); - + pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE; /* mapping 0 bytes is not permitted */ if (!pgcount) BUG(); @@ -528,9 +536,27 @@ iobuf->locked = 0; iobuf->offset = va & (PAGE_SIZE-1); iobuf->length = len; - + /* Try to fault in all of the necessary pages */ down_read(&mm->mmap_sem); +#ifdef CONFIG_MOSIX + write_lock_irq(&tasklist_lock); + if(iobuf->mm) + { + printk("map_user_kiobuf: double use\n"); +#ifdef CONFIG_MOSIX_UDB + mosix_debugger("kiobuf"); +#endif /* CONFIG_MOSIX_UDB */ + write_unlock_irq(&tasklist_lock); + up_read(&mm->mmap_sem); + unstay_mm(mm); + return(-EINVAL); + } + atomic_inc(&mm->mm_count); + atomic_inc(&mm->mm_kiocount); + iobuf->mm = mm; + write_unlock_irq(&tasklist_lock); +#endif /* CONFIG_MOSIX */ /* rw==READ means read from disk, write into memory area */ err = get_user_pages(current, mm, va, pgcount, (rw==READ), 0, iobuf->maplist, NULL); @@ -538,6 +564,9 @@ if (err < 0) { unmap_kiobuf(iobuf); dprintk ("map_user_kiobuf: end %d\n", err); +#ifdef CONFIG_MOSIX + unstay_mm(mm); +#endif /* CONFIG_MOSIX */ return err; } iobuf->nr_pages = err; @@ -552,7 +581,7 @@ } /* - * Mark all of the pages in a kiobuf as dirty + * Mark all of the pages in a kiobuf as dirty * * We need to be able to deal with short reads from disk: if an IO error * occurs, the number of bytes read into memory may be less than the @@ -564,16 +593,16 @@ { int index, offset, remaining; struct page *page; - + index = iobuf->offset >> PAGE_SHIFT; offset = iobuf->offset & ~PAGE_MASK; remaining = bytes; if (remaining > iobuf->length) remaining = iobuf->length; - + while (remaining > 0 && index < iobuf->nr_pages) { page = iobuf->maplist[index]; - + if (!PageReserved(page)) SetPageDirty(page); @@ -585,14 +614,17 @@ /* * Unmap all of the pages referenced by a 
kiobuf. We release the pages, - * and unlock them if they were locked. + * and unlock them if they were locked. */ -void unmap_kiobuf (struct kiobuf *iobuf) +void unmap_kiobuf (struct kiobuf *iobuf) { int i; struct page *map; - +#ifdef CONFIG_MOSIX + struct mm_struct *mm; +#endif /* CONFIG_MOSIX */ + for (i = 0; i < iobuf->nr_pages; i++) { map = iobuf->maplist[i]; if (map) { @@ -604,9 +636,26 @@ page_cache_release(map); } } - + iobuf->nr_pages = 0; iobuf->locked = 0; +#ifdef CONFIG_MOSIX + write_lock_irq(&tasklist_lock); + mm = iobuf->mm; + iobuf->mm = NULL; + write_unlock_irq(&tasklist_lock); + if(!mm) + { + printk("unmap_kiobuf: no mm!\n"); +#ifdef CONFIG_MOSIX_UDB + mosix_debugger("unmap_kiobuf"); +#endif /* CONFIG_MOSIX_UDB */ + return; + } + if(atomic_dec_and_test(&mm->mm_kiocount)) + unstay_mm(mm); + mmdrop(mm); +#endif /* CONFIG_MOSIX */ } @@ -627,9 +676,9 @@ struct page *page, **ppage; int doublepage = 0; int repeat = 0; - + repeat: - + for (i = 0; i < nr; i++) { iobuf = iovec[i]; @@ -641,7 +690,7 @@ page = *ppage; if (!page) continue; - + if (TryLockPage(page)) { while (j--) { struct page *tmp = *--ppage; @@ -655,36 +704,36 @@ } return 0; - + retry: - - /* + + /* * We couldn't lock one of the pages. Undo the locking so far, - * wait on the page we got to, and try again. + * wait on the page we got to, and try again. */ - + unlock_kiovec(nr, iovec); if (!wait) return -EAGAIN; - - /* + + /* * Did the release also unlock the page we got stuck on? */ if (!PageLocked(page)) { - /* + /* * If so, we may well have the page mapped twice * in the IO address range. Bad news. Of * course, it _might_ just be a coincidence, * but if it happens more than once, chances - * are we have a double-mapped page. + * are we have a double-mapped page. */ - if (++doublepage >= 3) + if (++doublepage >= 3) return -EINVAL; - + /* Try again... 
*/ wait_on_page(page); } - + if (++repeat < 16) goto repeat; return -EAGAIN; @@ -699,14 +748,14 @@ struct kiobuf *iobuf; int i, j; struct page *page, **ppage; - + for (i = 0; i < nr; i++) { iobuf = iovec[i]; if (!iobuf->locked) continue; iobuf->locked = 0; - + ppage = iobuf->maplist; for (j = 0; j < iobuf->nr_pages; ppage++, j++) { page = *ppage; @@ -887,7 +936,7 @@ /* * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock */ -static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, +static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, pte_t *page_table) { flush_page_to_ram(new_page); @@ -962,6 +1011,11 @@ spin_unlock(&mm->page_table_lock); page_cache_release(new_page); page_cache_release(old_page); +#ifdef CONFIG_MOSIX + /* patch conflict mini-fix - */ + old_page->young = 0; + old_page->last_young = jiffies; +#endif /* CONFIG_MOSIX */ return 1; /* Minor fault */ bad_wp_page: @@ -1027,6 +1081,9 @@ vmtruncate_list(mapping->i_mmap, pgoff); if (mapping->i_mmap_shared != NULL) vmtruncate_list(mapping->i_mmap_shared, pgoff); +#ifdef CONFIG_MOSIX + VMODIFIED(inode); +#endif /* CONFIG_MOSIX */ out_unlock: spin_unlock(&mapping->i_shared_lock); @@ -1054,11 +1111,11 @@ return -EFBIG; } -/* +/* * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue - * the 'original' request together with the readahead ones... + * the 'original' request together with the readahead ones... 
*/ void swapin_readahead(swp_entry_t entry) { @@ -1092,8 +1149,18 @@ swp_entry_t entry = pte_to_swp_entry(orig_pte); pte_t pte; int ret = 1; +#ifdef CONFIG_MOSIX + /* patch conflict resolution + no swap readahead conflict here - Qlusters */ + int dpagein = (current->mosix.dflags & DPAGEIN) ^ DPAGEIN; +#endif /* CONFIG_MOSIX */ spin_unlock(&mm->page_table_lock); +#ifdef CONFIG_MOSIX /* comment as before - Qlusters */ + spin_lock_irq(&runqueue_lock); + current->mosix.dflags |= dpagein; + spin_unlock_irq(&runqueue_lock); +#endif /* CONFIG_MOSIX */ page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry); @@ -1104,6 +1171,12 @@ * we released the page table lock. */ int retval; +#ifdef CONFIG_MOSIX /* comment as before - Qlusters */ + spin_lock_irq(&runqueue_lock); + current->mosix.dflags &= ~dpagein; + spin_unlock_irq(&runqueue_lock); +#endif /* CONFIG_MOSIX */ + spin_lock(&mm->page_table_lock); retval = pte_same(*page_table, orig_pte) ? -1 : 1; spin_unlock(&mm->page_table_lock); @@ -1117,6 +1190,11 @@ mark_page_accessed(page); lock_page(page); +#ifdef CONFIG_MOSIX + spin_lock_irq(&runqueue_lock); + current->mosix.dflags &= ~dpagein; + spin_unlock_irq(&runqueue_lock); +#endif /* CONFIG_MOSIX */ /* * Back out if somebody else faulted in this pte while we @@ -1131,7 +1209,7 @@ } /* The page isn't present yet, go ahead with the fault. */ - + swap_free(entry); if (vm_swap_full()) remove_exclusive_swap_page(page); @@ -1155,7 +1233,7 @@ /* * We are called with the MM semaphore and page_table_lock * spinlock held to protect against concurrent faults in - * multithreaded programs. + * multithreaded programs. 
*/ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) { @@ -1323,6 +1401,14 @@ entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); +#ifdef CONFIG_MOSIX + { + struct page *page = pte_page(entry); + + page->young = 0; + page->last_young = jiffies; + } +#endif /* CONFIG_MOSIX */ establish_pte(vma, address, pte, entry); spin_unlock(&mm->page_table_lock); return 1; diff -urN linux-2.4.17/mm/mlock.c linux_umopenmosix/mm/mlock.c --- linux-2.4.17/mm/mlock.c Tue Sep 18 01:30:23 2001 +++ linux_umopenmosix/mm/mlock.c Wed Jun 26 23:45:18 2002 @@ -12,6 +12,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + static inline int mlock_fixup_all(struct vm_area_struct * vma, int newflags) { spin_lock(&vma->vm_mm->page_table_lock); @@ -198,6 +202,10 @@ unsigned long lock_limit; int error = -ENOMEM; +#ifdef CONFIG_MOSIX + if(stay_me_and_my_clones(DSTAY_FOR_MLOCK)) + return(-ENOMEM); +#endif /* CONFIG_MOSIX */ down_write(&current->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -220,6 +228,10 @@ error = do_mlock(start, len, 1); out: up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX + if(error) + unstay_mm(current->mm); +#endif /* CONFIG_MOSIX */ return error; } @@ -227,11 +239,18 @@ { int ret; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + panic("sys_munlock -- DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ down_write(&current->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; ret = do_mlock(start, len, 0); up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX + unstay_mm(current->mm); +#endif /* CONFIG_MOSIX */ return ret; } @@ -268,6 +287,10 @@ unsigned long lock_limit; int ret = -EINVAL; +#ifdef CONFIG_MOSIX + if(stay_me_and_my_clones(DSTAY_FOR_MLOCK)) + return(-ENOMEM); +#endif /* CONFIG_MOSIX */ down_write(&current->mm->mmap_sem); if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) goto out; @@ -287,6
+310,10 @@ ret = do_mlockall(flags); out: up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX + if(ret < 0) + unstay_mm(current->mm); +#endif /* CONFIG_MOSIX */ return ret; } @@ -294,8 +321,15 @@ { int ret; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + panic("sys_munlockall -- DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ down_write(&current->mm->mmap_sem); ret = do_mlockall(0); up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX + unstay_mm(current->mm); +#endif /* CONFIG_MOSIX */ return ret; } diff -urN linux-2.4.17/mm/mmap.c linux_umopenmosix/mm/mmap.c --- linux-2.4.17/mm/mmap.c Sun Nov 4 20:17:20 2001 +++ linux_umopenmosix/mm/mmap.c Wed Jun 26 23:45:18 2002 @@ -24,6 +24,16 @@ */ #undef DEBUG_MM_RB +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: @@ -66,6 +76,10 @@ /* Sometimes we want to use more memory than we have. */ if (sysctl_overcommit_memory) return 1; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) /* never fail coming back home */ + return(1); +#endif /* CONFIG_MOSIX */ /* The page cache contains buffer pages these days..
*/ free = atomic_read(&page_cache_size); @@ -99,12 +113,31 @@ if (file) { struct inode *inode = file->f_dentry->d_inode; +#ifdef CONFIG_MOSIX + int was_shared = (inode->i_mapping->i_mmap_shared != NULL); +#endif /* CONFIG_MOSIX */ if (vma->vm_flags & VM_DENYWRITE) atomic_inc(&inode->i_writecount); if(vma->vm_next_share) vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share; *vma->vm_pprev_share = vma->vm_next_share; +#ifdef CONFIG_MOSIX + if(was_shared && !inode->i_mapping->i_mmap_shared) + mosix_no_longer_monkey(inode); + if(current->mosix.dflags & DREMOTE) + { + spin_lock_irq(&current->sigmask_lock); + current->mosix.asig |= (1 << (REMOTE_FILE_RELEASED-1)); + spin_unlock_irq(&current->sigmask_lock); + } + else + tell_process(current, DREQ_FILEUNMAP); +#endif /* CONFIG_MOSIX */ } +#ifdef CONFIG_MOSIX + if(current->mosix.stay & DSTAY_PER_MM) + unstay_mm(current->mm); +#endif /* CONFIG_MOSIX */ } static inline void remove_shared_vm_struct(struct vm_area_struct *vma) @@ -149,6 +182,10 @@ unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + panic("sys_brk: DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ down_write(&mm->mmap_sem); if (brk < mm->end_code) @@ -162,6 +199,13 @@ if (brk <= mm->brk) { if (!do_munmap(mm, newbrk, oldbrk-newbrk)) goto set_brk; +#ifdef CONFIG_MOSIX + if(process_told(current, DREQ_FILEUNMAP)) + { + process_ack(current, DREQ_FILEUNMAP); + mosix_rebuild_file_list(); + } +#endif /* CONFIG_MOSIX */ goto out; } @@ -389,6 +433,65 @@ return 0; } +#ifdef CONFIG_MOSIX +#define MMAP_REMOTELY ((current->mosix.dflags&(DDEPUTY|DINCOMING)) == DDEPUTY) +#endif /* CONFIG_MOSIX */ + +#ifdef CONFIG_MOSIX_FS +unsigned long +mmap_read_instead(struct file *file, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long pgoff) +{ + extern asmlinkage long sys_mprotect(unsigned long,size_t,unsigned long); + extern asmlinkage long sys_munmap(unsigned long, size_t); +
int mm_downed = (current->mosix.dirty_bits & MMAP_MMDOWNED); + loff_t loff = (loff_t)pgoff << PAGE_SHIFT; /* widen first: shifting in unsigned long truncates offsets >= 4GB on 32-bit */ + mm_segment_t oldfs; + int error; + + if((flags & MAP_TYPE) != MAP_PRIVATE) + return(-EPERM); + if(!(file->f_mode & FMODE_READ)) + return(-EACCES); + len = PAGE_ALIGN(len); + addr = do_mmap_pgoff(NULL, addr, len, prot | PROT_WRITE, + flags & ~MAP_DENYWRITE, pgoff); + if(IS_ERR((const void *)addr)) + return(addr); + oldfs = get_fs(); + set_fs(USER_DS); + if(mm_downed) + up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX_DFSA + error = dfsa_optimized_read(file, loff, addr, len); +#else + error = file->f_op->read(file, (char *)addr, len, &loff); +#endif /* CONFIG_MOSIX_DFSA */ + if(error < 0) + { + set_fs(oldfs); + sys_munmap(addr, len); + if(mm_downed) + down_write(&current->mm->mmap_sem); + return(error); + } + if(error < len) + clear_user((void *)addr + error, len - error); + set_fs(oldfs); + if(!(prot & PROT_WRITE)) + { + prot &= (PROT_READ|PROT_EXEC); + if(MMAP_REMOTELY) + deputy_mprotect(addr, len, prot); + else + sys_mprotect(addr, len, prot); + } + if(mm_downed) + down_write(&current->mm->mmap_sem); + return(addr); +} +#endif /* CONFIG_MOSIX_FS */ + unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -398,9 +501,26 @@ int correct_wcount = 0; int error; rb_node_t ** rb_link, * rb_parent; +#ifdef CONFIG_MOSIX + uint32_t reason_to_come_back = 0; + int registration_result = 0; +#endif /* CONFIG_MOSIX */ if (file && (!file->f_op || !file->f_op->mmap)) return -ENODEV; +#ifdef CONFIG_MOSIX_DIAG + /* a remote mapping is unreadable!
*/ + if ((current->mosix.dflags & DREMOTE) && ((file && file->f_op->read) || + ((flags & MAP_TYPE) != MAP_PRIVATE))) + panic("Remote map"); +#endif /* CONFIG_MOSIX_DIAG */ +#ifdef CONFIG_MOSIX_FS + if(file && file_is_mfs(file)) + return(mmap_read_instead(file, addr, len, prot, flags, pgoff)); +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX + repeat_after_migrating_back: +#endif /* CONFIG_MOSIX */ if ((len = PAGE_ALIGN(len)) == 0) return addr; @@ -412,6 +532,9 @@ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EINVAL; +#ifdef CONFIG_MOSIX + if (!MMAP_REMOTELY) /* "mm" NULL on deputy */ +#endif /* CONFIG_MOSIX */ /* Too many mappings? */ if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; @@ -427,9 +550,17 @@ * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ +#ifdef CONFIG_MOSIX + vm_flags = calc_vm_flags(prot,flags) | (mm ? mm->def_flags : 0) | + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; +#else vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; +#endif /* CONFIG_MOSIX */ /* mlock MCL_FUTURE? 
*/ +#ifdef CONFIG_MOSIX + if (mm) +#endif /* CONFIG_MOSIX */ if (vm_flags & VM_LOCKED) { unsigned long locked = mm->locked_vm << PAGE_SHIFT; locked += len; @@ -454,9 +585,31 @@ vm_flags |= VM_SHARED | VM_MAYSHARE; if (!(file->f_mode & FMODE_WRITE)) vm_flags &= ~(VM_MAYWRITE | VM_SHARED); +#ifdef CONFIG_MOSIX + if(file->f_mode & FMODE_WRITE) + reason_to_come_back |= DSTAY_FOR_MONKEY; +#endif /* CONFIG_MOSIX */ /* fall through */ case MAP_PRIVATE: +#ifdef CONFIG_MOSIX + if(file->f_dentry->d_inode->i_mapping->i_mmap_shared) + reason_to_come_back |= DSTAY_FOR_MONKEY; + if(S_ISCHR(file->f_dentry->d_inode->i_mode)) + reason_to_come_back |= DSTAY_FOR_DEV; +#ifdef CONFIG_MOSIX_DIAG + else if(file->f_op && file->f_op->get_unmapped_area) + { + printk("get_unmapped_area on non-chr file " + "(mode 0%o, rdev 0x%x): " + "please inform the MOSIX team!\n", + file->f_dentry->d_inode->i_mode, + file->f_dentry->d_inode->i_rdev); + reason_to_come_back |= DSTAY_FOR_DEV; + } +#endif /* CONFIG_MOSIX_DIAG */ + if (!(current->mosix.dflags & DINCOMING)) +#endif /* CONFIG_MOSIX */ if (!(file->f_mode & FMODE_READ)) return -EACCES; break; @@ -476,6 +629,20 @@ break; } } +#ifdef CONFIG_MOSIX + if(current->mosix.dirty_bits & MMAP_MAYSHARE) + vm_flags |= VM_MAYSHARE; + if(MMAP_REMOTELY) + { + if(reason_to_come_back) + { + if(!mosix_go_home(1)) + return(-EAGAIN); + goto again_locally; + } + goto prepare_vma; + } +#endif /* CONFIG_MOSIX */ /* Clear old maps */ error = -ENOMEM; @@ -484,6 +651,13 @@ if (vma && vma->vm_start < addr + len) { if (do_munmap(mm, addr, len)) return -ENOMEM; +#ifdef CONFIG_MOSIX + if(process_told(current, DREQ_FILEUNMAP)) + { + process_ack(current, DREQ_FILEUNMAP); + mosix_rebuild_file_list(); + } +#endif /* CONFIG_MOSIX */ goto munmap_back; } @@ -493,6 +667,9 @@ return -ENOMEM; /* Private writable mapping? Check memory availability.. 
*/ +#ifdef CONFIG_MOSIX + if(!(current->mosix.dflags & DINCOMING)) +#endif /* CONFIG_MOSIX */ if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE && !(flags & MAP_NORESERVE) && !vm_enough_memory(len >> PAGE_SHIFT)) @@ -507,6 +684,9 @@ * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ +#ifdef CONFIG_MOSIX + prepare_vma: +#endif /* CONFIG_MOSIX */ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!vma) return -ENOMEM; @@ -522,6 +702,53 @@ vma->vm_private_data = NULL; vma->vm_raend = 0; +#ifdef CONFIG_MOSIX + if(file && !(current->mosix.dflags & DREMOTE) && (registration_result = + mosix_register_a_file(file, (flags & MAP_DENYWRITE) != 0)) < 0) + { + error = registration_result; + goto free_vma; + } + /* + * even if we don't end up mapping locally, perform a dummy mapping, + * only to check for fs-specific errors, so we do not fail if and when + * we ever come home: + */ + if(MMAP_REMOTELY) + { + vma->vm_file = file; + if(file && (error = file->f_op->mmap(file, vma))) + goto free_vma; + /* did the mapping itself cause us to arrive back home + * (perhaps we were mapping /proc/nnnn/mem)? */ + if(!MMAP_REMOTELY) + goto again_locally_with_vma; + addr = mosix_deputy_mmap(file, addr, (flags & MAP_FIXED) != 0, + len, vma->vm_flags, pgoff, + file ? file->f_dentry->d_inode->i_size : 0, + file ? vma->vm_ops->nopage : NULL); + if(IS_ERR((const void *)addr)) + { + error = addr; + if(error == -EDOITATHOME) + /* remote resource shortage: don't despair, come back */ + if(mosix_go_home(1)) + goto again_locally_with_vma; + goto free_vma; + } + /* we did not really want that vma... 
only to check */ + kmem_cache_free(vm_area_cachep, vma); + /* meanwhile someone else could map the same file shared, so: */ + if(file && file->f_dentry->d_inode->i_mapping->i_mmap_shared) + { + if(!mosix_go_home(0)) + return(-EAGAIN); + stay_me_and_my_clones(DSTAY_FOR_MONKEY); + } + return(addr); + } + +#endif /* CONFIG_MOSIX */ if (file) { error = -EINVAL; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) @@ -560,6 +787,12 @@ mm->locked_vm += len >> PAGE_SHIFT; make_pages_present(addr, addr + len); } +#ifdef CONFIG_MOSIX + if(reason_to_come_back) + stay_me_and_my_clones(reason_to_come_back); + if(file && (vm_flags & VM_SHARED)) + mosix_bring_monkey_users_back(file->f_dentry->d_inode); +#endif /* CONFIG_MOSIX */ return addr; unmap_and_free_vma: @@ -572,7 +805,22 @@ zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start); free_vma: kmem_cache_free(vm_area_cachep, vma); +#ifdef CONFIG_MOSIX + if(registration_result > 0) + mosix_undo_last_file_registration(file, registration_result); +#endif /* CONFIG_MOSIX */ return error; +#ifdef CONFIG_MOSIX +again_locally_with_vma: + kmem_cache_free(vm_area_cachep, vma); + if(registration_result > 0) + mosix_undo_last_file_registration(file, registration_result); + registration_result = 0; +again_locally: + mm = current->mm; + reason_to_come_back = 0; + goto repeat_after_migrating_back; +#endif /* CONFIG_MOSIX */ } /* Get an address range which is currently unmapped. 
@@ -618,6 +866,10 @@ unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { +#ifdef CONFIG_MOSIX + if(MMAP_REMOTELY && !(flags & MAP_FIXED)) + return(PAGE_ALIGN(addr)); +#endif /* CONFIG_MOSIX */ if (flags & MAP_FIXED) { if (addr > TASK_SIZE - len) return -EINVAL; @@ -992,9 +1244,20 @@ int ret; struct mm_struct *mm = current->mm; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + return(deputy_munmap(addr, len)); +#endif /* CONFIG_MOSIX */ down_write(&mm->mmap_sem); ret = do_munmap(mm, addr, len); up_write(&mm->mmap_sem); +#ifdef CONFIG_MOSIX + if(process_told(current, DREQ_FILEUNMAP)) + { + process_ack(current, DREQ_FILEUNMAP); + mosix_rebuild_file_list(); + } +#endif /* CONFIG_MOSIX */ return ret; } @@ -1014,6 +1277,10 @@ if (!len) return addr; +#ifdef CONFIG_MOSIX + if(current->mosix.dflags & DDEPUTY) + return(mosix_deputy_brk(addr, len)); +#endif /* CONFIG_MOSIX */ /* * mlock MCL_FUTURE? */ diff -urN linux-2.4.17/mm/mprotect.c linux_umopenmosix/mm/mprotect.c --- linux-2.4.17/mm/mprotect.c Tue Sep 18 01:30:23 2001 +++ linux_umopenmosix/mm/mprotect.c Wed Jun 26 23:45:18 2002 @@ -270,6 +270,10 @@ struct vm_area_struct * vma, * next, * prev; int error = -EINVAL; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + panic("sys_mprotect: DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ if (start & ~PAGE_MASK) return -EINVAL; len = PAGE_ALIGN(len); diff -urN linux-2.4.17/mm/mremap.c linux_umopenmosix/mm/mremap.c --- linux-2.4.17/mm/mremap.c Fri Sep 21 06:31:26 2001 +++ linux_umopenmosix/mm/mremap.c Wed Jun 26 23:45:18 2002 @@ -13,6 +13,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + extern int vm_enough_memory(long pages); static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) @@ -218,6 +222,10 @@ struct vm_area_struct *vma; unsigned long ret = -EINVAL; +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DDEPUTY) + 
panic("sys_mremap: DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -348,5 +356,12 @@ down_write(&current->mm->mmap_sem); ret = do_mremap(addr, old_len, new_len, flags, new_addr); up_write(&current->mm->mmap_sem); +#ifdef CONFIG_MOSIX + if(process_told(current, DREQ_FILEUNMAP)) + { + process_ack(current, DREQ_FILEUNMAP); + mosix_rebuild_file_list(); + } +#endif /* CONFIG_MOSIX */ return ret; } diff -urN linux-2.4.17/mm/oom_kill.c linux_umopenmosix/mm/oom_kill.c --- linux-2.4.17/mm/oom_kill.c Sun Nov 4 03:05:25 2001 +++ linux_umopenmosix/mm/oom_kill.c Wed Jun 26 23:45:18 2002 @@ -89,6 +89,11 @@ * Superuser processes are usually more important, so we make it * less likely that we kill those. */ +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + points *= 2; + else +#endif /* CONFIG_MOSIX */ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) || p->uid == 0 || p->euid == 0) points /= 4; @@ -99,6 +104,9 @@ * tend to only have this flag set on applications they think * of as important. */ +#ifdef CONFIG_MOSIX + if(!(p->mosix.dflags & DREMOTE)) +#endif /* CONFIG_MOSIX */ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) points /= 4; #ifdef DEBUG @@ -153,6 +161,11 @@ p->flags |= PF_MEMALLOC | PF_MEMDIE; /* This process has hardware access, be more careful.
*/ +#ifdef CONFIG_MOSIX + if(p->mosix.dflags & DREMOTE) + send_sig(SIGTERM, p, 1); + else +#endif /* CONFIG_MOSIX */ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) { force_sig(SIGTERM, p); } else { diff -urN linux-2.4.17/mm/page_alloc.c linux_umopenmosix/mm/page_alloc.c --- linux-2.4.17/mm/page_alloc.c Tue Nov 20 02:35:40 2001 +++ linux_umopenmosix/mm/page_alloc.c Wed Jun 26 23:45:18 2002 @@ -370,7 +370,20 @@ if (!(gfp_mask & __GFP_WAIT)) return NULL; +#ifdef CONFIG_MOSIX + { + __typeof__(jiffies)before = jiffies; +#endif /* CONFIG_MOSIX */ page = balance_classzone(classzone, gfp_mask, order, &freed); +#ifdef CONFIG_MOSIX + if((before = jiffies - before)) +#if 1000000 % HZ == 0 + current->mosix.pagetime += before * (1000000 / HZ); +#else + current->mosix.pagetime += before * 1000000 / HZ; +#endif + } +#endif /* CONFIG_MOSIX */ if (page) return page; diff -urN linux-2.4.17/mm/vmscan.c linux_umopenmosix/mm/vmscan.c --- linux-2.4.17/mm/vmscan.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/mm/vmscan.c Wed Jun 26 23:45:18 2002 @@ -47,6 +47,16 @@ pte_t pte; swp_entry_t entry; +#ifdef CONFIG_MOSIX + /* patch conflict resolution + no swap_out conflict here - Qlusters */ + if(page->young) + { + page->young = 0; + return(0); + } +#endif /* CONFIG_MOSIX */ + /* Don't look at this pte if it's been accessed recently. 
*/ if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) { mark_page_accessed(page); @@ -165,9 +175,9 @@ pmd_clear(dir); return count; } - + pte = pte_offset(dir, address); - + pmd_end = (address + PMD_SIZE) & PMD_MASK; if (end > pmd_end) end = pmd_end; @@ -207,10 +217,10 @@ pmd = pmd_offset(dir, address); - pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK; + pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK; if (pgd_end && (end > pgd_end)) end = pgd_end; - + do { count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); if (!count) @@ -258,7 +268,7 @@ struct vm_area_struct* vma; /* - * Find the proper vm-area after freezing the vma chain + * Find the proper vm-area after freezing the vma chain * and ptes. */ spin_lock(&mm->page_table_lock); @@ -689,7 +699,7 @@ /* * The background pageout daemon, started as a kernel thread - * from the init process. + * from the init process. * * This basically trickles out pages so that we have _some_ * free memory available even if there is no other activity @@ -708,7 +718,7 @@ daemonize(); strcpy(tsk->comm, "kswapd"); sigfillset(&tsk->blocked); - + /* * Tell the memory management that we're a "memory allocator", * and that if we need more memory we should get access to it @@ -742,6 +752,9 @@ * the processes needing more memory will wake us * up on a more timely basis. */ +#ifdef CONFIG_MOSIX + refill_inactive((nr_active_pages + 63) >> 6); +#endif /* CONFIG_MOSIX */ kswapd_balance(); run_task_queue(&tq_disk); } diff -urN linux-2.4.17/mos/Makefile linux_umopenmosix/mos/Makefile --- linux-2.4.17/mos/Makefile Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/Makefile Wed Jun 26 23:45:18 2002 @@ -0,0 +1,38 @@ +# +# Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) +# +# Permission to use this software is hereby granted under the terms of the +# GNU General Public License, as published by the Free Software Foundation. 
+# +# THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY +# WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING +# FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. +# +# +# Makefile for MOS permanent interface. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# + +O_TARGET := mos.o +obj-y := alternate.o auto_syscalls.o balance.o comm.o config.o decay.o \ + deputy.o div.o export.o freemem.o init.o info.o kernel.o load.o \ + log.o mig.o mosadmin.o mosproc.o prequest.o remote.o rinode.o \ + service.o syscalls.o ucache.o + +obj-$(CONFIG_MOSIX_DFSA) += badops.o dfsa.o + +export-objs = export.o + +include $(TOPDIR)/Rules.make + +auto_syscalls.c: mkdefcalls $(TOPDIR)/arch/i386/kernel/entry.S syscalls.c + ./mkdefcalls + +mkdefcalls: mkdefcalls.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h + $(HOSTCC) $(HOSTCFLAGS) -I$(HPATH) -o mkdefcalls -DENTRY_IN=\"$(TOPDIR)/arch/i386/kernel/entry.S\" -DSYSCALLS_IN=\"$(TOPDIR)/mos/syscalls.c\" -DSYSCALLS_OUT=\"$(TOPDIR)/mos/auto_syscalls.c\" mkdefcalls.c + +alternate.c: copy_unconf $(TOPDIR)/fs/binfmt_aout.c $(TOPDIR)/.config + /bin/sh ./copy_unconf "$(CONFIG_BINFMT_AOUT)" $(TOPDIR)/fs/binfmt_aout.c > alternate.c diff -urN linux-2.4.17/mos/badops.c linux_umopenmosix/mos/badops.c --- linux-2.4.17/mos/badops.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/badops.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DFSA +#include + +static int +bad_result(void) +{ + printk("%s: Tried using a DFSA 'failed_to_reopen' file -- killed!\n", + desc_mostask(NULL)); + force_sig(SIGKILL, current); + return(-EBADF); +} + +loff_t +all_bad_llseek(struct file *f, loff_t x, int y) +{ + return(bad_result()); +} + +ssize_t +all_bad_read(struct file *f, char *x, size_t y, loff_t *z) +{ + return(bad_result()); +} + +ssize_t +all_bad_write(struct file *f, const char *x, size_t y, loff_t *z) +{ + return(bad_result()); +} + +unsigned int +all_bad_poll(struct file *f, struct poll_table_struct *x) +{ + return(bad_result()); +} + +int +all_bad_ioctl(struct inode *i, struct file *x, unsigned int y, unsigned long z) +{ + return(bad_result()); +} + +int +all_bad_mmap(struct file *f, struct vm_area_struct *x) +{ + return(bad_result()); +} + +int +all_bad_open(struct inode *i, struct file *x) +{ + return(bad_result()); +} + +int +all_bad_fsync(struct file *f, struct dentry *d, int datasync) +{ + return(bad_result()); +} + +int +all_bad_fasync(int x, struct file *f, int y) +{ + return(bad_result()); +} + +int +all_bad_lock(struct file *f, int x, struct file_lock *y) +{ + return(bad_result()); +} + +struct file_operations all_bad_file_operations = +{ + llseek: all_bad_llseek, + read: all_bad_read, + write: all_bad_write, + poll: all_bad_poll, + ioctl: all_bad_ioctl, + mmap: all_bad_mmap, + open: all_bad_open, + fsync: all_bad_fsync, + fasync: all_bad_fasync, + lock: all_bad_lock +}; + +int +all_bad_create(struct inode *i, struct dentry *x, int y) +{ + return(bad_result()); +} + +struct 
dentry * +all_bad_lookup(struct inode *i, struct dentry *x) +{ + return((struct dentry *)bad_result()); +} + +int +all_bad_link(struct dentry *d, struct inode *i, struct dentry *e) +{ + return(bad_result()); +} + +int +all_bad_unlink(struct inode *i, struct dentry *d) +{ + return(bad_result()); +} + +int +all_bad_symlink(struct inode *i, struct dentry *d, const char *a) +{ + return(bad_result()); +} + +int +all_bad_follow_link(struct dentry *dp, struct nameidata *nd) +{ + return(bad_result()); +} + +int +all_bad_get_block(struct inode *f, struct buffer_head *x, int y) +{ + return(bad_result()); +} + +int +all_bad_readpage(struct file *f, struct page *x) +{ + return(bad_result()); +} + +int +all_bad_writepage(struct file *f, struct page *x) +{ + return(bad_result()); +} + +int +all_bad_bmap(struct inode *i, int x) +{ + return(bad_result()); +} + +void +all_bad_truncate(struct inode *i) +{ + bad_result(); +} + +int +all_bad_permission(struct inode *i, int x) +{ + return(bad_result()); +} + +int +all_bad_smap(struct inode *i, int x) +{ + return(bad_result()); +} + +int +all_bad_revalidate(struct dentry *d) +{ + return(bad_result()); +} + +int +all_bad_setattr(struct dentry *d, struct iattr *attr) +{ + return(bad_result()); +} + +struct inode_operations all_bad_inode_operations = { + create: all_bad_create, + lookup: all_bad_lookup, + link: all_bad_link, + unlink: all_bad_unlink, + symlink: all_bad_symlink, + follow_link: all_bad_follow_link, + truncate: all_bad_truncate, + permission: all_bad_permission, + revalidate: all_bad_revalidate, + setattr: all_bad_setattr, +}; + +#endif /* CONFIG_MOSIX_DFSA */ diff -urN linux-2.4.17/mos/balance.c linux_umopenmosix/mos/balance.c --- linux-2.4.17/mos/balance.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/balance.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1241 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General 
Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Amnon Barak. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ + +struct opcost remote_here_adjusted[MAX_MOSIX_TOPOLOGY]; +#ifndef CONFIG_MOSIX_TOPOLOGY +struct opcost remote_here; +#endif /* CONFIG_MOSIX_TOPOLOGY */ + +static int load_balancing_counter; +static int memory_balancing_counter; + + +#ifdef CONFIG_MOSIX_DEBUG +char * +fformat(char *buf, double v) +{ + char *p = buf; + int started = 0; + double tens; + int exp = 0; + int n; + + if(v == 0.0) + return("0"); + if(v < 0.0) + { + v = -v; + *p++ = '-'; + } + if(v > 1e20) + while(v >= 10.0) + { + exp++; + v /= 10; + } + else if(v < 1e-20) + while(v < 1.0) + { + exp--; + v *= 10; + } + for(tens = 1e20 ; tens >= 1.0 ; tens /= 10.0) + if(started || v >= tens) /* >= so an exact power of ten emits its leading digit */ + { + n = v / tens; + v -= tens * n; + *p++ = '0' + n; + started++; + } + if(started < 8) + { + *p++ = '.'; + for(tens = 0.1 ; started++ < 8 ; tens /= 10.0) + { + n = v / tens; + v -= tens *n; + *p++ = '0' + n; + } + while(*(p-1) == '0') + p--; + if(*(p-1) == '.') + p--; + } + if(exp) + { + *p++ = 'E'; + if(exp < 0) + { + exp = -exp; + *p++ = '-'; + } + if(exp >= 100) + { + *p++ = exp / 100 + '0'; + *p++ = exp / 10 % 10 + '0'; /* always print the tens digit after a hundreds digit (E105, not E15) */ + } + else if(exp >= 10) + *p++ = exp / 10 + '0'; + *p++ = exp % 10 + '0'; + } + *p = '\0'; + return(buf); +} +#endif /* CONFIG_MOSIX_DEBUG */ + +void +age_balancing(void) +{ + spin_lock_irq(&whereto_lock); + if(load_balancing_counter-- == 0 && chosen_for_balance) + { + free_task_struct(chosen_for_balance); + chosen_for_balance = NULL; + } + if(memory_balancing_counter--
== 0 && chosen_for_mdp) + { + free_task_struct(chosen_for_mdp); + chosen_for_mdp = NULL; + } + spin_unlock_irq(&whereto_lock); +} + +void +mosix_clear_statistics(void) +{ + register struct mosix_task *m = ¤t->mosix; + + m->ndemandpages = 0; + m->nsyscalls = 0; + m->ncopyouts = 0; + m->copyoutbytes = 0; + m->ncopyins = 0; + m->copyinbytes = 0; + m->iocounter = 0; + m->cutime = 0; + m->dctime = 0; + m->pagetime = 0; + read_lock(&tasklist_lock); + m->decsecs = 0; + read_unlock(&tasklist_lock); + spin_lock_irq(&whereto_lock); + m->last_consider = 0; + m->last_mconsider = time_now(); + spin_unlock_irq(&whereto_lock); + if(m->dflags & (DREMOTE|DDEPUTY)) + m->uttime = 0; + else + m->uttime = -current->times.tms_utime; +#ifdef CONFIG_MOSIX_DFSA + m->copy_ins = 0; + m->bytes_in = 0; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + if(m->mfs_stats) + m->mfs_stats->nnodes = 0; +#endif /* CONFIG_MOSIX_FS */ +} + +#ifdef CONFIG_MOSIX_TOPOLOGY +#define sel_routine(routine, struct_name) \ +static int \ +routine(struct struct_name *costs, int with) \ +{ \ + register int i; \ + \ + for(i = 0 ; i < MAX_MOSIX_TOPOLOGY ; i++ , costs++) \ + { \ + if(!costs->first) \ + return(i); \ + if(!costs->last) \ + { \ + if(with != costs->first) \ + return(i); \ + } \ + else if((costs->last >= costs->first) ? 
/*
 * io_cost - estimate the communication overhead of a task from its
 * accumulated statistics and a table of per-operation costs.
 *
 * m:	  task statistics (byte totals, event counters).
 * costs: per-operation cost table.  With CONFIG_MOSIX_TOPOLOGY it is an
 *	  array and the entry matching node "with" is selected first.
 * with:  (topology builds only) node number used to pick the entry.
 *
 * Returns the sum of the byte-proportional cost (scaled down by 1024)
 * and the event-proportional cost (scaled down by EVENT_COUNTER_SHIFT
 * bits).  NOTE(review): callers compare the result against MILLION
 * per second, so the unit is presumably microseconds - confirm.
 */
int64_t
#ifdef CONFIG_MOSIX_TOPOLOGY
io_cost(struct mosix_task *m, struct opcost *costs, int with)
#else
io_cost(struct mosix_task *m, struct opcost *costs)
#endif /* CONFIG_MOSIX_TOPOLOGY */
{
	int64_t b, e;	/* b: byte-proportional part, e: event-proportional part */

#ifdef CONFIG_MOSIX_TOPOLOGY
	costs += select_cost(costs, with);
#endif /* CONFIG_MOSIX_TOPOLOGY */
	b = m->copyoutbytes * costs->outkb + m->copyinbytes * costs->inkb;
	e = m->ndemandpages * costs->page + m->nsyscalls * costs->syscall;
	/* the statement controlled by this "if" is supplied by whichever
	 * branch of the following #if is compiled */
	if(costs->out || costs->in)	/* they are usually 0, so save time */
#if COPY_COUNTER_MULTIPLIER != EVENT_COUNTER_VALUE
#warning no reason they should not be the same, but inefficient code provided:
		return((b >> 10) + (e >> EVENT_COUNTER_SHIFT) +
			(m->ncopyouts * costs->out + m->ncopyins * costs->in) /
			COPY_COUNTER_MULTIPLIER);
#else
		/* copy counters share the event scale: fold them into e */
		e += m->ncopyouts * costs->out + m->ncopyins * costs->in;
#endif
#if EVENT_COUNTER_SHIFT == 10
	/* we are lucky that the cost of a system call is the same
	 * order of magnitude as copying 1KB to/from user-space
	 */
	return((b + e) >> 10);
#else
	return((b >> 10) + (e >> EVENT_COUNTER_SHIFT));
#endif
}
+mfs_cos(int64_t conns, int64_t inbytes, int64_t outbytes, enum mfs_where type) +#endif /* CONFIG_MOSIX_TOPOLOGY */ +{ +#ifdef CONFIG_MOSIX_TOPOLOGY + if(with == 0) + with = PE; + costs += select_mfscost(costs, with); +#else + struct mfs_cost *costs = mfs_cost; +#endif /* CONFIG_MOSIX_TOPOLOGY */ + if(type == MFS_CLIENT) + return(conns * costs->MFS_CONN_C + ((inbytes*costs->MFS_INKB_C + + outbytes * costs->MFS_OUTKB_C) >> 10)); + else + return(conns * costs->MFS_CONN_S + ((inbytes*costs->MFS_INKB_S + + outbytes * costs->MFS_OUTKB_S) >> 10)); +} +#endif /* CONFIG_MOSIX_FS */ + +void +add_statistics_to_ancesstor(struct task_struct *p, struct task_struct *pp) +{ + struct mosix_task *m = &p->mosix; + struct mosix_task *mm = &pp->mosix; + int remote = (m->dflags & DREMOTE); + int64_t ut, age; + unsigned long newcounter; + + lock_mosix(); + age = ticks_to_ms(p->times.tms_utime + m->uttime) + m->cutime; + ut = age - m->dctime; + if(remote) + age += remote_costs_in_ms(m); + if(age > 4 * MILLION) + /* over 4 seconds, son is too old and not dependent any more */ + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_ANCESSTOR) + { + printk("addstatistics: %s to ", desc_mostask(m)); + printk("%s: independent, age=%d:%06d\n", + desc_mostask(mm), (int)(age/MILLION), + (int)(age-age/MILLION*MILLION)); + } +#endif + unlock_mosix(); + return; + } + mm->cutime += ut; + mm->nsyscalls += m->nsyscalls; + mm->ncopyouts += m->ncopyouts; + mm->copyoutbytes += m->copyoutbytes; + mm->ncopyins += m->ncopyins; + mm->copyinbytes += m->copyinbytes; + mm->ndemandpages += m->ndemandpages; + if(remote) + { + newcounter = mm->iocounter + m->ndemandpages + m->nsyscalls + + m->ncopyouts + m->copyoutbytes + + m->ncopyins + m->copyinbytes; + if((newcounter ^ mm->iocounter) & 0xfff00000) + { + mosix_add_to_whereto(pp, IOBALANCE); + mm->iocounter = 0; + } + else + mm->iocounter = newcounter; + } + mm->pagetime += m->pagetime; +#ifdef CONFIG_MOSIX_FS + if(mfs_add_stats(m->mfs_stats, mm->mfs_stats, mm)) + 
mosix_add_to_whereto(pp, MFSBALANCE); +#endif /* CONFIG_MOSIX_FS */ + unlock_mosix(); +} + +/* + * produce statistics at the end of local system-calls: + */ +void +mosix_local_syscall(void) +{ + register struct mosix_task *m = ¤t->mosix; + +#ifdef CONFIG_MOSIX_DFSA + if(m->dflags & (DDEPUTY|DREMOTE|DSTATSDOWN)) + { + m->copy_ins = m->bytes_in = 0; + m->dflags &= ~DSTATSDOWN; + return; + } + if(m->copy_ins) + { + m->ncopyins += m->copy_ins * COPY_COUNTER_MULTIPLIER; + m->copyinbytes += m->bytes_in; + m->copy_ins = m->bytes_in = 0; + } +#else + if(!(m->dflags & (DDEPUTY|DREMOTE))) +#endif /* CONFIG_MOSIX_DFSA */ + m->nsyscalls += EVENT_COUNTER_VALUE; +} + +/* + * altruistic load: + * this function takes into account the cost of a load not only on a given + * process, but also on the other processes in its environment. + * When there are enough processors to take the load, a process gets served as + * soon as practicable, as fast as the processor can run, eg. by the threshold. + * Otherwise, there is a cost which is: + * given L=load, N=ncpus: + * For the process itself: the cost is L. + * For the other (N*L-1) processes: + * Without the extra process, they would each need (N*L-1)/N units of time + * to complete. + * With the extra process, they will need L units of time to complete. + * the loss is therefore (N*L-1)*(L-(N*L-1)/N) = (N*L-1)*(L-L+1/N) = L - 1/N. 
+ */ +inline int +altload(int load, int speed, int ncpus) +{ + int threshold = MF * STD_SPD / speed; + + if(load <= threshold) + return(threshold); + if(load <= threshold + threshold/ncpus) + return(threshold + ncpus * load * (load-threshold) / threshold); + return(2 * load - threshold / ncpus); +} + +int +send_local_aload(int type) +{ + struct aload_h a; + + read_lock_bh(&loadinfo_lock); + a.aload = altload(export_load, loadinfo[0].speed, loadinfo[0].ncpus); + read_unlock_bh(&loadinfo_lock); + a.freepages = export_mem(); +#ifdef CONFIG_MOSIX_TOPOLOGY + memcpy(a.mfscosts, mfs_cost, sizeof(mfs_cost)); +#endif /* CONFIG_MOSIX_TOPOLOGY */ + return(comm_send(type, &a, sizeof(a), NULL, 0, 0)); +} + +void +choose(void) +{ + int64_t priority, lastpri = 0; + int pri, bestpri; + register int load, bestload; + struct task_struct *p, *sel = NULL; + register struct mosix_task *m; + int mintime, very_mintime; + int minload; + + mintime = MILLION * acpuse / (smp_num_cpus * MF); + very_mintime = mintime / 3; + minload = 4 * acpuse / smp_num_cpus; /* normally 4*MF */ + bestpri = mintime; + bestload = -1; + read_lock(&tasklist_lock); + if(!chosen_for_balance) + for_each_task(p) + if(!((m = &p->mosix)->dflags & (DDEPUTY|DFINISHED|DPASSING)) && + !m->stay && !m->whereto && p != chosen_for_mdp && + (!mosadmin_mode_lstay || (m->dflags & DREMOTE))) + { +#ifdef CONFIG_MOSIX_DIAG + if(LOGICAL_STATE(p) == TASK_ZOMBIE) + { + mosix_panic("ZOMBIE without DFINISHED"); + continue; + } +#endif /* CONFIG_MOSIX_DIAG */ + priority = ticks_to_ms(p->times.tms_utime + m->uttime) + + m->cutime; + /* (note: m->dctime deliberately not taken into account: + * otherwise if statistics decay too quickly, there may never + * be a process chosen for migration) */ + if(m->dflags & DREMOTE) +#ifdef CONFIG_MOSIX_TOPOLOGY + priority += io_cost(m, remote_here_adjusted, m->deppe); +#else + priority += io_cost(m, remote_here_adjusted); +#endif /* CONFIG_MOSIX_TOPOLOGY */ + priority -= m->last_consider; + if(priority 
< 0) + /* it "should not" happen, but some operating systems (BSDI) + can have user-time go back due to rounding... */ + continue; + pri = (priority > 4*MILLION) ? 4*MILLION : priority; /*4 secs*/ + if(pri < bestpri) + { + if(bestpri == mintime && + /* (thrashing, or (m->load <= minload)) */ + m->load > minload && pri >= very_mintime) + pri = mintime; + else + continue; + } + load = m->load; + if(pri > bestpri || load > bestload) + { + bestload = load; + if(sel) + free_task_struct(sel); + get_task_struct(p); + sel = p; + bestpri = pri; + lastpri = priority; + } + } + read_unlock(&tasklist_lock); + if(!sel) + return; + spin_lock_irq(&whereto_lock); + if(chosen_for_balance) + { + free_task_struct(sel); + spin_unlock_irq(&whereto_lock); + return; + } + sel->mosix.last_consider += lastpri; + chosen_for_balance = sel; + mosix_do_add_to_whereto(sel, BALANCE); + load_balancing_counter = BALANCING_TIMEOUT; + if(sel->mosix.whereto != BALANCE) + { + free_task_struct(chosen_for_balance); + chosen_for_balance = NULL; + } + spin_unlock_irq(&whereto_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_LOAD|DSDEB_CONSIDER)) + printk("selected %s for load-balancing, pri=%d, load=%d, priority=%d\n", + desc_mostask(&sel->mosix), (int)bestpri, bestload, + (int)lastpri); +#endif /* CONFIG_MOSIX_DEBUG */ +} + +void +load_balance(void) +{ + register int load; + register struct loadinfo *l; + int doit = 0; + + if(mosadmin_mode_stay) + return; + + read_lock_bh(&loadinfo_lock); + load = altload(loadinfo[0].load, loadinfo[0].speed, loadinfo[0].ncpus); + for(l = &loadinfo[1]; l < &loadinfo[INFO_WIN] ; l++) + if(l->pe && l->mem > 0 && altload(l->load, l->speed, l->ncpus) <= load) + { + doit = 1; + break; + } + read_unlock_bh(&loadinfo_lock); + if(doit) + choose(); +} + +void +mchoose(int need) +{ + int quality, bestquality = 0; + int df, diff = MIN_MCHOOSE_AGAIN; + now_t dff; + struct task_struct *p, *sel = NULL; + register struct mosix_task *m; + + read_lock(&tasklist_lock); + 
if(!chosen_for_mdp) + for_each_task(p) + if(!((m = &p->mosix)->dflags & (DDEPUTY|DFINISHED|DPASSING)) && + !m->stay && !m->whereto && p != chosen_for_balance && + (!mosadmin_mode_lstay || (m->dflags & DREMOTE))) + { +#ifdef CONFIG_MOSIX_DIAG + if(LOGICAL_STATE(p) == TASK_ZOMBIE) + { + mosix_panic("ZOMBIE without DFINISHED"); + continue; + } +#endif /* CONFIG_MOSIX_DIAG */ + dff = time_since(m->last_mconsider); + if(dff > MAX_MCHOOSE_AGAIN) + df = MAX_MCHOOSE_AGAIN; + else + df = dff; + if(df < diff) + continue; + quality = memory_relief_quality(p, need); + if(quality > bestquality || (quality > 0 && df > diff)) + { + if(sel) + free_task_struct(sel); + get_task_struct(p); + sel = p; + bestquality = quality; + diff = df; + } + } + read_unlock(&tasklist_lock); + if(!sel) + return; + spin_lock_irq(&whereto_lock); + if(chosen_for_mdp) + { + free_task_struct(sel); + spin_unlock_irq(&whereto_lock); + return; + } + sel->mosix.page_allocs = 0; + sel->mosix.last_mconsider = time_now(); + chosen_for_mdp = sel; + mosix_do_add_to_whereto(sel, MEMBALANCE); + memory_balancing_counter = BALANCING_TIMEOUT; + if(sel->mosix.whereto != MEMBALANCE) + { + free_task_struct(chosen_for_mdp); + chosen_for_mdp = NULL; + } + spin_unlock_irq(&whereto_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("selected %s for memory-balancing, quality=%d\n", + desc_mostask(&sel->mosix), bestquality); +#endif +} + +void +memory_balance(void) +{ + register struct loadinfo *l; + int need; + int doit = 0; + + if(mosadmin_mode_stay || mosadmin_mode_quiet || + (need = memory_badly_required()) <= 0) + return; + read_lock_bh(&loadinfo_lock); + for(l = &loadinfo[1]; l < &loadinfo[INFO_WIN] ; l++) + if(l->pe && l->mem > MIN_EXPECTED_PROC_SIZE) + { + doit = 1; + break; + } + read_unlock_bh(&loadinfo_lock); + if(doit) + mchoose(need); +} + +void +changed_my_mind_and_staying(void) +{ + current->mosix.pages_i_bring = 0; +} + +void +ask_deputy_to_goto(int where) +{ + void *head; + int hlen; + 
+ if(comm_send(DEP_CONSIDER|REPLY, &where, sizeof(int), NULL, 0, 0) || + (where && remote_wait(DEP_CONSIDER, &head, &hlen))) + remote_disappear(); + if(where) + comm_free(head); +} + +struct sonstats +{ + int64_t utime; /* ms */ + int64_t ndemandpages; + int64_t nsyscalls; + int64_t ncopyouts; + int64_t copyoutbytes; + int64_t ncopyins; + int64_t copyinbytes; + int64_t pagetime; /* ms */ +#ifdef CONFIG_MOSIX_FS + struct mfs_stats mfs; +#endif /* CONFIG_MOSIX_FS */ +}; + +void +consider(int reason, struct sonstats *sons) +{ +#define INFLOAD 0x8000000 +#define MAX_CONSIDERED (INFO_WIN+1+MAX_MFS_STATNODES) +#if MAX_CONSIDERED > 127 +#error char is not enough for map +#endif /*MAX_CONSIDERED*/ + struct task_struct *p = current; + register struct mosix_task *m = &p->mosix; + int mach[MAX_CONSIDERED], aload[MAX_CONSIDERED]; +#ifdef CONFIG_MOSIX_TOPOLOGY + struct costinfo + { + struct opcost opcost[MAX_MOSIX_TOPOLOGY]; +#ifdef CONFIG_MOSIX_FS + struct mfs_cost mfscost[MAX_MOSIX_TOPOLOGY]; +#endif /* CONFIG_MOSIX_FS */ + } *costinfo = NULL; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_FS + struct mfs_stats *mfs_tot; + char map[MAX_MFS_STATNODES]; + int orig_n; +#endif /* CONFIG_MOSIX_FS */ + double costs[MAX_CONSIDERED]; + register int i, n = 0, j, k; + int depmach = -1; + double tim, dtim, rtim; + int remote = (m->dflags & DREMOTE) ? 
1 : 0; + int64_t ms; + struct aload_h a; +#ifdef CONFIG_MOSIX_DEBUG + char fbuf1[40], fbuf2[40]; +#endif /* CONFIG_MOSIX_DEBUG */ + + if((m->stay & DSTAY) || !PE || mosadmin_mode_quiet) + goto out; + if(reason && (m->stay || mosadmin_mode_stay || + (mosadmin_mode_lstay && !(m->dflags & DREMOTE)))) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_CONSIDER|DSDEB_MIG)) + printk("%s-not considering, reason=%d\n", + desc_mostask(NULL), reason); +#endif + goto out; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_CONSIDER|DSDEB_MIG)) + printk("%s-considering, reason=%d:\n", desc_mostask(m), reason); +#endif +#ifdef CONFIG_MOSIX_TOPOLOGY + if(!(costinfo = (struct costinfo *) + kmalloc(sizeof(struct costinfo) * MAX_CONSIDERED, GFP_KERNEL))) + { +#ifdef CONFIG_MOSIX_DEBUG + printk("%s-no memory for considering\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + goto out; + } +#endif /* CONFIG_MOSIX_TOPOLOGY */ + m->migpages = count_migrating_pages(); +#ifdef CONFIG_MOSIX_FS + if(sons && sons->mfs.nnodes) + { + mfs_tot = &sons->mfs; + mfs_add_stats(m->mfs_stats, mfs_tot, NULL); + } + else + mfs_tot = m->mfs_stats; +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("Aloads: "); +#endif /* CONFIG_MOSIX_DEBUG */ + if(remote) + { + if(remote_request(REM_GETALOAD, NULL, 0, NULL, 0, 0, + (void **)&a, -sizeof(a))) + { + printk("%s: failed to send getaload - exiting\n", + desc_mostask(m)); + remote_disappear(); + } + depmach = m->deppe; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("HOME=%d, ", a.aload); +#endif /* CONFIG_MOSIX_DEBUG */ + aload[0] = a.aload; + if(reason != IOBALANCE && a.freepages < m->migpages) + aload[0] += INFLOAD; + /* deliberately leave costinfo[0].opcost undefined! 
*/ +#if defined(CONFIG_MOSIX_FS) && defined(CONFIG_MOSIX_TOPOLOGY) + memcpy(costinfo[0].mfscost, a.mfscosts, sizeof(a.mfscosts)); +#endif /* CONFIG_MOSIX_FS && CONFIG_MOSIX_TOPOLOGY */ + mach[n++] = GOBACKHOME; + } + read_lock_bh(&loadinfo_lock); + for(i = 0 ; i < INFO_WIN ; i++) +#ifdef CONFIG_MOSIX_CHEAT_MIGSELF + if(loadinfo[i].pe && (!i || loadinfo[i].pe != depmach) +#else + if(loadinfo[i].pe && loadinfo[i].pe != depmach +#endif /* CONFIG_MOSIX_CHEAT_MIGSELF */ + && (i == 0 || (loadinfo[i].mem >= m->migpages && + loadinfo[i].free_slots))) + { +#ifdef CONFIG_MOSIX_TOPOLOGY + memcpy(costinfo[n].opcost, loadinfo[i].costs, + sizeof(loadinfo[i].costs)); +#ifdef CONFIG_MOSIX_FS + memcpy(costinfo[n].mfscost, loadinfo[i].mfscosts, + sizeof(loadinfo[i].mfscosts)); +#endif /* CONFIG_MOSIX_FS */ +#endif /* CONFIG_MOSIX_TOPOLOGY */ + aload[n] = altload(loadinfo[i].load, loadinfo[i].speed, + loadinfo[i].ncpus); + if(i) + mach[n] = loadinfo[i].pe; + else + { + mach[n] = 0; + if(reason == MEMBALANCE) + { + if(remote) + continue; + aload[n] += INFLOAD; + } + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("%d=%d==>%d, ", loadinfo[i].pe, + (int)loadinfo[i].load, aload[n]); +#endif /* CONFIG_MOSIX_DEBUG */ + n++; + } + read_unlock_bh(&loadinfo_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("\n"); +#endif /* CONFIG_MOSIX_DEBUG */ +#ifdef CONFIG_MOSIX_FS + /* collect info on MFS nodes */ + orig_n = n; + if(mfs_tot) + for(i = 0 ; i < mfs_tot->nnodes ; i++) + { + struct loadinfo info; + + if(mfs_tot->conns[i] < 10 && + (mfs_tot->inbytes[i] + mfs_tot->outbytes[i]) < 1000000) + continue; + k = mfs_tot->nodes[i]; + if(k == PE || k == depmach) + continue; + for(j = 1 ; j < orig_n ; j++) + if(mach[j] == k) + goto not_this_one; + if(balance_get_load(k, &info)) + continue; + if (info.mem < m->migpages || !info.free_slots) + continue; + mach[n] = k; + aload[n] = altload(info.load, info.speed, info.ncpus); +#ifdef CONFIG_MOSIX_TOPOLOGY + 
memcpy(costinfo[n].opcost, info.costs, sizeof(info.costs)); + memcpy(costinfo[n].mfscost, info.mfscosts, + sizeof(info.mfscosts)); +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("%d=%d=>>%d, ", k, (int)info.load, aload[n]); +#endif /* CONFIG_MOSIX_DEBUG */ + n++; + not_this_one:; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("\n"); +#endif /* CONFIG_MOSIX_DEBUG */ +#endif /* CONFIG_MOSIX_FS */ + if(n == 0 || (n == 1 && mach[0] == 0)) + goto out; /* save time */ +#ifdef CONFIG_MOSIX_FS + /* prepare the MFS mapping */ + if(mfs_tot) + for(i = mfs_tot->nnodes - 1 ; i >= 0 ; i--) + { + k = mfs_tot->nodes[i]; + if(k == PE) + map[i] = remote; + else if(remote && k == m->deppe) + map[i] = 0; + else + { + map[i] = -1; + for(j = n - 1 ; j > 0 ; j--) + if(mach[j] == k) + { + map[i] = j; + break; + } + } + } +#endif /* CONFIG_MOSIX_FS */ + /* we must NOT sleep throughout the following FP calculations:*/ + kernel_fpu_begin(); + ms = ticks_to_ms(p->times.tms_utime+m->uttime) + m->cutime - m->dctime; + if(sons) + ms += sons->utime; + if((m->dpolicy & DADV_POLICY) == DADV_NOCPU) + tim = 0.0; + else + /* secs on a std. machine */ + tim = ms / DMILLION * cpuspeed / STD_SPD; + if(reason == MEMBALANCE) + tim += (m->pagetime + (sons ? 
sons->pagetime : 0)) / DMILLION; +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONSIDER) + printk("tim=%s\n", fformat(fbuf1, tim)); +#endif /* CONFIG_MOSIX_DEBUG */ + for(i = 0 ; i < n ; i++) + costs[i] = tim * aload[i] / MF; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + { + printk("%s-initial mach:costs:", desc_mostask(m)); + for(i = 0 ; i < n ; i++) + printk(" %d:%s", mach[i], fformat(fbuf1, costs[i])); + printk("\n"); + } +#endif + if((m->dpolicy & DADV_POLICY) == DADV_CPU) + goto skip_comm; + if(sons) /* temporarily add */ + { + lock_mosix(); + m->ndemandpages += sons->ndemandpages; + m->nsyscalls += sons->nsyscalls; + m->ncopyouts += sons->ncopyouts; + m->copyoutbytes += sons->copyoutbytes; + m->ncopyins += sons->ncopyins; + m->copyinbytes += sons->copyinbytes; + unlock_mosix(); + } + for(i = 1 ; i < n ; i++) + { +#ifdef CONFIG_MOSIX_TOPOLOGY + dtim = io_cost(m, remote ? m->depcost : deputy_here, mach[i]); + rtim = io_cost(m, costinfo[i].opcost, m->deppe); +#else + dtim = io_cost(m, remote ? 
m->depcost : deputy_here); + rtim = io_cost(m, &remote_here); +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("dtim[%d]=%s, rtim[%d]=%s, ", + mach[i], fformat(fbuf1, dtim/DMILLION), + mach[i], fformat(fbuf2, rtim/DMILLION)); +#endif /* CONFIG_MOSIX_DEBUG */ + costs[i] += (dtim * aload[0] + rtim * aload[i]) / (DMILLION*MF); + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + { + printk("\n+I/O Overheads: "); + for(i = 0 ; i < n ; i++) + printk(" %d:%s", mach[i], fformat(fbuf1, costs[i])); + printk("\n"); + } +#endif + if(sons) /* restore */ + { + lock_mosix(); + m->ndemandpages -= sons->ndemandpages; + m->nsyscalls -= sons->nsyscalls; + m->ncopyouts -= sons->ncopyouts; + m->copyoutbytes -= sons->copyoutbytes; + m->ncopyins -= sons->ncopyins; + m->copyinbytes -= sons->copyinbytes; + unlock_mosix(); + } +#ifdef CONFIG_MOSIX_FS + if(!mfs_tot) + goto skip_comm; + for(i = 0 ; i < n ; i++) + for(j = 0 ; j < mfs_tot->nnodes ; j++) + if((k = map[j]) != -1 && k != i) + { + int c = mfs_tot->conns[j]; + int in = mfs_tot->inbytes[j]; + int out = mfs_tot->outbytes[j]; + +#ifdef CONFIG_MOSIX_TOPOLOGY + dtim = mfs_cos(c, in, out, costinfo[k].mfscost, MFS_SERVER, + mach[i]); + rtim = mfs_cos(c, in, out, costinfo[i].mfscost, MFS_CLIENT, + mach[k]); +#else + dtim = mfs_cos(c, in, out, MFS_SERVER); + rtim = mfs_cos(c, in, out, MFS_CLIENT); +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("stim[%d,%d]=%s, rtim[%d,%d]=%s, ", + mach[i], mach[k], fformat(fbuf1,dtim/DMILLION), + mach[i], mach[k], fformat(fbuf2,rtim/DMILLION)); +#endif /* CONFIG_MOSIX_DEBUG */ + costs[i] += (dtim * aload[k] + rtim * aload[i]) / (DMILLION*MF); + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + { + printk("\n+MFS Overheads: "); + for(i = 0 ; i < n ; i++) + printk(" %d:%s", mach[i], fformat(fbuf1, costs[i])); + printk("\n"); + } +#endif /* CONFIG_MOSIX_DEBUG */ +#endif /* 
CONFIG_MOSIX_FS */ + skip_comm: + if (reason != MEMBALANCE) + { + /* migration cost: */ + double decay_factor; + + if(m->dctime > 0) /* decay occured */ + { + ms = ticks_to_ms(p->times.tms_utime + m->uttime); + if(ms) + decay_factor = ((double)(ms - m->dctime)) / ms; + else + { +#ifdef CONFIG_MOSIX_UDB + mosix_panic("dctime without time"); +#endif /* CONFIG_MOSIX_UDB */ + decay_factor = 1.0; + } + } + else + decay_factor = 1.0; + decay_factor /= DMILLION; /* to save multiple divisions later */ +#ifdef CONFIG_MOSIX_TOPOLOGY + for(i = 0 ; i < n ; i++) + if(i != remote) + { + j = select_cost(deputy_here, i ? mach[i] : m->deppe); + costs[i] += decay_factor * + (mosix_cost[j].MIGRATION_BASIC + + m->migpages * mosix_cost[j].MIGRATION_PER_PAGE); + } +#else + tim = decay_factor * (mosix_cost[0].MIGRATION_BASIC + + m->migpages * mosix_cost[0].MIGRATION_PER_PAGE); + for(i = 0 ; i < n ; i++) + if(i != remote) + costs[i] += tim; +#endif /* CONFIG_MOSIX_TOPOLOGY */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + { + printk("+Migration costs: "); + for(i = 0 ; i < n ; i++) + printk(" %d:%s", mach[i], + fformat(fbuf1, costs[i])); + printk("\n"); + } +#endif + } + /* sort by least cost */ + for(i = 0 ; i < n-1 ; i++) + for(j = i+1 ; j < n ; j++) + if(costs[i] > costs[j]) + { + tim = costs[i]; + costs[i] = costs[j]; + costs[j] = tim; + k = mach[i]; + mach[i] = mach[j]; + mach[j] = k; + } + kernel_fpu_end(); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + { + printk("Consider Preferences:"); + for(i = 0 ; i < n ; i++) + printk(" %d", mach[i]); + printk("\n"); + } +#endif + m->loadhere = aload[remote]; + for(i = 0 ; i < n ; i++) + { + if((k = mach[i]) == 0) + goto out; + if(remote) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s: Telling DEPUTY We Want to " + "Go to %d\n", desc_mostask(m), k); +#endif /* CONFIG_MOSIX_DEBUG */ + ask_deputy_to_goto(k); + } + else if(passto(k, reason) == 0) + goto out; + /* conditions changed meanwhile? 
*/ + if(mosadmin_mode_quiet || (reason && (mosadmin_mode_stay || + (mosadmin_mode_lstay && !(m->dflags & DREMOTE))))) + goto out; + } + out: +#ifdef CONFIG_MOSIX_TOPOLOGY + if(costinfo) + kfree(costinfo); +#endif /* CONFIG_MOSIX_TOPOLOGY */ + m->migpages = 0; + unchoose_me(); + if(m->dflags & DREMOTE) + ask_deputy_to_goto(0); +} + +void +unchoose_me(void) +{ + struct task_struct *p = current; + int l = 0, m = 0; + + spin_lock_irq(&whereto_lock); + if(p == chosen_for_balance) + { + free_task_struct(p); + chosen_for_balance = NULL; + l = 1; + } + else if(p == chosen_for_mdp) + { + free_task_struct(p); + chosen_for_mdp = NULL; + m = 1; + } + spin_unlock_irq(&whereto_lock); + if(l) + load_balance(); + else if(m) + memory_balance(); +} + +int +mosix_forkmigrate(void) +{ + struct task_struct *me = current; + struct task_struct *p, *a; + register struct mosix_task *q; + struct sonstats s; + register int remote; + int64_t age; + int any = 0; + + if(me->mosix.dflags & DDEPUTY) + panic("forkmigrate: DEPUTY"); + if(me->mosix.stay || !PE) + return(0); + remote = (me->mosix.dflags & DREMOTE) != 0; + lock_mosix(); + read_lock(&tasklist_lock); + for_each_task(p) + for(a = p->mosix.ancesstor ; a ; a = a->mosix.ancesstor) + if(a == me) + { + q = &p->mosix; + age = ticks_to_ms(p->times.tms_utime + q->uttime) + q->cutime; + if(remote) + age += remote_costs_in_ms(q); + else + age += ticks_to_ms(p->times.tms_stime); + if(age > 4 * MILLION) + /* over 4 seconds, descendant is no longer dependent */ + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_ANCESSTOR) + { + printk("%s-forkmigrate: ", desc_mostask(NULL)); + printk("%s no longer dependent: age=%d:%06d\n", + desc_mostask(q), + (int)(age/MILLION), + (int)(age-age/MILLION*MILLION)); + } +#endif + q->ancesstor = NULL; + break; + } + if(!any) + { + any = 1; + memset(&s, 0, sizeof s); + } + s.utime += ticks_to_ms(p->times.tms_utime + q->uttime) + + q->cutime - q->dctime; + /* we need not worry about the "q->uttime" part of q->dctime + 
on a local process: if the process ever migrated, it would + not be dependent and we would never reach here! + */ + s.nsyscalls += q->nsyscalls; + s.ncopyouts += q->ncopyouts; + s.copyoutbytes += q->copyoutbytes; + s.ncopyins += q->ncopyins; + s.copyinbytes += q->copyinbytes; + s.pagetime += q->pagetime; +#ifdef CONFIG_MOSIX_FS + mfs_add_stats(q->mfs_stats, &s.mfs, NULL); +#endif /* CONFIG_MOSIX_FS */ + break; + } + read_unlock(&tasklist_lock); + unlock_mosix(); + if(!any) + return(0); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_CONSIDER|DSDEB_ANCESSTOR)) + printk("%s-forkmigrate: son utime=%d.%06d, pages=%d, " + "syscalls=%d, copyouts=%d/%d, copyins=%d/%d\n", + desc_mostask(NULL), (int)(s.utime/MILLION), + (int)(s.utime-s.utime/MILLION*MILLION), + (int)(s.ndemandpages>>EVENT_COUNTER_SHIFT), + (int)(s.nsyscalls>>EVENT_COUNTER_SHIFT), + (int)(s.ncopyouts>>EVENT_COUNTER_SHIFT), + (int)s.copyoutbytes, + (int)(s.ncopyins>>EVENT_COUNTER_SHIFT), + (int)s.copyinbytes); +#endif + if(remote && comm_send(REM_CONSIDERING, NULL, 0, NULL, 0, 0)) + remote_disappear(); + consider(BALANCE, &s); + return(me->mosix.dflags & DDEPUTY); + /* for local processes, DDEPUTY means we moved, + * and REMOTE processes do not care for result - + * for they would never reach here if migrated. + */ +} + +int +send_with_miginfo(int type, void *header, int hdsz) +{ + struct miginfo mi; + register struct mosix_task *m = ¤t->mosix; + + mi.aload = m->loadhere; + mi.pages = m->migpages ? : count_migrating_pages(); + return(comm_send(type, header, hdsz, (char *)&mi, sizeof(mi), 0)); +} + +int +balance_commit_mig(int reason, int pages_i_bring) +{ + int me, him; + struct miginfo mi; + + if(comm_copydata(&mi, sizeof(mi) | COMM_ZEROCOPYOK, 0)) + goto commit; /* no MOSIX on other side! 
*/ + switch(reason) + { + case 0: + case IOBALANCE: + case MFSBALANCE: + goto commit; + default: + read_lock_bh(&loadinfo_lock); + me = altload(export_load, loadinfo[0].speed, + loadinfo[0].ncpus); + read_unlock_bh(&loadinfo_lock); + him = mi.aload; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("balance_commit_mig: me=%d, him=%d, result=%d\n", me, him, me <= him); +#endif /* CONFIG_MOSIX_DEBUG */ + if(me > him) + break; + /* fall through - check memory as well */ + case MEMBALANCE: + me = export_mem(); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("balance_commit_mig: I have %d pages, asked for %d, reply=%s\n", + me, (int)mi.pages, + mi.pages <= me ? "YES" : "NO"); +#endif /* CONFIG_MOSIX_DEBUG */ + if(me >= mi.pages) + goto commit; + break; + } + return(0); /* NO! */ + commit: + info_someone_came_in(); + return(1); +} diff -urN linux-2.4.17/mos/comm.c linux_umopenmosix/mos/comm.c --- linux-2.4.17/mos/comm.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/comm.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,2713 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +#ifdef CONFIG_MOSIX_FS +#include +#include +#endif /* CONFIG_MOSIX_FS */ + +#define MIG_DAEMON_PORT 0x3412 +#define INFO_DAEMON_PORT 0x3415 + +/* + * mosix specific data per contact + */ +#define COMM_DEF_HEADSIZE 256 + +struct mosix_link { + /* the first 2 elements are common to both types and must stay there */ + struct socket *sock; /* socket for communications */ + int flags; /* status flags */ + int dlen; /* length of pending data */ + int peer; /* mosix # of peer */ + char *hideptr; /* pointer to hidden data (current) */ + char *hidebuf; /* pointer to start of hidden data buffer */ + int hidelen; /* dlen of hidden data */ + char head[COMM_DEF_HEADSIZE]; +#ifdef CONFIG_MOSIX_DEBUG + int cntmem; + int count; +#endif /* CONFIG_MOSIX_DEBUG */ +}; + +spinlock_t skown_lock = SPIN_LOCK_UNLOCKED; + +#define COMM_HEADINUSE 0x01 +#define COMM_WAITACCEPT 0x02 +#define COMM_HIDEDATA 0x04 +#define COMM_FULLLINK 0x08 +#define COMM_INFOLINK 0x10 + +#define COMM_MAX_MULTIWAIT 10 + +#define COMM_SOCKET_SPARE 2048 +#define COMM_SOCKET_DATA 16384 +/*#define COMM_SOCKET_BUFFER (COMM_SOCKET_SPARE + COMM_SOCKET_DATA)*/ +#define COMM_SOCKET_BUFFER 131072 + + +#ifdef CONFIG_MOSIX_UDB + +#define COMM_REMOTE_TIMO ((unsigned long) 300*HZ) +#define COMM_CONNECT_TIMO ((unsigned long) 60*HZ) +#define COMM_RECONN_TIMO ((unsigned long) 180*HZ) + +#else + +#define COMM_REMOTE_TIMO ((unsigned long) 300*HZ) +#define COMM_CONNECT_TIMO ((unsigned long) 5*HZ) +#define COMM_RECONN_TIMO ((unsigned long) 10*HZ) + +#endif /* CONFIG_MOSIX_UDB */ + +#ifdef CONFIG_MOSIX_DFSA +#define DISABLE_FLAGS (DDEPUTY|DREMOTEDFSA) +#else +#define DISABLE_FLAGS DDEPUTY +#endif /* CONFIG_MOSIX_DFSA */ + +#define DISABLE_EVENTS() do {if(current->mosix.dflags & DISABLE_FLAGS) 
deeper_sleep();} while(0) +#define ENABLE_EVENTS() do {if(current->mosix.dflags & DISABLE_FLAGS) lighter_sleep();} while(0) + +int comm_type; +unsigned long comm_remote_timo = COMM_REMOTE_TIMO; /* for remote accept */ +unsigned long comm_connect_timo = COMM_CONNECT_TIMO; /* for connection */ +unsigned long comm_reconn_timo = COMM_RECONN_TIMO; /* for reconnection */ + +/* + * make sure that all pages in range cause no remote page-fault requests later. + * Once we find a page that would cause a page-fault, we may either report it + * by returning 0 (normal return is 1) or fix it and make it dirty. + */ +int +dirty_all_remote_pages(unsigned long start, unsigned long size, int doit) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long addr, lastaddr; + unsigned long limit = (start + size + PAGE_SIZE - 1) & ~(PAGE_SIZE-1); + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int hidedata = 0; + int ret = 1; + +#ifdef CONFIG_MOSIX_DIAG + if(!(current->mosix.dflags & DREMOTE)) + panic("dirty_all_remote_pages: not remote"); +#endif /* CONFIG_MOSIX_DIAG */ + if(size == 0) + return(1); + down_write(&mm->mmap_sem); + if(!(vma = find_vma(mm, start))) + goto unlock; + for(addr = start & ~(PAGE_SIZE-1) ; addr < limit ; addr += PAGE_SIZE) + { + while(!vma->vm_file || addr >= vma->vm_end || + (doit && !(vma->vm_flags & VM_WRITE))) + /* (the last condition will cause a SIGSEGV, + which we don't want here) */ + { + lastaddr = vma->vm_end; + if(!(vma = vma->vm_next)) + goto unlock; + if(vma->vm_file && (vma->vm_flags & VM_GROWSDOWN)) + /* (a mad-hatter is permitted use sys_mmap this way) */ + { + if((addr = lastaddr) >= limit) + goto unlock; + } + else if((addr = vma->vm_start) >= limit) + goto unlock; + addr = vma->vm_start; + } + if(vma->vm_flags & VM_GROWSDOWN) + /* Hmmm... will any executable file-format ever allow this? 
*/ + panic("dirty_all_remote_pages: stack mapped to file"); + + /* Dirty pages (including swap) are OK - all others are not */ + pgd = pgd_offset(current->mm, addr); + if(pgd_none(*pgd) || pgd_bad(*pgd) || + pmd_none(*(pmd = pmd_offset(pgd, addr))) || + pmd_none(*pmd) || pmd_bad(*pmd) || + (pte_present(*(pte = pte_offset(pmd, addr))) ? + !pte_dirty(*pte) : pte_none(*pte))) + { + if(!doit) + { + ret = 0; + goto unlock; + } + if (!hidedata) { + hidedata = 1; + if(comm_hidedata()) + { + ret = 0; + goto unlock; + } + } + handle_mm_fault(mm, vma, addr, 1); + } + } + unlock: + up_write(&mm->mmap_sem); + return(ret); +} + +/* + * internal communication routines + */ + +static void *comm_mkhead(int); +static inline void comm_setup_link(mosix_link *, int); +static inline void comm_unset_link(mosix_link *); +static inline int comm_setup_socket(struct socket *, int); +static inline void comm_unset_socket(struct socket *); +static void *comm_malloc(int); +static int comm_dorecv(struct socket *, struct msghdr *, int); +static int comm_packregs(struct comm_header *, char *); +static int comm_unpackregs(struct comm_header *, char *); +static int comm_packident(char *); +static int comm_unpackident(char *); +static int comm_waitaccept(void); +static int comm_poll(int, int, unsigned long); +static void comm_data_ready(struct sock *, int); +static int comm_getname(struct socket *, struct sockaddr *); +static void comm_shutdown(mosix_link *); + +static inline struct socket *comm_set_address(int, struct sockaddr *, int); +static inline struct socket *comm_ip_set_addr(int, struct sockaddr *, int); +static inline int comm_ip_getname(struct socket *, struct sockaddr *); +static inline int comm_ip_setup_socket(struct socket *, int); +static void comm_ip_migration_mode(int); + +void +comm_report_violation(char *desire, struct sockaddr *addr) +{ + unsigned int a = htonl(((struct sockaddr_in *)addr)->sin_addr.s_addr); + + printk("Received an unauthorized %s request from %d.%d.%d.%d\n", 
desire, + (a & 0xff000000) >> 24, (a & 0xff0000) >> 16, + (a & 0xff00) >> 8, a & 0xff); +} + +static inline struct task_struct * +find_task_by_sock(struct socket *sock) +{ + struct task_struct *p = NULL; + struct sock *sk; + + spin_lock(&skown_lock); + if((sk = sock->sk) && (p = sk->owner)) + get_task_struct(p); + else + spin_unlock(&skown_lock); + return(p); +} + +static inline void +comm_setup_link(mosix_link *mlink, int peer) +{ +#ifdef CONFIG_MOSIX_DIAG + if (!(mlink->flags & COMM_FULLLINK)) + panic("comm_setup_link: not FULLLINK"); +#endif /* CONFIG_MOSIX_DIAG */ + mlink->dlen = 0; + mlink->peer = peer; + mlink->hideptr = NULL; + mlink->hidebuf = NULL; + mlink->hidelen = 0; +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem = 0; + mlink->count = 0; +#endif /* CONFIG_MOSIX_DEBUG */ +} + +static inline void +comm_unset_link(mosix_link *mlink) +{ + if (mlink->flags & COMM_FULLLINK) { + if (mlink->hidebuf) { + kfree(mlink->hidebuf); + mlink->hidebuf = NULL; + mlink->hideptr = NULL; + mlink->hidelen = 0; + } + mlink->dlen = 0; + } + mlink->flags &= ~(COMM_WAITACCEPT | COMM_HIDEDATA); +} + +static void * +comm_malloc(int size) +{ + void *mem; + + while(1) + { + if((mem = kmalloc(size, GFP_KERNEL))) + return(mem); + } +} + +static int +comm_packregs(struct comm_header *header, char *ptr) +{ + int regs; + int bit; + int size = 0; + int *regptr = (int *)ptr; + struct task_struct *p = current; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_packregs: h 0x%x p 0x%x\n", + desc_mostask(NULL), (int) header, (int) ptr); +#endif /* CONFIG_MOSIX_DEBUG */ + + regs = which_regs_to_send(); + if (header) + header->regs = regs; + if (!regs) + return (0); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_packregs: mask 0x%x\n", desc_mostask(NULL), + regs); +#endif /* CONFIG_MOSIX_DEBUG */ + + regs = ~regs; + +#ifdef CONFIG_MOSIX_DEBUG + if (regptr) { + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_packregs(): packing regs = 
0x%x\n", + desc_mostask(NULL), regs); + } +#endif /* CONFIG_MOSIX_DEBUG */ + while (regs != 0xffffffff) { + bit = ffz(regs); + if (ptr) + *(regptr++) = ((int *)p->mosix.altregs)[bit]; + regs |= (1 << bit); + size++; + } + return (size << 2); /* size * sizeof(int) */ +} + + +static int +comm_unpackregs(struct comm_header *header, char *ptr) +{ + int regs; + int bit; + int size = 0; + int *regptr = (int *)ptr; + struct task_struct *p = current; + + regs = header->regs; +#ifdef CONFIG_MOSIX_DIAG + if ((regs & (p->mosix.dflags & DDEPUTY ? ~p->mosix.deputy_regs : + p->mosix.deputy_regs)) != regs) { + printk("%s-comm_unpackregs: inconsistent (regs=%x, ptr=0x%x)\n", + desc_mostask(NULL), regs, (int) ptr); + panic("comm_unpackregs(): inconsistent regs\n"); + } +#endif /* CONFIG_MOSIX_DIAG */ + p->mosix.deputy_regs ^= regs; +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_unpackregs(): unpacking regs = 0x%x\n", + desc_mostask(NULL), ~regs); +#endif /* CONFIG_MOSIX_DEBUG */ + + regs = ~regs; + while (regs != 0xffffffff) { + bit = ffz(regs); + ((int *)p->mosix.altregs)[bit] = *(regptr++); + regs |= (1 << bit); + size++; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_unpackregs: size << 2 = %d\n", + desc_mostask(NULL), size << 2); +#endif /* CONFIG_MOSIX_DEBUG */ + + return (size << 2); /* size * sizeof(int) */ +} + + +static int +comm_packident(char *ptr) +{ +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_packident: ptr 0x%x\n", desc_mostask(NULL), + (int) ptr); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (ptr) { + ((int *)ptr)[0] = PE; + ((int *)ptr)[1] = comm_getpeer(current->mosix.contact); + + /* + * if this is the first migration to a node, + * tell them, so they flush any of our old cached files, + */ + if (!config_get_status(((int *)ptr)[1])) + ((int *)ptr)[0] |= 0x80000000; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_packident: me=%d 
him=%d\n", + desc_mostask(NULL), PE, ((int *)ptr)[1]); +#endif /* CONFIG_MOSIX_DEBUG */ + } + return (2 * sizeof(int)); +} + + +static int +comm_unpackident(char *ptr) +{ + int ret = -1; + int him, me; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_unpackident: him=%d me=%d\n", + desc_mostask(NULL), ((int *)ptr)[0], ((int *)ptr)[1]); +#endif /* CONFIG_MOSIX_DEBUG */ + him = ((int *)ptr)[0]; + me = ((int *)ptr)[1]; + + if (him & 0x80000000) + rinode_flush_files(him & ~0x80000000); + if (comm_getpeer(current->mosix.contact) == (him & 0x7fffffff) + && PE == me) + ret = 2 * sizeof(int); +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMFLAGS) + printk("%s-comm_unpackident: mismatch \n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + return (ret); +} + + +/* + * comm_startup() - initialize communication layer + */ +void +comm_startup(void) +{ + /* nothing to do */ +} + +/* + * comm_setup_socket() - setup socket parameters (specific to its type) + */ +static inline int +comm_setup_socket(struct socket *sock, int mos) +{ + int val; + int error; + int connect = 0; + mm_segment_t oldfs; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + /* + * socket parameters: + * - SO_SNDBUF, SO_RCVBUF + * - SO_KEEPALIVE (for connection-oriented sockets) + */ + + val = COMM_SOCKET_BUFFER; + if ((error = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, + (char *) &val, sizeof(val))) || + (error = sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, + (char *) &val, sizeof(val)))) + goto failed; +/* + val = IPTOS_LOWDELAY; + if ((error = sock_setsockopt(sock, SOL_IP, IP_TOS, (char *)&val, + sizeof(val)))) + goto failed; +*/ + + switch (mos) { + case COMM_INFO: + sock->sk->data_ready = comm_data_ready; + break; + case COMM_LOOSE: + /* + * set "sk->bsdism" (SO_BSDCOMPAT) to prevent ICMP + * errors from being reflected in the UDP socket, + * thus receiving old errors on new calls. 
+ */ + val = 1; + if ((error = sock_setsockopt(sock, SOL_SOCKET, + SO_BSDCOMPAT, (char*)&val,sizeof(val)))) + goto failed; + case COMM_ACCEPT: + case COMM_TOADDR: + default: + set_bit(SOCK_INTER_MOSIX, &sock->flags); + if (mos == COMM_LOOSE) /* UDP - we're done */ + break; + val = 1; + if ((error = sock_setsockopt(sock, SOL_SOCKET, + SO_KEEPALIVE, (char *) &val, sizeof(val)))) + goto failed; + /* fall through */ + case COMM_MIGD: + connect = 1; + val = 1; + break; + } + + switch (comm_type) { + case AF_INET: + error = comm_ip_setup_socket(sock, connect); + break; + default: + panic("comm_setup_socket: unknown comm_type"); + } + +failed: +#ifdef CONFIG_MOSIX_DEBUG + if (error && (ds_debug & DSDEB_ERROR)) + printk("%s-comm_setup_socket: error %d\n", desc_mostask(NULL), + error); +#endif /* CONFIG_MOSIX_DEBUG */ + set_fs(oldfs); + return (error); +} + +static inline void +comm_unset_socket(struct socket *sock) +{ + spin_lock_irq(&skown_lock); + sock->sk->owner = NULL; + clear_bit(SOCK_INTER_MOSIX, &sock->flags); + spin_unlock_irq(&skown_lock); +} + + +static inline int +comm_ip_setup_socket(struct socket *sock, int connect) +{ + int val; + int error; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_ip_setup_socket: connect=%d\n", + desc_mostask(NULL), connect); +#endif /* CONFIG_MOSIX_DEBUG */ + + /* NOTE: at this point set_fs(KERNEL_DS) is on */ + + if (connect) { + mm_segment_t oldfs = get_fs(); + set_fs(KERNEL_DS); + val = MOSIX_CONNECTION_KEEPALIVE_INTERVAL; + if ((error = sock->ops->setsockopt(sock, IPPROTO_TCP, + TCP_KEEPINTVL, (char *) &val, sizeof(val)))) + { + resume: + set_fs(oldfs); + return(error); + } + val = MOSIX_CONNECTION_KEEPALIVE_MAXTRIES; + if ((error = sock->ops->setsockopt(sock, IPPROTO_TCP, + TCP_KEEPCNT, (char *) &val, sizeof(val)))) + goto resume; + val = MOSIX_CONNECTION_KEEPALIVE_TOTAL; + if ((error = sock->ops->setsockopt(sock, IPPROTO_TCP, + TCP_KEEPIDLE, (char *) &val, sizeof(val)))) + goto resume; + val = 
1; + if ((error = sock->ops->setsockopt(sock, IPPROTO_TCP, + TCP_NODELAY, (char *) &val, sizeof(val)))) + goto resume; + set_fs(oldfs); + } + return (0); +} + +/* + * comm_open() - open a socket for MOSIX communications + * + * mos > 0: connect to migdaemon on node #mos + * mos = COMM_TOADDR: connect to address given in maddr->saddr + * mos = COMM_ACCEPT: set up a socket and prepare to accept + * mos = COMM_MIGD: set up a socket for mig-daemon + * mos = COMM_INFOD: set up a socket for info-daemon + * mos = COMM_LOOSE connect to multiple daemons + */ +mosix_link * +comm_open(int mos, mosix_addr *maddr, unsigned long timo) +{ + int error = -EDIST; + struct socket *sock = NULL; + mosix_link *mlink = NULL; + struct sockaddr sa; + int connect = 1, bind = 1, need_comm = 1, listen = 1, peer = 0; + struct sockaddr *saddr = &(maddr->saddr); + DECLARE_WAITQUEUE(wait, current); + + /* + * COMM_INFO: connect = 0, need_comm = 0, listen = 0, bind = 1 + * COMM_MIGD: connect = 0, need_comm = 1, listen = 1, bind = 1 + * COMM_ACCEPT: connect = 0, need_comm = 1, listen = 1, bind = 1 + * COMM_TOADDR: connect = 1, need_comm = 1, listen = 0, bind = 1 + * COMM_LOOSE: connect = 0, need_comm = 0, listen = 0, bind = 0 + * deafult: connect = 1, need_comm = 1, listen = 1, bind = 1 + */ + switch (mos) { + case COMM_LOOSE: + bind = 0; + /* fall through */ + case COMM_INFO: + listen = 0; + /* fall through */ + case COMM_MIGD: + need_comm = 0; + /* fall through */ + case COMM_ACCEPT: + connect = 0; + if(!saddr) + saddr = &sa; + break; + case COMM_TOADDR: + if(saddr->sa_family != AF_INET) + return(NULL); + break; + default: +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_open to mos#%d\n", desc_mostask(NULL), + mos); +#endif /* CONFIG_MOSIX_DEBUG */ +#ifdef CONFIG_MOSIX_DIAG + if (mos <= 0 || mos > MOSIX_MAX) + panic("comm_open: incorrent mos#"); +#endif /* CONFIG_MOSIX_DIAG */ + peer = mos; + saddr = &sa; + break; + } +#ifdef CONFIG_MOSIX_DIAG + if (!saddr) + 
panic("comm_open: NULL saddr"); +#endif /* CONFIG_MOSIX_DIAG */ + DISABLE_EVENTS(); + /* fill in socket address and allocate a socket */ + if (!(sock = comm_set_address(mos, saddr, 1))) + goto failed; + + if (need_comm) { /* mosix_link required ? */ + mlink = kmalloc(sizeof(mosix_link), GFP_KERNEL); + if (!mlink) { + no_memory: +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_open: insufficient memory\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + mlink->flags = COMM_FULLLINK; + comm_setup_link(mlink, peer); + } else { + mlink = kmalloc(sizeof(struct socket *) + sizeof(int), + GFP_KERNEL); + if (!mlink) + goto no_memory; + mlink->flags = COMM_INFOLINK; + } + mlink->sock = sock; + + if ((error = comm_setup_socket(sock, mos))) + goto failed; + + if (!connect) { /* setup server - listen */ + if(bind) + { + error = sock->ops->bind(sock, saddr, sizeof(*saddr)); + if (error) { + if(error == -EADDRINUSE && mos == COMM_MIGD) + printk("Migration port (%d) Already in use\n", ntohs(MIG_DAEMON_PORT)); +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("bind() in "); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + } + if (listen) { /* connection required ? 
*/ + error = sock->ops->listen(sock, SOMAXCONN); + if (error) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("listen() in "); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + } + + if (mos == COMM_ACCEPT) { + mlink->flags |= COMM_WAITACCEPT; + if ((error = comm_getname(sock, saddr))) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("comm_getname() in "); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + } + } else { /* setup client - connect */ +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_open: connecting...\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if (!timo) + timo = MAX_SCHEDULE_TIMEOUT; + error = sock->ops->connect(sock, saddr, sizeof(*saddr), + O_NONBLOCK); + add_wait_queue(sock->sk->sleep, &wait); + while (sock->state != SS_CONNECTED) { + set_current_state(TASK_INTERRUPTIBLE); + error = sock->ops->connect(sock, saddr, sizeof(*saddr), + O_NONBLOCK); +#ifdef CONFIG_MOSIX_DIAG + if (error == -EISCONN || error == -EINVAL) { + printk("%s-comm_open: error %d\n", + desc_mostask(NULL), error); + panic("comm_open while connect"); + } +#endif /* CONFIG_MOSIX_DIAG */ + if (error != -EALREADY || (error=sock_error(sock->sk))) + break; + + timo = schedule_timeout(timo); + if (timo <= 0) { + error = -EAGAIN; + break; + } + } + remove_wait_queue(sock->sk->sleep, &wait); + set_current_state(TASK_RUNNING); + + if (error) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("connect() in "); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } + if (sock->sk->err) { + error = sock_error(sock->sk); /* cleans error.. 
*/ +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("sock_error in "); +#endif /* CONFIG_MOSIX_DEBUG */ + goto failed; + } +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_open: SUCCESS connection...\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + /* client socket ready */ + } + ENABLE_EVENTS(); + + return (mlink); + +failed: + ENABLE_EVENTS(); + if (sock) + sock_release(sock); + if (mlink) + kfree(mlink); +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_COMMOPEN|DSDEB_ERROR)) + printk("%s: comm_open: failed err=%d\n", desc_mostask(NULL), + error); +#endif /* CONFIG_MOSIX_DEBUG */ + return (0); +} + +/* + * comm_use() - set new "contact" for a process (and get the old one) + */ +mosix_link * +comm_use(struct task_struct *p, mosix_link *mlink) +{ + mosix_link *oldmlink; + + spin_lock_irq(&skown_lock); + if ((oldmlink = p->mosix.contact)) + oldmlink->sock->sk->owner = NULL; + p->mosix.contact = NULL; + if (mlink) + { + p->mosix.contact = mlink; + mlink->sock->sk->owner = p; + } + spin_unlock_irq(&skown_lock); + if(test_bit(SOCK_OOB_IN, &mlink->sock->flags)) + tell_process(p, DREQ_URGENT); + return (oldmlink); +} + + +/* + * comm_close() - close a MOSIX communication socket + * if called with NULL argument, close our contact and nullify it + */ +void +comm_close(mosix_link *mlink) +{ + int ours = 0; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMM) + printk("%s-comm_close: closing%s socket\n", + desc_mostask(NULL), mlink ? 
"" : " my"); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (!mlink) { + ours = 1; + mlink = current->mosix.contact; +#ifdef CONFIG_MOSIX_DIAG + if (!mlink) + panic("comm_close: no contact"); +#endif /* CONFIG_MOSIX_DIAG */ + } +#ifdef CONFIG_MOSIX_DIAG + else if (mlink == current->mosix.contact) + panic("comm_close: closing my socket explicitly"); +#endif /* CONFIG_MOSIX_DIAG */ + + comm_shutdown(mlink); + sock_release(mlink->sock); + + if (ours) + current->mosix.contact = NULL; + +#ifdef CONFIG_MOSIX_DIAG + if (mlink->flags & COMM_FULLLINK) { + if (mlink->flags & COMM_HEADINUSE) + mosix_panic("comm_close: comm->head still in use !"); +#ifdef CONFIG_MOSIX_DEBUG + if (mlink->cntmem || mlink->count) { + printk("cntmem=%d, count=%d\n", + mlink->cntmem, mlink->count); + mosix_panic("comm_close: count not zero"); + } +#endif /* CONFIG_MOSIX_DEBUG */ + } else if (!(mlink->flags & COMM_INFOLINK)) + mosix_panic("comm_close: nor FULL neither LINK"); +#endif /* CONFIG_MOSIX_DIAG */ + + kfree(mlink); +} + + +/* + * comm_accept() - accept a connection on MOSIX socket + */ +int +comm_accept(mosix_link *ml, mosix_link **mlp, mosix_addr *ma, unsigned long t) +{ + int error = 0; + struct socket *sock; + struct sockaddr *saddr = &(ma->saddr); + +#ifdef CONFIG_MOSIX_DIAG + if (current->mosix.dflags & DDEPUTY) + panic("comm_accept called by DEPUTY"); +#endif /* CONFIG_MOSIX_DIAG */ + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMACPT) + printk("%s-comm_accept: accepting connection\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + if ((*mlp = kmalloc(sizeof(mosix_link), GFP_KERNEL)) == 0) + return (-ENOMEM); + + sock = sock_alloc(); + if (!sock) { + kfree(*mlp); + return (-EMFILE); + } + sock->type = ml->sock->type; + sock->ops = ml->sock->ops; + + if (t && !comm_poll(POLLIN | POLLRDNORM, 0, t)) + error = -EAGAIN; + + if (!error) + error = ml->sock->ops->accept(ml->sock, sock, 0); + +#ifdef CONFIG_MOSIX_DEBUG + if (error && (ds_debug & DSDEB_COMMACPT)) + 
printk("%s-comm_accept: error1 %d\n", desc_mostask(NULL),error); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (!error && saddr) + error = comm_getname(sock, saddr); + + if (error) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMACPT) + printk("%s-comm_accept: error %d\n", desc_mostask(NULL), + error); +#endif /* CONFIG_MOSIX_DEBUG */ + failed: + kfree(*mlp); + sock_release(sock); + *mlp = NULL; + } else { + struct sockaddr saddr; + int val; + + if (sock->ops->getname(sock, &saddr, &val, 1)) { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-comm_accept: strange, not connected\n", + desc_mostask(NULL)); + mosix_panic("comm_accept disconnected ?!"); +#endif /* CONFIG_MOSIX_DIAG */ + error = -ENOTCONN; + goto failed; + } + if (!(val = net_to_mos((mosix_addr *)&saddr))) { + comm_report_violation("migration", &saddr); + error = -EDIST; + goto failed; + } + (*mlp)->sock = sock; + (*mlp)->flags = COMM_FULLLINK; + comm_setup_link(*mlp, val); + + if ((error = comm_setup_socket(sock, PE))) + goto failed; + } + return (error); +} + +/* + * comm_waitaccept() - accept a connection on MOSIX socket + */ +static int +comm_waitaccept() +{ + struct task_struct *p = current; + mosix_link *mlink; + int error; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_COMMACPT|DSDEB_COMMOPEN)) + printk("%s-comm_waitaccept: accepting .\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = comm_accept(p->mosix.contact, &mlink, 0, comm_remote_timo))) + return (error); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_COMMACPT|DSDEB_COMMOPEN)) + printk("%s-comm_waitaccept: accepted\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_close(comm_use(p, mlink)); + return (0); +} + + +/* + * comm_free() - free a head/data which is no longer used + */ +void +comm_free(void *head) +{ + mosix_link *mlink; + struct task_struct *p = current; + + if(!head) + return; + + mlink = p->mosix.contact; + if (mlink->flags & COMM_INFOLINK) { + kfree(head); + return; + } + + if (head == 
&mlink->head + COMM_HLEN) { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-comm_free(): shift (sk=0x%x, comm=0x%x)\n", + desc_mostask(NULL), (int) mlink->sock->sk, + (int) mlink); + goto debug; +#endif /* CONFIG_MOSIX_DIAG */ + mlink->flags &= ~COMM_HEADINUSE; + goto out; + } else if (head == &(mlink->head)) { + /* default head is (supposedly) in use */ + +#ifdef CONFIG_MOSIX_DIAG + if (!(mlink->flags & COMM_HEADINUSE)) { + printk("%s-comm_free(): inuse (sk=0x%x, comm=0x%x)\n", + desc_mostask(NULL), (int) mlink->sock->sk, + (int) mlink); + goto debug; + } +#endif /* CONFIG_MOSIX_DIAG */ + mlink->flags &= ~COMM_HEADINUSE; + goto out; + } else { + /* must be a head we previously allocated */ +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem--; + if (mlink->cntmem < 0) + mosix_panic("comm_free: cntmem < 0"); +#endif /* CONFIG_MOSIX_DEBUG */ + kfree(head); + return; + } + +#ifdef CONFIG_MOSIX_DIAG +debug: +#endif /* CONFIG_MOSIX_DIAG */ + mosix_panic("comm_free() # 1"); + +out: + +#ifdef CONFIG_MOSIX_DEBUG + if (!mlink->count) { + printk("%s-comm_free: count==0 (sk=0x%x, comm=0x%x)\n", + desc_mostask(NULL), (int) mlink->sock->sk, (int) mlink); + mosix_panic("comm_free() #2"); + return; + } + mlink->count--; +#endif /* CONFIG_MOSIX_DEBUG */ +} + + +/* + * comm_mkhead() - prepares a head of size hlen + */ +void * +comm_mkhead(int hlen) +{ + mosix_link *mlink; + void *head; + + mlink = current->mosix.contact; + + if ((mlink->flags & COMM_HEADINUSE) || hlen > COMM_DEF_HEADSIZE) { + head = kmalloc(hlen, GFP_KERNEL); + if(!head) + return(NULL); +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem++; + if (mlink->cntmem > 2) + mosix_panic("comm_mkhead: cntmem > 2"); +#endif /* CONFIG_MOSIX_DEBUG */ + return (head); + } + + head = &mlink->head; + mlink->flags |= COMM_HEADINUSE; +#ifdef CONFIG_MOSIX_DEBUG + mlink->count++; +#endif /* CONFIG_MOSIX_DEBUG */ + return (head); +} + +#define CMHDR(x) ((struct comm_header *)x) + +static char bigbuf[PAGE_SIZE], midbuf[512], smallbuf[128]; +static int bigtaken, 
midtaken, smalltaken; + +/* + * comm_send() - send a message (head + data) + */ +int +comm_send(int type, void *head, int hlen, void *data, int dlen, int uspace) +{ + mosix_link *mlink; +#ifdef CONFIG_MOSIX_DFSA + struct comm_header header = { 0, 0, 0, 0, 0, 0 }; + struct iovec iov[4]; + int dfsalen = 0; + char *dfsadata = NULL; +#else + struct comm_header header = { 0, 0, 0, 0, 0, 0 }; + struct iovec iov[3]; +#endif /* CONFIG_MOSIX_DFSA */ + struct msghdr msg = { NULL, 0, iov, 0, NULL, 0, MSG_NOSIGNAL }; + mm_segment_t oldfs = { 0 }; + int niov = 1; + int size = 0; + void *free_head = 0; + int error = -ENOMEM; + void *tofree = NULL; + int *tozero = NULL; + void *kerdat; + + if(uspace && !dirty_all_remote_pages((unsigned long)data, dlen, 0)) + { + if(!smalltaken && dlen <= sizeof(smallbuf)) + { + kerdat = smallbuf; + smalltaken = 1; + tozero = &smalltaken; + } + else if(!midtaken && dlen <= sizeof(midbuf)) + { + kerdat = midbuf; + midtaken = 1; + tozero = &midtaken; + } + else if(!bigtaken && dlen <= sizeof(bigbuf)) + { + kerdat = bigbuf; + bigtaken = 1; + tozero = &bigtaken; + } + else if(!(kerdat = tofree = kmalloc(dlen, GFP_KERNEL))) + return(-ENOMEM); + if(copy_from_user(kerdat, data, dlen)) + { + if(tofree) + kfree(tofree); + return(-EFAULT); + } + uspace = 0; + data = kerdat; + } + DISABLE_EVENTS(); + + if(current->mosix.pass_regs) + type |= COMM_MFREGS; + +#ifdef CONFIG_MOSIX_DFSA + dfsa_check_comm_send(&type, &dfsadata, &dfsalen); + if(dfsalen < 0) + { + error = dfsalen; + goto failed; + } +#endif /* CONFIG_MOSIX_DFSA */ + + if (current->mosix.dflags & DREMOTE) + current->mosix.dflags |= DPSYNC; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMSEND) + printk("%s-comm_send: type=%x hd=0x%x (%d) dt=0x%x (%d) %c\n", + desc_mostask(NULL), type, (int)head, hlen, (int)data, + dlen, uspace ? 'U' : 'K'); +#endif /* CONFIG_MOSIX_DEBUG */ + mlink = current->mosix.contact; + + /* validate address range if in user-space (EFAULT ?) 
*/ + /*if (data && uspace && !access_ok(VERIFY_READ, data, dlen)) {*/ + if (data && uspace && !ucache_ok((unsigned long)data, dlen, VM_READ)) { + error = -EFAULT; + goto failed; + } + + if (type & COMM_MFREGS) + { + if (!(size = comm_packregs(0, 0))) { + type &= ~COMM_MFREGS; + regs_were_sent(); + } + } + if (type & COMM_MFIDENT) + size += comm_packident(0); + if (!size) { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: REGULAR\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + header.type = type; + header.hlen = (u_short) hlen; + iov[0].iov_base = &header; + iov[0].iov_len = COMM_HLEN; + iov[1].iov_base = head; + iov[1].iov_len = hlen; + niov++; + } else { + char *options; + + /* allocate space for options - use default buffer if free */ + size += COMM_HLEN; + iov[0].iov_len = size; + if ((mlink->flags & COMM_HEADINUSE) || size>COMM_DEF_HEADSIZE){ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: 1.3 size = %d\n", + desc_mostask(NULL), size - COMM_HLEN); +#endif /* CONFIG_MOSIX_DEBUG */ + free_head = iov->iov_base = kmalloc(size, GFP_KERNEL); + if (!iov->iov_base) + goto failed; /* xxx - error set above */ +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem++; + if (mlink->cntmem > 2) + mosix_panic("comm_send: cntmem > 2"); +#endif /* CONFIG_MOSIX_DEBUG */ + } else { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: 1.4 size = %d\n", + desc_mostask(NULL), size); +#endif /* CONFIG_MOSIX_DEBUG */ + free_head = iov->iov_base = mlink->head; + mlink->flags |= COMM_HEADINUSE; +#ifdef CONFIG_MOSIX_DEBUG + mlink->count++; + if (mlink->count > 2) + mosix_panic("comm_send: count > 2"); +#endif /* CONFIG_MOSIX_DEBUG */ + } + options = iov->iov_base + COMM_HLEN; + size = 0; + if (type & COMM_MFREGS) + size += comm_packregs(CMHDR(iov->iov_base), options); + if (type & COMM_MFIDENT) + size += comm_packident(options + size); + CMHDR(iov->iov_base)->type = type; + 
CMHDR(iov->iov_base)->hlen = (u_short) hlen; + iov[1].iov_base = head; + iov[1].iov_len = hlen; + niov++; + } + CMHDR(iov->iov_base)->olen = size; + CMHDR(iov->iov_base)->dlen = 0; + size += COMM_HLEN + hlen; +#ifdef CONFIG_MOSIX_DFSA + CMHDR(iov->iov_base)->dfsalen = dfsalen; + if(dfsalen) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: dfsadata = %d\n", + desc_mostask(NULL), dfsalen); +#endif /* CONFIG_MOSIX_DEBUG */ + iov[niov].iov_base = dfsadata; + iov[niov].iov_len = dfsalen; + size += dfsalen; + niov++; + } +#else + CMHDR(iov->iov_base)->dfsalen = 0; +#endif /* CONFIG_MOSIX_DFSA */ + if (data) { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: 2 data = %d\n", + desc_mostask(NULL), dlen); +#endif /* CONFIG_MOSIX_DEBUG */ + CMHDR(iov->iov_base)->type |= COMM_MFDATA; + CMHDR(iov->iov_base)->dlen = dlen; + iov[niov].iov_base = data; + iov[niov].iov_len = dlen; + size += dlen; + niov++; + } + + msg.msg_iovlen = niov; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMDOSEND) + printk("%s-comm_send: totsize=%d niov=%d (hlen=%d olen=%d)\n", + desc_mostask(NULL), size, niov, + CMHDR(iov->iov_base)->hlen, CMHDR(iov->iov_base)->olen); +#endif /* CONFIG_MOSIX_DEBUG */ + + add_mosix_log(current, MOSIX_LOG_SENDTYPE, type); + add_mosix_log_hdr(current, MOSIX_LOG_SENDHDR, + CMHDR(iov->iov_base)->hlen, CMHDR(iov->iov_base)->olen); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDOSEND) + printk("sock=0x%x msg=0x%x size=%d\n", + (int)current->mosix.contact, (int)&msg,size); +#ifdef CONFIG_MOSIX_UDB + if ((ds_debug & DSDEB_COMMSEND) && ds_debug != -1 && + (ds_debug & DSDEB_DEBUGSEND)) mosix_debugger("after sending.."); +#endif /* CONFIG_MOSIX_UDB */ +#endif /* CONFIG_MOSIX_DEBUG */ + + oldfs = get_fs(); + set_fs(KERNEL_DS); + error = mlink->sock->ops->sendmsg(mlink->sock, &msg, size, NULL); + set_fs(oldfs); + + add_mosix_log_ret(current, error); + + if (free_head) + comm_free(free_head); 
+ +#ifdef CONFIG_MOSIX_DEBUG + if ((ds_debug & DSDEB_COMMDOSEND) || + (error < 0 && (ds_debug & DSDEB_ERROR))) + printk("%s-comm_send: result %d\n", desc_mostask(NULL), error); +#endif /* CONFIG_MOSIX_DEBUG */ + + if((type & COMM_MFREGS) && error >= 0) + regs_were_sent(); +failed: + ENABLE_EVENTS(); +#ifdef CONFIG_MOSIX_DFSA + if (dfsadata) + kfree(dfsadata); +#endif /* CONFIG_MOSIX_DFSA */ + if(tofree) + kfree(tofree); + if(tozero) + *tozero = 0; + return (error < 0 ? error : 0); +} + + +/* + * comm_dorecv() - reliably read data from socket + * on success, or if error == -EFAULT: returns number of bytes received + * otherwise (any other error): returns negative error. + */ +static int +comm_dorecv(struct socket *sock, struct msghdr *msg, int len) +{ + int n = 0; + int left = len; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMDORECV) + printk("%s-comm_dorecv: sock 0x%x, msg 0x%x, len %d\n", + desc_mostask(NULL), (int)sock, (int)msg, len); +#endif /* CONFIG_MOSIX_DEBUG */ + + do { + current->mosix.commpri = 1; + n = sock->ops->recvmsg(sock, msg, left, msg->msg_flags, 0); + current->mosix.commpri = 0; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_COMMDORECV) + printk("%s-comm_dorecv: n=%d\n", desc_mostask(NULL), n); +#endif /* CONFIG_MOSIX_DEBUG */ +#ifdef CONFIG_MOSIX_DIAG + if (n == -EINTR || n == -EAGAIN || n == -ERESTARTSYS) { + printk("%s-comm_dorecv: EINTR,EAGAIN,ERESTARTSYS %d\n", + desc_mostask(NULL), n); +#ifdef CONFIG_MOSIX_UDB + mosix_panic("comm_dorecv() - 1"); + return (-EINTR); /* never mind data: this is fatal */ +#else + panic("comm_dorecv() - 1"); +#endif /* CONFIG_MOSIX_UDB */ + } +#endif /* CONFIG_MOSIX_DIAG */ + if (n <= 0) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR)) + printk("%s-comm_dorecv: error %d\n", + desc_mostask(NULL), n); +#endif /* CONFIG_MOSIX_DEBUG */ + /* if we already got -EFAULT, we must report */ + if (n == -EFAULT) { + for ( ; msg->msg_iovlen; msg->msg_iov++, + msg->msg_iovlen--) + len -= 
msg->msg_iov->iov_len; + return (len); + } + + /* .. otherwise - it is fatal - return error */ + if (n < 0) + return (n); +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_COMM | DSDEB_ERROR)) + printk("%s-comm_dorecv: EOF\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + return (-EPIPE); + } + + left -= n; + if (left) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMRECV) + printk("%s-comm_dorecv: read %d out of %d\n", + desc_mostask(NULL), n, left + n); +#endif /* CONFIG_MOSIX_DEBUG */ + /* ...->recvmsg() updated iovec, we update msg */ + while (!msg->msg_iov->iov_len) { + msg->msg_iov++; + msg->msg_iovlen--; + } + } + } while (left); + return (len); +} + + +/* + * comm_recv() - receive a message head + */ +int +comm_recv(void **headp, int *hlen) +{ + struct comm_header header; + mosix_link *mlink = current->mosix.contact; + char *head = 0, *options = 0; + struct iovec iov[2]; + struct msghdr msg = + { NULL, 0, iov, 1, NULL, 0, MSG_WAITALL | MSG_NOSIGNAL }; + mm_segment_t oldfs; + int niov = 0; + int size; + int error = -EDIST; +#ifdef CONFIG_MOSIX_DFSA + char *dfsadata = NULL; +#endif /* CONFIG_MOSIX_DFSA */ + + if (!mlink) + { + if (current->mosix.dflags & (DDEPUTY|DREMOTE)) + panic("comm_recv: null contact\n"); +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMM) + printk("%s-comm_recv: EX-deputy\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + return(-EDIST); + } + DISABLE_EVENTS(); + +#ifdef CONFIG_MOSIX_DIAG + if (mlink->dlen) + panic("comm_recv: there is data in the socket"); +#endif /* CONFIG_MOSIX_DIAG */ + + /* if we're not connected yet we should first accept a connection */ + if (mlink->flags & COMM_WAITACCEPT) { + if (comm_waitaccept()) + goto edist; + mlink = current->mosix.contact; + } + + iov[0].iov_base = &header; + iov[0].iov_len = COMM_HLEN; + + add_mosix_log(current, MOSIX_LOG_RECEIVE, 0); + + oldfs = get_fs(); + set_fs(KERNEL_DS); + if ((error = comm_dorecv(mlink->sock, &msg, COMM_HLEN)) != COMM_HLEN) { + if 
(error >= 0) + error = -EFAULT; + goto failed; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMRECV) + printk("%s-comm_recv: type=0x%x olen=%d hlen=%d dlen=%d\n", + desc_mostask(NULL), header.type, header.olen, + header.hlen, header.dlen); +#endif /* CONFIG_MOSIX_DEBUG */ + + size = header.hlen + header.olen; + if (size && !(head = comm_mkhead(size))) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-comm_recv: comm_mkhead failed!", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DIAG */ + error = -ENOMEM; + goto failed; + } + + if (header.olen || (header.type & COMM_MFHEADOPTS)) { +#ifdef CONFIG_MOSIX_DIAG + if ((header.type & COMM_MFHEADOPTS) == 0 || !header.olen) { + printk("%s-comm_recv(): olen mismatch 0x%x\n", + desc_mostask(NULL), (int) &header); + mosix_panic("olen mismatch"); + } +#endif /* CONFIG_MOSIX_DIAG */ + + /* the options will be placed after the message head */ + options = head + header.hlen; + iov[0].iov_base = options; + iov[0].iov_len = header.olen; + niov++; + } + if (header.hlen) { + iov[niov].iov_base = head; + iov[niov].iov_len = header.hlen; + niov++; + } + + msg.msg_iovlen = niov; + if (niov && (error = comm_dorecv(mlink->sock, &msg, size)) != size) { + if (error >= 0) + error = -EFAULT; + goto failed; + } + +#ifdef CONFIG_MOSIX_DFSA + if (header.dfsalen) + { + if(!(dfsadata = kmalloc(header.dfsalen, GFP_KERNEL))) + { + printk("%s-comm_recv: failed to allocate DFSA space\n", + desc_mostask(NULL)); + error = -ENOMEM; + goto failed; + } + iov[0].iov_base = dfsadata; + iov[0].iov_len = header.dfsalen; + msg.msg_iovlen = 1; + if ((error = comm_dorecv(mlink->sock, &msg, header.dfsalen)) != + header.dfsalen) + { + kfree(dfsadata); + if (error >= 0) + error = -EFAULT; + goto failed; + } + } + if(header.type & COMM_MFDFSAOPTS) + dfsa_comm_recv(header.type, dfsadata); +#ifdef CONFIG_MOSIX_DIAG + else if(dfsadata) + mosix_panic("dfsadata, but no flags"); +#endif /* CONFIG_MOSIX_DIAG */ +#endif /* CONFIG_MOSIX_DFSA */ + if (header.type & 
COMM_MFDATA) + mlink->dlen = header.dlen; + + size = 0; + if ((header.type & COMM_MFREGS) && + (size = comm_unpackregs(&header, options)) < 0) { + goto edist; + } + options += size; + if ((header.type & COMM_MFIDENT) && + (size = comm_unpackident(options)) < 0) { + goto edist; + } + options += size; + error = header.type & ~COMM_MFOPTIONS; /* return value :-) */ + if (hlen) + *hlen = header.hlen; + + add_mosix_log(current, MOSIX_LOG_RECVTYPE, error); + goto done; + +edist: + error = -EDIST; +failed: + if (head) + comm_free(head); +#ifdef CONFIG_MOSIX_DIAG + if (error == -EINTR || error == -EAGAIN || error == -ERESTARTSYS) + panic("comm_recv: invalid error"); + if (error == -EFAULT) { + printk("%s-comm_recv: got error -EFAULT\n", desc_mostask(NULL)); + mosix_panic("EFAULT(1)"); + } +#endif /* CONFIG_MOSIX_DIAG */ + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR | DSDEB_COMMRECV)) + printk("%s-comm_recv: failed error=%d\n", desc_mostask(NULL), + error); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_shutdown(mlink); + head = NULL; +done: + /* the caller knows what she wants; from the type she knows the + message is empty, and will not free it -> we do it ourselves */ + if (!header.hlen) { + comm_free(head); + head = NULL; + } + + add_mosix_log_ret(current, error); + ENABLE_EVENTS(); + set_fs(oldfs); + *headp = head; + return (error); +} + +/* + * comm_copydata() - copy data from the message (possibly to USPACE) + * if 'uspace': returns 0 on success or result from comm_dorecv() + * else: returns 0 on success, error otherwise (partial read -> panic) + */ +int +comm_copydata(void *data, int len, int uspace) +{ + mosix_link *mlink; + struct iovec iov; + struct msghdr msg = + { NULL, 0, &iov, 1, NULL, 0, MSG_WAITALL | MSG_NOSIGNAL }; + int error = 0; + mm_segment_t oldfs = { 0 }; +#ifdef CONFIG_MOSIX_DIAG + int zero_ok = 0; +#endif /* CONFIG_MOSIX_DIAG */ + + if(uspace && !dirty_all_remote_pages((unsigned long)data, len, 1)) + return(-ENOMEM); + DISABLE_EVENTS(); + + 
mlink = current->mosix.contact; + if(mlink->dlen == 0 && mlink->hidebuf) + { + mlink->flags |= COMM_HIDEDATA; + mlink->dlen = mlink->hidelen; + mlink->hidelen = 0; + } + +#ifdef CONFIG_MOSIX_DIAG + if(len & COMM_ZEROCOPYOK) + { + len &= ~COMM_ZEROCOPYOK; + zero_ok = 1; + } + if (len <= 0) { + printk("%s-comm_copydata(): invalid length %d\n", + desc_mostask(NULL), len); + panic("comm_copydata(): invalid length"); + } else if (mlink->dlen < len) { + if (!mlink->dlen) + { + if(zero_ok) + goto out; + printk("%s-comm_copydata(): no data (mlink=0x%x)\n", + desc_mostask(NULL), (int) mlink); + } + else + printk("%s-comm_copydata(): not enough (mlink=0x%x)\n", + desc_mostask(NULL), (int) mlink); + panic("comm_copydata(): mlink->dlen < len"); + } +#else + if(!mlink->dlen) + goto out; + len &= ~COMM_ZEROCOPYOK; +#endif /* CONFIG_MOSIX_DIAG */ + + if (mlink->flags & COMM_HIDEDATA) { + if (uspace) + error = copy_to_user(data, mlink->hideptr, len); + else + memcpy(data, mlink->hideptr, len); + + /* NOTE: at this point page-fault *cannot* happen */ + mlink->dlen -= len; + mlink->hideptr += len; + if (!mlink->dlen) { + kfree(mlink->hidebuf); + mlink->hideptr = NULL; + mlink->hidebuf = NULL; + mlink->flags &= ~COMM_HIDEDATA; + } + if (error) { + error = -EFAULT; + goto do_error; + } + goto out; + } + + if (!uspace) { /* switch to (KSPACE) */ + oldfs = get_fs(); + set_fs(KERNEL_DS); + } + + iov.iov_base = data; + iov.iov_len = len; + + error = comm_dorecv(mlink->sock, &msg, len); + + if (error == len) { + error = 0; + mlink->dlen -= len; + } else if (error < 0) { + + do_error: +#ifdef CONFIG_MOSIX_DIAG + if (error == -EFAULT && !uspace) + panic("comm_copydata: (2) got -EFAULT not in uspace"); +#endif /* CONFIG_MOSIX_DIAG */ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug) + printk("comm_copydata: wanted %d bytes, got %d\n", + len, error); +#endif /* CONFIG_MOSIX_DEBUG */ + if (error != -EFAULT) + comm_shutdown(mlink); + } else { +#ifdef CONFIG_MOSIX_DIAG + if (!uspace) + 
panic("comm_copydata: (1) got -EFAULT not in uspace"); +#endif /* CONFIG_MOSIX_DIAG */ + mlink->dlen -= error; + error = len - error; + } + + if (!uspace) + set_fs(oldfs); + +out: + ENABLE_EVENTS(); + return (error); +} + + +/* + * comm_recvdata() - receive entire data to an allocated buffer + */ +int +comm_recvdata(void **data) +{ + mosix_link *mlink; + char *buf; + struct iovec iov; + struct msghdr msg = + { NULL, 0, &iov, 1, NULL, 0, MSG_WAITALL | MSG_NOSIGNAL}; + mm_segment_t oldfs; + int error = 0; + + DISABLE_EVENTS(); + + mlink = current->mosix.contact; + if(mlink->dlen == 0 && mlink->hidebuf) + { +#ifdef CONFIG_MOSIX_DIAG + /* while this should be OK, we have no reason to reach here */ + printk("comm_recvdata of previously hidden?"); +#endif /* CONFIG_MOSIX_DIAG */ + mlink->flags |= COMM_HIDEDATA; + mlink->dlen = mlink->hidelen; + mlink->hidelen = 0; + } +#ifdef CONFIG_MOSIX_DIAG + if (!mlink->dlen) { + printk("%s-comm_recvdata(): no data (mlink = 0x%x)\n", + desc_mostask(NULL), (int) mlink); + panic("comm_recvdata(): mlink->dlen < len"); + } +#endif /* CONFIG_MOSIX_DIAG */ + + /* data was hidden: simply hand the hidden data to the caller */ + if (mlink->flags & COMM_HIDEDATA) { + /* actually, we never reach here in real-life */ + + *data = mlink->hidebuf; + + /* test for the pathologic case of calling comm_recvdata() + after a partial call to comm_copydata() ...*/ + if (mlink->hidebuf != mlink->hideptr) + panic("comm_recvdata: hide pathology"); + + mlink->hideptr = NULL; + mlink->hidebuf = NULL; + +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem++; + if (mlink->cntmem > 2) + mosix_panic("comm_recvdata: cntmem > 2"); +#endif /* CONFIG_MOSIX_DEBUG */ + goto out; + } + + buf = comm_malloc(mlink->dlen); +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem++; + if (mlink->cntmem > 2) + mosix_panic("comm_recvdata: cntmem > 2"); +#endif /* CONFIG_MOSIX_DEBUG */ + iov.iov_base = buf; + iov.iov_len = mlink->dlen; + oldfs = get_fs(); + set_fs(KERNEL_DS); + error = 
comm_dorecv(mlink->sock, &msg, mlink->dlen); + set_fs(oldfs); + if (error != mlink->dlen) { + if (error >= 0) + error = -EFAULT; + kfree(buf); +#ifdef CONFIG_MOSIX_DEBUG + mlink->cntmem--; +#endif /* CONFIG_MOSIX_DEBUG */ + comm_shutdown(mlink); + *data = 0; + } else { + error = 0; + *data = buf; + } + +out: + mlink->dlen = 0; + ENABLE_EVENTS(); + +#ifdef CONFIG_MOSIX_DIAG + if (error == -EFAULT) { + printk("%s-comm_recvdata: got error -EFAULT\n", + desc_mostask(NULL)); + mosix_panic("EFAULT(1)"); + } +#endif /* CONFIG_MOSIX_DIAG */ + return (error); +} + + +/* + * comm_hidedata() - hide data (possibly exisiting) in the socket, + * to allow the socket to be used for a page-fault. The data is put + * in a buffer pointed to by comm->hide. + */ +int +comm_hidedata() +{ + mosix_link *mlink; + struct iovec iov; + struct msghdr msg = + { NULL, 0, &iov, 1, NULL, 0, MSG_WAITALL | MSG_NOSIGNAL}; + mm_segment_t oldfs; + int error = 0; + + mlink = current->mosix.contact; + + /* if already hidden, or no data - return */ + if (mlink->hidebuf || !mlink->dlen) + return(0); + + DISABLE_EVENTS(); + + mlink->hideptr = mlink->hidebuf = comm_malloc(mlink->dlen); + + iov.iov_base = mlink->hidebuf; + iov.iov_len = mlink->dlen; + oldfs = get_fs(); + set_fs(KERNEL_DS); + error = comm_dorecv(mlink->sock, &msg, mlink->dlen); + set_fs(oldfs); + if (error != mlink->dlen) { + comm_shutdown(mlink); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_ERROR) + printk("%s-comm_hidedata: error %d\n", + desc_mostask(NULL), error); +#endif /* CONFIG_MOSIX_DEBUG */ + } else + error = 0; + mlink->hidelen = mlink->dlen; + mlink->dlen = 0; + ENABLE_EVENTS(); + + return (error); +} + +/* + * comm_flushdata() - flush remaining data from previous message + */ +void +comm_flushdata(int dlen) +{ + mosix_link *mlink; + struct iovec iov; + struct msghdr msg = { NULL, 0, &iov, 1, NULL, 0, 0 }; + mm_segment_t oldfs; + int n = 0; +#define COMM_FLUSH_BUF_SIZE 256 + static char comm_flush_buffer[COMM_FLUSH_BUF_SIZE]; 
/* global bin */
+
+	mlink = current->mosix.contact;
+	/* no socket data left but a hidden buffer exists: flush from it */
+	if(mlink->dlen == 0 && mlink->hidebuf)
+	{
+		mlink->flags |= COMM_HIDEDATA;
+		mlink->dlen = mlink->hidelen;
+		mlink->hidelen = 0;
+	}
+	/* COMM_ALLDATA means "whatever is left"; nothing left -> done */
+	if (dlen == COMM_ALLDATA && !(dlen = mlink->dlen))
+		return;
+
+#ifdef CONFIG_MOSIX_DEBUG
+	if (ds_debug & (DSDEB_COMMRECV | DSDEB_COMMSEND))
+		printk("%s-comm_flushdata: dlen = %d\n",
+			desc_mostask(NULL), dlen);
+#endif /* CONFIG_MOSIX_DEBUG */
+
+#ifdef CONFIG_MOSIX_DIAG
+	if (mlink->dlen < dlen) {
+		printk("%s-comm_flushdata(): no data (mlink = 0x%x)\n",
+			desc_mostask(NULL), (int) mlink);
+		panic("comm_flushdata(): mlink->dlen < len");
+	}
+#endif /* CONFIG_MOSIX_DIAG */
+
+	if (mlink->flags & COMM_HIDEDATA) {
+		/*
+		 * Skip exactly the flushed amount (as comm_copydata() does),
+		 * not the whole remainder: advancing by mlink->dlen left
+		 * hideptr past the end of hidebuf after a partial flush.
+		 */
+		mlink->hideptr += dlen;
+		mlink->dlen -= dlen;
+		if (!mlink->dlen) {
+			kfree(mlink->hidebuf);
+			mlink->hidebuf = NULL;
+			mlink->hideptr = NULL;
+			mlink->flags &= ~COMM_HIDEDATA;
+		}
+		return;
+	}
+
+	DISABLE_EVENTS();
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	/* drain the socket through the shared scratch buffer */
+	while (dlen)
+	{
+		iov.iov_base = comm_flush_buffer;
+		iov.iov_len = (dlen < COMM_FLUSH_BUF_SIZE) ?
dlen : + COMM_FLUSH_BUF_SIZE; + msg.msg_flags = MSG_WAITALL | MSG_NOSIGNAL; + if ((n = comm_dorecv(mlink->sock, &msg, iov.iov_len)) <= 0) + break; + dlen -= n; + mlink->dlen -= n; + } + set_fs(oldfs); + if (n <= 0) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR | DSDEB_COMMSEND | DSDEB_COMMRECV)) + printk("%s-comm_flushdata: err %d\n", + desc_mostask(NULL), n); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_shutdown(current->mosix.contact); + } + ENABLE_EVENTS(); +} + + +#define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) + +/* + * comm_peek() - peek the socket looking for data pending + */ +int +comm_peek(void) +{ + struct socket *sock; + int mask; + + sock = current->mosix.contact->sock; +#ifdef CONFIG_MOSIX_DIAG + if (!sock) { + mosix_panic("comm_peek: no sock"); + return (-EDIST); + } +#endif /* CONFIG_MOSIX_DIAG */ + + mask = sock->ops->poll(NULL, sock, NULL); + return ((mask & POLLIN_SET) ? 1 : 0); +} + + +/* + * comm_poll() - wait for a communication event, interrupt or MOSIX event + * returns 1 on communication events, and 0 otherwise + */ +static int +comm_poll(int mask, int interruptible, unsigned long timo) +{ + struct socket *sock; + int pollmask; + static struct file sighfile = {f_count: ATOMIC_INIT(1)}; + DECLARE_WAITQUEUE(wait, current); + + /* + * sighfile: we are required to supply a file to "hold" while we poll. 
+ * a bit ridiculous in this context, but nobody will notice because + * f_count will never drop to 0 + */ + sock = current->mosix.contact->sock; + if(!timo) + timo = MAX_SCHEDULE_TIMEOUT; + add_wait_queue(sock->sk->sleep, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + pollmask = sock->ops->poll(&sighfile, sock, NULL); + if ((pollmask & mask) || + (interruptible && signal_pending(current))) + break; + current->mosix.commpri = 1; + timo = schedule_timeout(timo); + current->mosix.commpri = 0; + if (timo <= 0) + break; + } + remove_wait_queue(sock->sk->sleep, &wait); + set_current_state(TASK_RUNNING); + return ((pollmask & mask) ? 1 : 0); +} + + +/* + * comm_wait() - wait for a message, or some other MOSIX event + * return 1 if there is a message, 0 if another event occured first. + */ +int +comm_wait(void) +{ + return (comm_poll(POLLIN_SET, 1, 0UL)); +} + +/* + * comm_send_urgent() - send notification using urgent data (OOB) + */ +int +comm_send_urgent(void) +{ + struct iovec iov; + struct msghdr msg = { NULL, 0, &iov, 1, NULL, 0, MSG_OOB|MSG_NOSIGNAL }; + mm_segment_t oldfs; + int error; + unsigned char dummy; + +#ifdef CONFIG_MOSIX_DIAG + if (!(current->mosix.dflags & DREMOTE)) + panic("comm_send_urgent: not REMOTE"); +#endif /* CONFIG_MOSIX_DIAG */ + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_COMMSEND | DSDEB_WHERETO)) + printk("%s-comm_send_urgent: notifying\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + dummy = 0xdb; + iov.iov_base = &dummy; + iov.iov_len = 1; + + add_mosix_log(current, MOSIX_LOG_SENDURG, 0xffff); + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + error = current->mosix.contact->sock->ops-> + sendmsg(current->mosix.contact->sock, &msg, 1, 0); + + set_fs(oldfs); + + add_mosix_log_ret(current, error); + + return(0); +} + +/* + * comm_test_urgent() - test if urgent data is pending (OOB) + */ +int +comm_test_urgent(void) +{ +#ifdef CONFIG_MOSIX_DIAG + if (!(current->mosix.dflags & DDEPUTY)) + 
panic("comm_test_urgent: not DEPUTY");
+#endif /* CONFIG_MOSIX_DIAG */
+
+	return(test_bit(SOCK_OOB_IN, &current->mosix.contact->sock->flags));
+}
+
+/*
+ * comm_take_urgent() - take the pending OOB out of the stream,
+ * preventing it from turning into a normal byte.
+ * Caller must make sure that there is no other data in the stream.
+ */
+void
+comm_take_urgent(void)
+{
+	int was = test_and_clear_bit(SOCK_OOB_IN,
+				&current->mosix.contact->sock->flags);
+
+#ifdef CONFIG_MOSIX_DEBUG
+	if(!was && (ds_debug & DSDEB_WHERETO))
+		printk("%s-comm_take_urgent: was not there!\n",
+			desc_mostask(NULL));
+#endif /* CONFIG_MOSIX_DEBUG */
+	add_mosix_log(current, MOSIX_LOG_RECVURG, was);
+}
+
+/*
+ * mosix_notify_urgent() - urgent (OOB) data arrived on 'sock':
+ * mark the socket and, if its owner is a DEPUTY, post DREQ_URGENT
+ * to it and wake it up.
+ * NOTE(review): find_task_by_sock() appears to return with skown_lock
+ * held and a reference on the task (both are released here) - confirm.
+ */
+void
+mosix_notify_urgent(struct socket *sock)
+{
+	struct task_struct *p;
+
+	set_bit(SOCK_OOB_IN, &sock->flags);
+	if (!(p = find_task_by_sock(sock)))
+		return;
+	spin_unlock(&skown_lock);
+	if (p->mosix.dflags & DDEPUTY)
+	{
+		tell_process(p, DREQ_URGENT);
+		wake_up_mosix(p);
+		add_mosix_log(p, MOSIX_LOG_RECVURG, 0xffff);
+	}
+	free_task_struct(p);
+}
+
+/*
+ * mosix_notify_receive() - normal data arrived on 'sock': wake the
+ * owning task.  With CONFIG_MOSIX_FS the MFS main server and tasks
+ * waiting for MFS data are handled first; otherwise only DREMOTE
+ * owners are given communication priority and woken.
+ */
+void
+mosix_notify_receive(struct socket *sock)
+{
+	struct task_struct *p = find_task_by_sock(sock);
+
+	if(!p)
+		return;
+#ifdef CONFIG_MOSIX_FS
+	if(p == mfs_main_server_task)
+	{
+		spin_unlock(&skown_lock);
+		mfs_new_request_arrived();
+		goto free_task;
+	}
+	if(test_bit(SOCK_WAIT_MFSDATA, &sock->flags))
+	{
+		spin_unlock(&skown_lock);
+		wake_up_process(p);
+		goto free_task;
+	}
+#endif /* CONFIG_MOSIX_FS */
+	spin_unlock(&skown_lock);
+
+	if(!(p->mosix.dflags & DREMOTE))
+	{
+		if (!(p->mosix.dflags & (DDEPUTY|DREMOTEDAEMON|DFINISHED)))
+			printk("mosix_notify_receive: pid = %d, "
+				"not DEPUTY or REMOTE\n", p->pid);
+		goto free_task;
+	}
+
+	p->mosix.commpri = 1;
+	current->need_resched = 1;
+	wake_up_mosix(p);
+ free_task:
+	free_task_struct(p);
+}
+
+/*
+ * comm_sendto() - send a datagram to someone
+ */
+int
+comm_sendto(int mos, void *data, int len, mosix_link *mlink, mosix_addr *to)
+{
+	struct iovec iov = {data, len};
+	struct
sockaddr sa; + struct msghdr msg = {NULL, 0, &iov, 1, NULL, 0, MSG_NOSIGNAL}; + mm_segment_t oldfs; + int error = -EDIST; + + if (to) + sa = to->saddr; + else if (!comm_set_address(-mos, &sa, 0)) + return (error); + +#ifdef CONFIG_MOSIX_DIAG + if (len <= 0) { + mosix_panic("comm_sendto: zero size message"); + return(-EDIST); + } +#endif /* CONFIG_MOSIX_DIAG */ + + msg.msg_name = &sa; + msg.msg_namelen = sizeof(sa); + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + error = mlink->sock->ops->sendmsg(mlink->sock, &msg, len, NULL); + + set_fs(oldfs); + +#ifdef CONFIG_MOSIX_DIAG + if(!error) { + mosix_panic("comm_sendto: sendmsg returned zero"); + return(-EDIST); + } +#endif /* CONFIG_MOSIX_DIAG */ + + return (error); +} + + +/* + * comm_recvfrom() - receive a datagram from someone + * NOTE NOTE NOTE: the timeout is in microseconds -- NOT ticks! + */ +int +comm_recvfrom(void *data, int len, mosix_link *mlink, mosix_addr *from, + unsigned long timo) +{ + struct iovec iov = {data, len}; + struct msghdr msg = { &from->saddr, sizeof(struct sockaddr), + &iov, 1, NULL, 0, 0 }; + mm_segment_t oldfs; + int error; + int nrecv = len, msgflg = 0; + DECLARE_WAITQUEUE(wait, current); + + if(timo) + { +#if MILLION % HZ + timo = timo * HZ / MILLION; +#else + timo = timo / (MILLION/HZ); +#endif + } + oldfs = get_fs(); + set_fs(KERNEL_DS); + + if (timo) + msgflg |= MSG_DONTWAIT; + else + timo = MAX_SCHEDULE_TIMEOUT; + + add_wait_queue(mlink->sock->sk->sleep, &wait); + while (1) { + set_current_state( + msgflg ? 
TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); + + error = mlink->sock->ops->recvmsg(mlink->sock, &msg, + nrecv, msgflg, 0); + if(error >= 0 && !net_to_mos((mosix_addr *)&from->saddr)) + { + comm_report_violation("information", &from->saddr); + error = -EPERM; + break; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_INFO) + printk("comm_recvfrom: recvmsg=%d\n", error); + if (error == 0) /* zero-size attack */ + error = -ERESTART; +#endif /* CONFIG_MOSIX_DEBUG */ + if (error > 0) + break; + + if (msgflg && timo <= 0) { + error = -EWOULDBLOCK; + break; + } + + timo = schedule_timeout(timo); + + if (signal_pending(current)) { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_INFO) + printk("%s: comm_recvfrom interrupted\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + error = -EINTR; + break; + } + } + remove_wait_queue(mlink->sock->sk->sleep, &wait); + set_current_state(TASK_RUNNING); + set_fs(oldfs); + + if (msg.msg_flags & MSG_TRUNC) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-comm_recvfrom: message truncated\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + error = -EDIST; + } + + return (error); +} + + +/* + * comm_recvfrompe() - wrapper for comm_recvfrom, fills in mosix id + * instead of source address. + * NOTE NOTE NOTE: the timeout is in microseconds -- NOT ticks! + */ +int +comm_recvfrompe(void *data, int len, mosix_link *mlink, + int *from, unsigned long timo) +{ + int error; + struct mosix_addr maddr; + + error = comm_recvfrom(data, len, mlink, from ? 
&maddr : NULL, timo); + if (from) + *from = net_to_mos(&maddr); + return (error); +} + + +/* + * routines for debugging (can call from debugger) + */ +inline void +comm_invalidate_address(mosix_addr *ma) +{ + ma->saddr.sa_family = AF_UNSPEC; +} + + +static inline struct socket * +comm_set_address(int mos, struct sockaddr *sa, int makesock) +{ + switch (comm_type) { + case AF_INET: + return (comm_ip_set_addr(mos, sa, makesock)); + /* NOT REACHED */ + case AF_UNSPEC: + if (mos == COMM_TOADDR) + return (0); + /* FALL THROUGH */ + default: + panic("comm_set_address: unknown comm_type"); + return(0); + } +} + + +/* + * comm_getname: fill in our name + */ +static int +comm_getname(struct socket *sock, struct sockaddr *saddr) +{ + switch (comm_type) { + case AF_INET: + return (comm_ip_getname(sock, saddr)); + break; + default: + panic("comm_getname: invalid comm_type"); + } +} + +/* + * comm_shutdown: shutdown socket + */ +static void +comm_shutdown(mosix_link *mlink) +{ + struct socket *sock; + if (!mlink) + return; + sock = mlink->sock; +#ifdef CONFIG_MOSIX_DIAG + if(!sock) + { + mosix_panic("comm_shutdown: no sock"); + return; + } +#endif /* CONFIG_MOSIX_DIAG */ + + spin_lock_irq(&skown_lock); + sock->sk->owner = NULL; + clear_bit(SOCK_INTER_MOSIX, &sock->flags); + spin_unlock_irq(&skown_lock); + if (mlink->flags & COMM_FULLLINK) { + if (mlink->hidebuf) { + kfree(mlink->hidebuf); + mlink->hidebuf = NULL; + mlink->hideptr = NULL; + mlink->hidelen = 0; + } + mlink->dlen = 0; + } + mlink->flags &= ~(COMM_WAITACCEPT | COMM_HIDEDATA); + if (sock->ops) + sock->ops->shutdown(sock, SEND_SHUTDOWN); +} + +static inline struct socket * +comm_ip_set_addr(int mos, struct sockaddr *saddr, int makesock) +{ + struct sockaddr_in *sa; + struct socket *sock = (struct socket *) 1; /* xxx - not NULL */ + int proto = IPPROTO_TCP; + int type = SOCK_STREAM; + unsigned short port = MIG_DAEMON_PORT; + + sa = (struct sockaddr_in *) saddr; + switch (mos) { + case COMM_LOOSE: + type = SOCK_DGRAM; 
+ proto = IPPROTO_UDP; + port = INFO_DAEMON_PORT; + break; + case COMM_TOADDR: + /* address was set by the caller */ + break; + case COMM_INFO: + proto = IPPROTO_UDP; + type = SOCK_DGRAM; + /* fall through */ + case COMM_MIGD: + memset((void *)sa, 0, sizeof(struct sockaddr_in)); + sa->sin_family = AF_INET; + sa->sin_addr.s_addr = INADDR_ANY; + if (mos == COMM_MIGD) + sa->sin_port = MIG_DAEMON_PORT; + else + sa->sin_port = INFO_DAEMON_PORT; + break; + case COMM_ACCEPT: + memset((void *)sa, 0, sizeof(struct sockaddr_in)); + sa->sin_family = AF_INET; + sa->sin_addr.s_addr = INADDR_ANY; + sa->sin_port = 0; + break; + default: + if (mos < 0) { + type = SOCK_DGRAM; + proto = IPPROTO_UDP; + port = INFO_DAEMON_PORT; + mos = -mos; + } + + if (!mos_to_net(mos, saddr)) { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug) + printk("comm_ip_set_addr, mos=%d: error1\n", + mos); +#endif /* CONFIG_MOSIX_DEBUG */ + return (0); + } + sa->sin_port = port; + break; + } + + if (makesock && sock_create(AF_INET, type, proto, &sock)) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMOPEN) + printk("%s-comm_ip_set_addr: sock_create failed\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + return (0); + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_COMMADDR) + printk("%s-comm_ip_set_addr: m=%d, ip=%x, t=%d, s=%x, p=%d \n", + desc_mostask(NULL), mos, sa->sin_addr.s_addr, type, + (int) sock, sa->sin_port); +#endif /* CONFIG_MOSIX_DEBUG */ + + return (sock); +} + + +/* + * comm_ip_getname: fill in our name + */ +static inline int +comm_ip_getname(struct socket *sock, struct sockaddr *saddr) +{ + int val; + struct sockaddr_in sa; + int error; + + val = sizeof(sa); + error = sock->ops->getname(sock, (struct sockaddr *)&sa, &val, 0); + if(!error) + { + error = mymos_to_net((void *)saddr); + ((struct sockaddr_in *)saddr)->sin_port = sa.sin_port; + } + return (error); +} + + +/* + * comm_migration_mode() - setup parameters for migration mode (on/off) + */ +void 
+comm_migration_mode(int on) +{ + switch (comm_type) { + case AF_INET: + comm_ip_migration_mode(on); + break; + default: + panic("comm_setup_socket: unknown comm_type"); + } +} + + +/* + * comm_ip_migration_mode() - setup parameters for migration mode (on/off) + */ +void +comm_ip_migration_mode(int on) +{ + int val = on ? 0 : 1; + mm_segment_t oldfs; + int error; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + error = current->mosix.contact->sock->ops->setsockopt( + current->mosix.contact->sock, IPPROTO_TCP, + TCP_NODELAY, (char *) &val, sizeof(val)); +#ifdef CONFIG_MOSIX_DEBUG + if (error & (ds_debug & DSDEB_ERROR)) { + printk("%s-comm_ip_migration_mode: failed (%d)\n", + desc_mostask(NULL), error); + } +#endif /* CONFIG_MOSIX_DEBUG */ + + set_fs(oldfs); +} + + +void +comm_data_ready(struct sock *sk, int len) +{ +#ifdef CONFIG_MOSIX_DIAG + if (sk->dead) + panic("comm_data_ready: sk->dead !\n"); +#endif /* CONFIG_MOSIX_DIAG */ + wake_up_interruptible(sk->sleep); +} + + +int +comm_getpeer(mosix_link *mlink) +{ + return (mlink->peer); +} + + +#define MAX_LINK_POOL 30 +#define MIN_LINK_POOL 2 +#define LINK_POOL_AGE 60 + +static int link_pool_age = LINK_POOL_AGE; + +static struct { + mosix_link *mlink; + int age; +} link_pool[MAX_LINK_POOL]; + +spinlock_t linkpool_lock = SPIN_LOCK_UNLOCKED; +static atomic_t link_pool_used = ATOMIC_INIT(0); +static int link_pool_next = 0; +static int linkpool_closed = 1; +DECLARE_WAIT_QUEUE_HEAD(wait_for_link_pool_used); + +void +comm_init_linkpool(void) +{ + int i; + + for(i = 0 ; i < MIN_LINK_POOL ; i++) + if (!(link_pool[i].mlink = comm_open(COMM_LOOSE, NULL, 0))) + break; + link_pool_next = i; + linkpool_closed = 0; +} + +void +comm_free_linkpool(void) +{ + int i; + + spin_lock(&linkpool_lock); + linkpool_closed = 1; + spin_unlock(&linkpool_lock); + wake_up(&wait_for_link_pool_used); + for (i = 0 ; i < link_pool_next ; i++) + comm_close(link_pool[i].mlink); + link_pool_next = 0; +} + +mosix_link * +comm_borrow_linkpool() +{ + 
mosix_link *mlink; + + while(1) + { + spin_lock(&linkpool_lock); + if(linkpool_closed) + { + spin_unlock(&linkpool_lock); + return(NULL); + } + if(link_pool_next) + { + mlink = link_pool[--link_pool_next].mlink; + atomic_inc(&link_pool_used); + spin_unlock(&linkpool_lock); + return(mlink); + } + if(atomic_read(&link_pool_used) <= MAX_LINK_POOL) + { + spin_unlock(&linkpool_lock); + atomic_inc(&link_pool_used); + if(!(mlink = comm_open(COMM_LOOSE, NULL, 0))) + { + atomic_dec(&link_pool_used); + wake_up(&wait_for_link_pool_used); + } + return(mlink); + } + spin_unlock(&linkpool_lock); + sleep_on_timeout(&wait_for_link_pool_used, HZ/10); + if (signal_pending(current)) + return(NULL); + } +} + + +void +comm_return_linkpool(mosix_link *mlink) +{ + atomic_dec(&link_pool_used); + spin_lock(&linkpool_lock); + if (!linkpool_closed && + link_pool_next + atomic_read(&link_pool_used) <= MAX_LINK_POOL) + { + link_pool[link_pool_next].mlink = mlink; + link_pool[link_pool_next].age = link_pool_age; + link_pool_next++; + spin_unlock(&linkpool_lock); + } + else + { + spin_unlock(&linkpool_lock); + comm_close(mlink); + } + wake_up(&wait_for_link_pool_used); +} + + +void +comm_age_linkpool() +{ + register int i; + struct mosix_link *mlink; + + spin_lock(&linkpool_lock); + for(i = MIN_LINK_POOL ; i < link_pool_next ; ) + if(--link_pool[i].age <= 0) + { + mlink = link_pool[i].mlink; + link_pool[i] = link_pool[--link_pool_next]; + spin_unlock(&linkpool_lock); + comm_close(mlink); + spin_lock(&linkpool_lock); + } + else + i++; + spin_unlock(&linkpool_lock); +} + +int +reserved_mosix_address(struct sockaddr *addr) +{ + int port = ((struct sockaddr_in *)addr)->sin_port; + + if(port != MIG_DAEMON_PORT && port != INFO_DAEMON_PORT) +#ifdef CONFIG_MOSIX_FS + if(port != MFS_MAIN_PORT) +#endif /* CONFIG_MOSIX_FS */ + return(0); + if(current->mosix.dflags & (DPASSING|DHEAVYSLEEP)) + return(0); +#ifdef CONFIG_MOSIX_SECUREPORTS + if(!capable(CAP_NET_RAW)) + return(1); +#endif /* 
CONFIG_MOSIX_SECUREPORTS */ + if (!net_to_mos((mosix_addr *)addr)) + return(0); +#ifndef CONFIG_MOSIX_SECUREPORTS + if(!capable(CAP_NET_RAW)) + return(1); +#endif /* CONFIG_MOSIX_SECUREPORTS */ + printk("The Super-User is allowed to access MOSIX ports -- " + "but it is not a good idea!\n"); + return(0); +} + +void +debug_commdump(int pid) +{ + struct task_struct *p = 0; + struct socket *sock = 0 ; + mosix_link *mlink = 0; + + if (pid == -1) + p = current; + else for_each_task(p) + if (p->pid == pid) + break; + if (p) { + mlink = p->mosix.contact; + if (mlink) + sock = mlink->sock; + else + sock = NULL; + printk("task=0x%x, socket=0x%x, sock=0x%x, mlink=0x%x\n", + (int)p, (int)sock, sock ? (int)sock->sk : 0,(int)mlink); + if (sock) + printk("socket-flags=0x%x, owner=%x\n", + (int)sock->flags, (int)sock->sk->owner); + if (mlink) { + printk("mlink: dlen=%d flags=0x%x peer=0x%x", + mlink->dlen, mlink->flags, (int)mlink->peer); +#ifdef CONFIG_MOSIX_DEBUG + printk(" count=%d cntmem=%d",mlink->count,mlink->cntmem); +#endif /* CONFIG_MOSIX_DEBUG */ + printk("\n"); + } + } else + printk("X: %s\n", current ? "no such pid" : "no current task"); +} diff -urN linux-2.4.17/mos/config.c linux_umopenmosix/mos/config.c --- linux-2.4.17/mos/config.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/config.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1062 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan, Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ + +static int +mosix_notify_reboot(struct notifier_block *this, unsigned long code, void *x) +{ + if(code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF || + code == SYS_RESTART) + expel(1); + return(NOTIFY_DONE); +} + +struct notifier_block mosix_notifier = +{ + notifier_call: mosix_notify_reboot, + priority: 10, /* Higher than drivers, so network still goes */ +}; + +int PE; +int NPE; +int MAXPE; +struct sockaddr my_mosix_net_address; + +#ifdef CONFIG_SMP +static spinlock_t modifying = SPIN_LOCK_UNLOCKED; /* use only TRYLOCK! */ +#endif /* CONFIG_SMP */ + +static int pe_ready; + +int nmosnet; +struct mosixnet *mosnet; +static char *mosnetstat; +rwlock_t mosnetstat_lock = RW_LOCK_UNLOCKED; + +DECLARE_WAIT_QUEUE_HEAD(wait_for_mosix_config); +DECLARE_WAIT_QUEUE_HEAD(wait_for_all_to_adjust); + +/* local subroutines */ +static int config_validate(void); +static int config_reduce_to_common(struct mosixnet *, int); +static int config_shutdown(void); + + +/* hooks for IP network */ +static int ip_to_mosix(struct sockaddr *); +static int ip_consistent(int, struct mosixnet *, int, int); +static void ip_showconf(int, struct mosixnet *, int); +static void ip_mos_to_net(int, int, struct sockaddr *); +static int ip_addr_to_ascii(struct sockaddr *, char *, int); +static struct mosixnet *ip_reduce_to_common(struct mosixnet *, int *); + +int +config_get_status(int pe) +{ + int ret; + + if (!mos_to_net(pe, NULL)) + return (-ENXIO); + pe--; + read_lock(&mosnetstat_lock); + ret = mosnetstat[pe >> 3] & (1 << (pe & 7)); + read_unlock(&mosnetstat_lock); + return(ret); +} + +int +config_set_status(int pe) +{ +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("set_status: setting for %d\n", pe); +#endif + pe--; + write_lock(&mosnetstat_lock); + 
mosnetstat[pe >> 3] |= (1 << (pe & 7)); + write_unlock(&mosnetstat_lock); + return (0); +} + +static int +config_shutdown(void) +{ + struct mosixnet *svmton; + int svnmton, svpe, svnpe, svmaxpe; + int error; + struct task_struct *t1, *t2; + + /* temporarily change configuration */ + lock_mosix(); + svpe = PE; + svnpe = NPE; + svmaxpe = MAXPE; + svmton = mosnet; + svnmton = nmosnet; + mosnet = NULL; + nmosnet = 0; + NPE = MAXPE = 0; + unlock_mosix(); + + /* bring back our processes, and expel foreign */ + if ((error = config_validate())) + goto fail; + lock_mosix(); + PE = 0; + unlock_mosix(); + info_reconfig(); + + /* wakeup our daemons */ + lock_mosix(); + if((t1 = info_proc)) + get_task_struct(t1); + if((t2 = mig_proc)) + get_task_struct(t2); + unlock_mosix(); + if(t1) + { + send_sig(SIGALRM, t1, 1); + free_task_struct(t1); + } + if(t2) + { + send_sig(SIGALRM, t2, 1); + free_task_struct(t2); + } + + lock_mosix(); + while ((mig_daemon_active || info_daemon_active) && + !signal_pending(current)) + { + unlock_mosix(); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/10); + lock_mosix(); + } + unlock_mosix(); + set_current_state(TASK_RUNNING); + if (signal_pending(current)) + { + error = -ERESTART; + goto fail; + } + + printk(KERN_NOTICE "MOSIX configuration disabled\n"); + unregister_reboot_notifier(&mosix_notifier); + + if (svnmton) { + kfree(svmton); + kfree(mosnetstat); + } + + lock_mosix(); + pe_ready = 0; + unlock_mosix(); +#ifdef CONFIG_MOSIX_FS + mfs_change_pe(); +#endif /* CONFIG_MOSIX_FS */ + + return (0); + +fail: + lock_mosix(); + PE = svpe; + NPE = svnpe; + MAXPE = svmaxpe; + mosnet = svmton; + nmosnet = svnmton; + unlock_mosix(); + wake_up(&wait_for_mosix_config); + return (error); +} + +void +wait_for_mosix_configuration(int *active) +{ + struct task_struct *p = current; + DECLARE_WAITQUEUE(wait, current); + + if(active) + { + lock_mosix(); + *active = 0; + unlock_mosix(); + } + add_wait_queue(&wait_for_mosix_config, &wait); + while(1) + 
{ + if(p->sigpending) + flush_signals(p); + set_current_state(TASK_INTERRUPTIBLE); + if(PE) + break; + schedule(); + } + remove_wait_queue(&wait_for_mosix_config, &wait); + set_current_state(TASK_RUNNING); + if(signal_pending(p)) + { + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + } + if(active) + { + lock_mosix(); + *active = 1; + unlock_mosix(); + } +} + +int +mosix_config_get_table(struct mosixnet **mos, int start, int count) +{ + int copy; + + if (count > MAX_MOSNET_ENTS) + count = MAX_MOSNET_ENTS; + if(!count) + count = nmosnet - start; + if(!PE || count < 0 || start < 0) /* a negative start would index before mosnet[] */ + return(-ENXIO); + if(count == 0) + return(0); + *mos = (struct mosixnet *) + kmalloc(sizeof(struct mosixnet) * count, GFP_USER); + if(!*mos) + return(-ENOMEM); + lock_mosix(); + if(!PE || !nmosnet || nmosnet < start) + { + unlock_mosix(); + kfree(*mos); + return(-ENXIO); + } + if(nmosnet == start) + { + unlock_mosix(); + kfree(*mos); + return(0); + } + + /* create a private copy */ + copy = nmosnet - start; + if (copy > count) + copy = count; + memcpy(*mos, &mosnet[start], sizeof(struct mosixnet) * copy); + unlock_mosix(); + return (copy); +} + +int +mosix_config_set_table(struct mosixnet *m, int nents, int newpe) +{ + register int i, j; + register struct mosixnet *t, *q; + char *mstat = NULL; + char foundme = 0; + int maxpe = 1; + int numpe = 0; + int error = 0; + +// if (!boot_cpu_data.hard_math) +// { +// printk("Sorry, MOSIX requires a math-unit.\n"); +// return(-ENODEV); +// } + if(!spin_trylock(&modifying)) + return(-EBUSY); + error = -E2BIG; + if (nents > MAX_MOSNET_ENTS) + goto out; + + /* + * PE == 0: (1) newpe == 0 || nents == 0 => return (0) + * (2) newpe > 0 && nents != 0 => (regular) + * PE != 0: (3) newpe == PE => (regular) + * (4) newpe == 0 || nents == 0 => (shutdown) + * (5) newpe > 0 (nmosnet == 1) => change PE, (regular) + */ + + if(!newpe || !nents) + { + if(m) + kfree(m); + error = PE ?
config_shutdown() : 0; + goto out; + } + + error = -EBUSY; + if(PE && newpe != PE && + (nmosnet != 1 || m->cnt != 1 || m->base != newpe)) + goto out; + + if(newpe == PE && nents == nmosnet && + !memcmp(m, mosnet, nmosnet * sizeof(struct mosixnet))) + { + /* no change */ + error = 0; + kfree(m); + goto out; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("%s-config_set_table: stage 1\n", + desc_mostask(NULL)); +#endif + + /* convention: first entry must hold the network type (AF_xxx) */ + error = -EINVAL; + if (m->saddr.sa_family != AF_INET) + goto out; + comm_type = m->saddr.sa_family; + + /* consistency checks */ + + for (t = m, i = 0; i < nents; i++, t++) { + t->saddr.sa_family = comm_type; + + if (t->cnt < 0 || t->base < 1 || t->base + t->cnt < 0 || + t->base + t->cnt - 1 > MOSIX_MAX) + goto out; + + if (t->cnt == 0) + { + for (q = m, j = 0 ; j < nents ; j++ , q++) + if (q->cnt > 0 && q->base <= t->base && + q->base + q->cnt > t->base) + break; + if(j == nents) + { + error = -ESRCH; + goto out; + } + } + else + { + for (q = m, j = 0; j < i; j++, q++) + if (q->cnt && ( + (q->base < t->base && q->base + q->cnt > t->base) || + (q->base >= t->base && q->base < t->base + t->cnt))) + { + error = -EXDEV; + goto out; + } + + if (t->base + t->cnt - 1 > maxpe) + maxpe = t->base + t->cnt - 1; + + if (newpe >= t->base && newpe < t->base + t->cnt) + foundme = 1; + + numpe += t->cnt; + } + } + switch (comm_type) { + case AF_INET: + error = ip_consistent(newpe, m, nents, numpe); + break; + default: + error = 0; + } + if (error) + goto out; + error = -ESRCH; + if (!foundme) + goto out; + error = -ENOMEM; + i = (maxpe + 7) >> 3; + if (!(mstat = (char *) kmalloc(i, GFP_KERNEL))) + goto out; + memset(mstat, 0, i); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("%s-config_set_table: stage 2\n", + desc_mostask(NULL)); +#endif + + if(PE && (error = config_reduce_to_common(m, nents))) + { + kfree(mstat); + goto out; + } + + /* COMMIT 
POINT */ + + lock_mosix(); + write_lock(&mosnetstat_lock); + if (PE) { + int i, j, n; + + if(newpe == PE) + for (i = 0; i < nents; i++) + for (n = m[i].base - 1 , j = 0 ; j < m[i].cnt ; j++ , n++) + mstat[n >> 3] |= (mosnetstat[n >> 3] & (1 << (n & 7))); + kfree(mosnetstat); + kfree(mosnet); + } + + mosnetstat = mstat; + NPE = numpe; + MAXPE = maxpe; + nmosnet = nents; + mosnet = m; + write_unlock(&mosnetstat_lock); + + pe_ready = newpe; + if(newpe != PE) + { + i = PE; + PE = newpe; + mos_to_net(PE, (void *)&my_mosix_net_address); + unlock_mosix(); + info_startup(); + if (!i) + { + comm_startup(); + register_reboot_notifier(&mosix_notifier); + wake_up(&wait_for_mosix_config); + } + } + else + unlock_mosix(); +#ifdef CONFIG_MOSIX_FS + mfs_change_pe(); +#endif /* CONFIG_MOSIX_FS */ + printk(KERN_NOTICE "MOSIX configuration %s: This is MOSIX #%d " + "(of %d configured)\n", PE ? "changed" : "set", newpe, NPE); + switch (comm_type) + { + case AF_INET: + ip_showconf(newpe, mosnet, nmosnet); + break; + } + error = 0; + + out: + spin_unlock(&modifying); + return(error); +} + +/* nth_node: n starts at 0 and does not count the local node */ +int +nth_node(int n) +{ + register int i; + struct mosixnet *t; + + MOSIX_LOCKED; + for(t = mosnet , i = 0 ; i < nmosnet ; i++ , t++) + { + if(t->base <= PE && t->base + t->cnt > PE) + { + if(t->base + n < PE) + return(t->base + n); + n++; + } + if(t->cnt <= n) + n -= t->cnt; + else + return(t->base + n); + } + panic("nth_node"); +} + +int +mosix_config_get_pe(void) +{ + return (PE); +} + +int +mosix_config_get_tentative_pe(void) +{ + int ret; + + lock_mosix(); + ret = pe_ready; + unlock_mosix(); + return(ret); +} + +int +mosix_config_get_limit(void) +{ + int ret; + + lock_mosix(); + ret = PE ? 
MAXPE : 0; + unlock_mosix(); + return(ret); +} + +int +mosix_config_set_pe(int newpe) +{ + int error = 0; + + if (newpe < 0 || newpe > MOSIX_MAX) + return (-EINVAL); + if(!spin_trylock(&modifying)) + return(-EBUSY); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("setting PE from %d to %d\n", PE, newpe); +#endif + + if (!PE) { + lock_mosix(); + pe_ready = newpe; + unlock_mosix(); + goto out; + } + + if (newpe == PE) + goto out; + + error = -EBUSY; + if (newpe && (nmosnet > 1 || mosnet->cnt != 1)) + goto out; + + if (newpe) + { + struct mosixnet m = *mosnet; + + spin_unlock(&modifying); + m.base = newpe; + m.cnt = 1; + error = mosix_config_set_table(&m, 1, newpe); + } + else + { + spin_unlock(&modifying); + error = mosix_config_set_table(NULL, 0, 0); + } + return(error); + out: + spin_unlock(&modifying); + return (error); +} + +/* + * mos_to_net: return 1 if mos# exists (possibly fill sockaddr) + */ +int +mos_to_net(int mos, void *sa) +{ + int n; + int ret = 0; + +#ifdef CONFIG_MOSIX_DIAG + if(mos == COMM_TOADDR) { + mosix_panic("COMM_TOADDR in mos_to_net"); + return (1); + } +#endif /* CONFIG_MOSIX_DIAG */ + + lock_mosix(); + for (n = 0; n < nmosnet; n++) + if (mos >= mosnet[n].base && mos < mosnet[n].base + mosnet[n].cnt) + { + if(sa) + switch (comm_type) { + case AF_INET: + ip_mos_to_net(mos, n, (struct sockaddr *)sa); + break; + case AF_INET6: + panic("mos_to_net: AF_INET6 unsupported"); + /* NOT REACHED */ + default: + panic("mos_to_net: unknown type"); + } + ret = 1; + } + unlock_mosix(); + return(ret); +} + +int +mymos_to_net(void *sa) +{ + int ret = 0; + + lock_mosix(); + if(PE) + *(struct sockaddr *)sa = my_mosix_net_address; + else + ret = -ENETDOWN; + unlock_mosix(); + return(ret); +} + +int +count_mosix_nodes(void) +{ + int i, n = 0; + + lock_mosix(); + for(i = 0 ; i < nmosnet ; i++) + n += mosnet[i].cnt; + unlock_mosix(); + return(n); +} + +#ifdef CONFIG_MOSIX_FS +int +scan_mosix_nodes(int start, int *first, int *last) +{ + int i; + 
int max = MOSIX_MAX + 1; + + lock_mosix(); + for(i = 0 ; i < nmosnet ; i++) + if(mosnet[i].cnt && mosnet[i].base < max && + mosnet[i].base + mosnet[i].cnt > start) + { + *last = mosnet[i].base + mosnet[i].cnt - 1; + if(mosnet[i].base <= start) + { + *first = start; + max = 0; /* eg. return(1) */ + break; + } + *first = mosnet[i].base; + max = mosnet[i].base; + } + unlock_mosix(); + return(max != MOSIX_MAX+1); +} +#endif /* CONFIG_MOSIX_FS */ + +int +mos_to_ascii(int mos, char *str, int len) +{ + struct sockaddr sa; + + if (!mos_to_net(mos, &sa)) + return (-EDIST); + switch (comm_type) { + case AF_INET: + return (ip_addr_to_ascii(&sa, str, len)); + case AF_INET6: + panic("mos_to_ascii: AF_INET6 unsupported"); + default: + panic("mos_to_ascii: unknown type"); + } +} + + +int +net_to_mos(mosix_addr *sa) +{ + switch (comm_type) { + case AF_INET: + return (ip_to_mosix(&sa->saddr)); + /* NOT REACHED */ + case AF_INET6: + panic("net_to_mos: AF_INET6 unsupported"); + /* NOT REACHED */ + default: + return(0); + } +} + +int +config_validate(void) +{ + register struct task_struct *p; + DECLARE_WAITQUEUE(wait, current); + int err = 0; + + read_lock(&tasklist_lock); + for_each_task(p) + if((p->mm || (p->mosix.dflags & (DDEPUTY|DREMOTE))) && + !(p->mosix.dflags & DFINISHED)) + { + tell_process(p, DREQ_CHECKCONF); + wake_up_mosix(p); + } + read_unlock(&tasklist_lock); + add_wait_queue(&wait_for_all_to_adjust, &wait); + while(1) + { + set_current_state(TASK_INTERRUPTIBLE); + read_lock(&tasklist_lock); + for_each_task(p) + if(process_told(p, DREQ_CHECKCONF)) + break; + read_unlock(&tasklist_lock); + if(p == &init_task) + break; + schedule(); + if (signal_pending(current)) + { + err = -ERESTARTSYS; + break; + } + } + remove_wait_queue(&wait_for_all_to_adjust, &wait); + set_current_state(TASK_RUNNING); + return(err); +} + +static int +config_reduce_to_common(struct mosixnet *nm, int nmn) +{ + struct mosixnet *mt, *svmton; + int svnmton; + int error; + +#ifdef CONFIG_MOSIX_DEBUG + if 
(ds_debug & DSDEB_CONFIG) + printk("%s-config_reduce_to_common: stage 1\n", + desc_mostask(NULL)); +#endif + + switch (comm_type) { + case AF_INET: + mt = ip_reduce_to_common(nm, &nmn); + break; + case AF_INET6: + panic("config_reduce_to_common: AF_INET6 unsupported"); + /* NOT REACHED */ + default: + panic("config_reduce_to_common: unknown type"); + } + + if (!mt) + return (-ENOMEM); + + svmton = mosnet; + svnmton = nmosnet; + lock_mosix(); + mosnet = mt; + nmosnet = nmn; + unlock_mosix(); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("%s-config_reduce_to_common: stage 2\n", + desc_mostask(NULL)); +#endif + + info_reconfig(); + + if ((error = config_validate())) { + lock_mosix(); + mosnet = svmton; + nmosnet = svnmton; + unlock_mosix(); + kfree(mt); + return (error); + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_CONFIG) + printk("%s-config_reduce_to_common: stage 3 (done)\n", + desc_mostask(NULL)); +#endif + + kfree(svmton); + return(0); +} + +int +i_am_in_a_wrong_place(void) +{ + register struct task_struct *p = current; + + if(p->mosix.dflags & (DPASSING|DINCOMING)) + return(0); /* not time to hit yet */ + switch(p->mosix.dflags & (DDEPUTY|DREMOTE)) + { + case DREMOTE: + if (mos_to_net(p->mosix.deppe, 0) == 0) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_EXPEL) + printk("%s:my home (%d) no longer recognised!\n", + desc_mostask(NULL), p->mosix.deppe); +#endif /* CONFIG_MOSIX_DEBUG */ + return(1); + } + break; + case DDEPUTY: + if (mos_to_net(p->mosix.whereami, 0) == 0) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_EXPEL) + printk("%s: where I am (%d) is no longer recognised\n", + desc_mostask(NULL), + p->mosix.whereami); +#endif /* CONFIG_MOSIX_DEBUG */ + return(1); + } + } + done_checking_conf(); + return(0); +} + +void +done_checking_conf(void) +{ + register struct task_struct *p; + + read_lock(&tasklist_lock); + process_ack(current, DREQ_CHECKCONF); + for_each_task(p) + if(process_told(p, DREQ_CHECKCONF)) + 
break; + read_unlock(&tasklist_lock); + if(p == &init_task) /* we were last to check */ + wake_up(&wait_for_all_to_adjust); +} + + + +/* + * Network dependent part of config: + * current implementation support only IP (not including v6). + */ + +#define NET_TO_IP(sa) (((struct sockaddr_in *) &(sa))->sin_addr.s_addr) +#define NET_TO_IP_H(sa) (ntohl(NET_TO_IP(sa))) + +static void +ip_mos_to_net(int mos, int n, struct sockaddr *sa) +{ + *sa = mosnet[n].saddr; + NET_TO_IP(*sa) = htonl(NET_TO_IP_H(*sa) + mos - mosnet[n].base); +} + + +static int +ip_to_mosix(struct sockaddr *sa) +{ + int i, ip, mip, c, ret = 0; + + ip = NET_TO_IP_H(*sa); + + lock_mosix(); + for (i = 0 ; i < nmosnet ; i++) + { + mip = NET_TO_IP_H(mosnet[i].saddr); + c = mosnet[i].cnt ? : 1; + if(ip >= mip && ip < mip + c) + { + ret = mosnet[i].base + ip - mip; + break; + } + } + unlock_mosix(); + return(ret); +} + +static struct mosixnet * +ip_reduce_to_common(struct mosixnet *nm, int *nmn) +{ + struct mosixnet *mt; + register int i, j, n = 0; + int b1, e1, b2, e2; + + /* 2*MAX_MOSNET_ENTS should cover the worst-case intersection */ + + mt = kmalloc(2 * MAX_MOSNET_ENTS * sizeof(struct mosixnet), GFP_KERNEL); + if (!mt) + return (0); + for (i = 0; i < *nmn; i++) + for (j = 0; j < nmosnet; j++) + if (NET_TO_IP_H(mosnet[j].saddr) - mosnet[j].base == + NET_TO_IP_H(nm[i].saddr) - nm[i].base) + { + if ((nm[i].cnt != 0) != (mosnet[j].cnt != 0)) + continue; + b1 = nm[i].base; + e1 = nm[i].base + nm[i].cnt - 1; + b2 = mosnet[j].base; + e2 = mosnet[j].base + mosnet[j].cnt - 1; + + if (b2 > b1) + b1 = b2; + if (e2 < e1) + e1 = e2; + if (e1 >= b1) { +#ifdef CONFIG_MOSIX_DIAG + if (n >= 2*MAX_MOSNET_ENTS) + panic("ip_reduce_to_common: algorithm failed"); +#endif + mt[n].base = b1; + mt[n].cnt = e1 - b1 + 1; + mt[n].saddr.sa_family = AF_INET; + NET_TO_IP(mt[n].saddr) = htonl(NET_TO_IP_H(nm[i].saddr) + + b1 - nm[i].base); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug) + printk("setpe: common chunk=%d/%x/%d\n", + mt[n].base, 
(int)NET_TO_IP(mt[n].saddr), + mt[n].cnt); +#endif /* CONFIG_MOSIX_DEBUG */ + n++; + } + } + *nmn = n; + return (mt); +} + +static int +ip_consistent(int pe, struct mosixnet *m, int nents, int numpe) +{ + int i, j; + struct mosixnet *t, *q; + + for (t = m, i = 0; i < nents; i++, t++) + if (t->cnt == 0) /* an ALIAS address */ + for (q = m , j = 0 ; j < i ; j++ , q++) + { + /* check if this IP appears more than once */ + if (q->cnt == 0 && + NET_TO_IP_H(q->saddr) == NET_TO_IP_H(t->saddr)) + { + if (q->base != t->base) + return (-EXDEV); + } + else if (q->cnt != 0 && + NET_TO_IP_H(q->saddr) <= NET_TO_IP_H(t->saddr) && + NET_TO_IP_H(q->saddr) + q->cnt > NET_TO_IP_H(t->saddr) + && q->base + NET_TO_IP_H(t->saddr) - + NET_TO_IP_H(q->saddr) != t->base) + return (-EXDEV); + } + else /* a normal range */ + { + if (NET_TO_IP_H(t->saddr) + t->cnt <= NET_TO_IP_H(t->saddr)) + return (-EINVAL); + + for (q = m , j = 0 ; j < i ; j++ , q++) + if (q->cnt == 0 && + NET_TO_IP_H(t->saddr) <= NET_TO_IP_H(q->saddr) && + NET_TO_IP_H(t->saddr)+t->cnt > NET_TO_IP_H(q->saddr) + && t->base + NET_TO_IP_H(q->saddr) - NET_TO_IP_H(t->saddr) + != q->base) + return (-EXDEV); + else if (q->cnt != 0 && ( + (NET_TO_IP_H(q->saddr) < NET_TO_IP_H(t->saddr) && + NET_TO_IP_H(q->saddr) + q->cnt > NET_TO_IP_H(t->saddr)) || + (NET_TO_IP_H(q->saddr) >= NET_TO_IP_H(t->saddr) && + NET_TO_IP_H(q->saddr) < NET_TO_IP_H(t->saddr) + t->cnt))) + return (-EXDEV); + } + + if (PE && numpe > 1) + /* allow change of address only if PE remains alone */ + for (t = m, i = 0; i < nents; i++, t++) + { + struct sockaddr sa; + + if (t->cnt != 0 && pe >= t->base && pe < t->base + t->cnt && + (!mos_to_net(pe, &sa) || NET_TO_IP_H(sa) != + NET_TO_IP_H(t->saddr) + (pe - t->base))) + return (-EBUSY); + } + + return (0); +} + + +static int +ip_addr_to_ascii(struct sockaddr *sa, char *buf, int len) +{ + int a, b, c, d; + char tmp[20]; /* should be enough for ALL inet addresses */ + unsigned long ipaddr; + + ipaddr = htonl(((struct 
sockaddr_in *) sa)->sin_addr.s_addr); + + a = (ipaddr & 0xff000000) >> 24; + b = (ipaddr & 0x00ff0000) >> 16; + c = (ipaddr & 0x0000ff00) >> 8; + d = (ipaddr & 0x000000ff); + + sprintf(tmp, "%d.%d.%d.%d", a, b, c, d); + strncpy(buf, tmp, len - 1); + buf[len - 1] = '\0'; + return (strlen(tmp)); +} + + +static void +ip_showconf(int pe, struct mosixnet *m, int nents) +{ + int i; + int a, b, c, d; + long ipaddr; + + for(i = 0 ; i < nents ; i++, m++) + { + ipaddr = ntohl(NET_TO_IP(m->saddr)); + a = (ipaddr & 0xff000000) >> 24; + b = (ipaddr & 0x00ff0000) >> 16; + c = (ipaddr & 0x0000ff00) >> 8; + d = ipaddr & 0xff; + if (m->cnt == 0) + printk(KERN_NOTICE "MOSIX #%d is aliased to IP address " + "%d.%d.%d.%d\n", m->base, a, b, c, d); + else if (m->cnt == 1) + printk(KERN_NOTICE "MOSIX #%d is at IP address " + "%d.%d.%d.%d\n", m->base, a, b, c, d); + else + printk(KERN_NOTICE "MOSIX range: %d-%d begins at " + "%d.%d.%d.%d\n", m->base, + m->base + m->cnt - 1, a, b, c, d); + } +} + +#ifdef CONFIG_MOSIX_DEBUG +void +ip_config_dump(void) +{ + int i, a, b, c, d; + long ipaddr; + + for (i = 0; i < nmosnet; i++) { + ipaddr = ntohl(NET_TO_IP(mosnet[i].saddr)); /* host order, as in ip_showconf() */ + a = (ipaddr & 0xff000000) >> 24; + b = (ipaddr & 0x00ff0000) >> 16; + c = (ipaddr & 0x0000ff00) >> 8; + d = (ipaddr & 0x000000ff); + printk("%-8d-%8d %d.%d.%d.%d (%d)\n", + mosnet[i].base, mosnet[i].base+mosnet[i].cnt - 1, + a, b, c, d, mosnet[i].saddr.sa_family); + } +} +#endif /* CONFIG_MOSIX_DEBUG */ diff -urN linux-2.4.17/mos/copy_unconf linux_umopenmosix/mos/copy_unconf --- linux-2.4.17/mos/copy_unconf Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/copy_unconf Wed Jun 26 23:45:18 2002 @@ -0,0 +1,14 @@ +#!/bin/sh - +echo "/* alternate.c: automatically generated with copy of unconfigured routines */" +echo +if [ "$1" != y ] +then + awk ' \ + /^#include/ {print $0; next; } \ + /^aout_remote_init_mm\(/ { on=1 ; print "\nvoid" ; } \ + /^unsigned long .*create_aout_tables\(/ { on=1 ; print "\n#ifdef CONFIG_MOSIX"; } \ +
/^static void set_brk/ { on=1; print ""; } + /^}/ { if(on) print "}" ; on=0; next; } \ + on == 1 { print $0; } + ' < $2 +fi diff -urN linux-2.4.17/mos/decay.c linux_umopenmosix/mos/decay.c --- linux-2.4.17/mos/decay.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/decay.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int slow_alpha = DEFAULT_SLOW_ALPHA; +int fast_alpha = DEFAULT_FAST_ALPHA; +int decay_interval = DEFAULT_DECAY_INTERVAL; + +void +mosix_decay_exec(void) +{ + register typeof (current->mosix.dpolicy) *p = &current->mosix.dpolicy; + + if(!(*p & (DADV_EXEC|DADV_EXECONCE))) + *p = (*p & ~DADV_POLICY) | DADV_DEFAULT; + *p &= ~DADV_EXECONCE; +} + +int +decay_modify_bit(int on, int bit) +{ + if(on == -1) + return((current->mosix.dpolicy & bit) != 0); + if((on != 0) == ((current->mosix.dpolicy & bit) != 0)) + return(0); + if(on) + current->mosix.dpolicy |= bit; + else + current->mosix.dpolicy &= ~bit; + if(current->mosix.dflags & DDEPUTY) + deputy_update_remote_decay(); + return(0); +} + +int +decay_inherit(int on) +{ + return(decay_modify_bit(on, DADV_INHERIT)); +} + +int +decay_exec(int on) +{ + return(decay_modify_bit(on, DADV_EXEC)); +} + +int +decay_execonce(int on) +{ + return(decay_modify_bit(on, DADV_EXECONCE)); +} + +int +decay_set(int policy, int quotient, int seconds) +{ + register struct task_struct *p = current; + int prev; + + if(policy == (p->mosix.dpolicy &
DADV_POLICY) && + (policy != DADV_OWNDECAY || (quotient == p->mosix.decay && + seconds == p->mosix.deccycle))) + return(0); + if(seconds < 1 || seconds > 65535 || + quotient < 0 || quotient > DECAY_QUOTIENT) + return(-EINVAL); + if(policy == DADV_CPU || policy == DADV_NOCPU || + (prev = (p->mosix.dpolicy & DADV_POLICY)) == DADV_CPU || + prev == DADV_NOCPU) + mosix_clear_statistics(); + p->mosix.dpolicy = (p->mosix.dpolicy & ~DADV_POLICY) | policy; + p->mosix.decay = quotient; + p->mosix.deccycle = seconds; + if(p->mosix.dflags & DDEPUTY) + deputy_update_remote_decay(); + return(0); +} + +int +decay_get(int policy) +{ + return((current->mosix.dpolicy & DADV_POLICY) == policy); +} + +void +decay_clear(void) +{ + struct decay_h d; + + d.policy = DADV_CLEAR; + if(current->mosix.dflags & DDEPUTY) + deputy_request(DEP_UPDATE_DECAY, &d, sizeof(d), NULL, 0, 0, NULL, 0); + else + mosix_clear_statistics(); +} + +void +deputy_update_remote_decay(void) +{ + struct decay_h d; + struct mosix_task *m = &current->mosix; + + d.policy = m->dpolicy; + d.deccycle = m->deccycle; + d.decay = m->decay; + deputy_request(DEP_UPDATE_DECAY, &d, sizeof(d), NULL, 0, 0, NULL, 0); +} + +int +remote_setdecay(struct decay_h *d) +{ + int prev, new; + struct mosix_task *m = &current->mosix; + + if(d->policy == DADV_CLEAR) + mosix_clear_statistics(); + else + { + prev = m->dpolicy & DADV_POLICY; + new = d->policy & DADV_POLICY; + if(prev == DADV_CPU || prev == DADV_NOCPU + || new == DADV_CPU || new == DADV_NOCPU) + mosix_clear_statistics(); + m->dpolicy = d->policy; + m->deccycle = d->deccycle; + m->decay = d->decay; + } + comm_free(d); + return(comm_send(DEP_UPDATE_DECAY|REPLY, NULL, 0, NULL, 0, 0)); +} + +void +inc_decays(void) +{ + struct task_struct *p; + register struct mosix_task *m; + register unsigned char policy; + + read_lock(&tasklist_lock); + for_each_task(p) + { + m = &p->mosix; + if(!(m->dflags & DDEPUTY) && + ((policy = m->dpolicy & DADV_POLICY) == DADV_SLOWDECAY + || policy == DADV_FASTDECAY || +
policy == DADV_OWNDECAY) && + m->decsecs != 0xffff) + m->decsecs++; + } + read_unlock(&tasklist_lock); +} + +void +do_decay(void) +{ + register struct mosix_task *m = &current->mosix; + int d; + int64_t ms; +#ifdef CONFIG_MOSIX_FS + int i; +#endif /* CONFIG_MOSIX_FS */ + /* an expensive 64-bit-division saving macro: */ +#define proportionate(fld) if(m->fld > (0xffffffffUL/DECAY_QUOTIENT)) \ + m->fld = m->fld * d / DECAY_QUOTIENT; \ + else m->fld = ((unsigned long)m->fld) * d / DECAY_QUOTIENT + + switch(m->dpolicy & DADV_POLICY) + { + case DADV_SLOWDECAY: + if(m->decsecs < decay_interval) + return; + d = slow_alpha; + break; + case DADV_FASTDECAY: + if(m->decsecs < decay_interval) + return; + d = fast_alpha; + break; + case DADV_OWNDECAY: + if(m->decsecs < m->deccycle) + return; + d = m->decay; + break; + default: + return; + } +#ifdef CONFIG_MOSIX_DIAG + if(m->dflags & DDEPUTY) + mosix_panic("decay on deputy"); +#endif /* CONFIG_MOSIX_DIAG */ + read_lock(&tasklist_lock); + m->decsecs = 0; + read_unlock(&tasklist_lock); + lock_mosix(); + proportionate(cutime); + proportionate(ndemandpages); + proportionate(nsyscalls); + proportionate(ncopyins); + proportionate(copyinbytes); + proportionate(ncopyouts); + proportionate(copyoutbytes); + proportionate(pagetime); +#ifdef CONFIG_MOSIX_FS + if(m->mfs_stats) + for(i = m->mfs_stats->nnodes-1 ; i >= 0 ; i--) + { + proportionate(mfs_stats->conns[i]); + proportionate(mfs_stats->inbytes[i]); + proportionate(mfs_stats->outbytes[i]); + if(m->mfs_stats->conns[i] == 0) + { + int last = --m->mfs_stats->nnodes; + + if(i == last) + continue; + m->mfs_stats->nodes[i] = m->mfs_stats->nodes[last]; + m->mfs_stats->conns[i] = m->mfs_stats->conns[last]; + m->mfs_stats->inbytes[i] = m->mfs_stats->inbytes[last]; + m->mfs_stats->outbytes[i] = m->mfs_stats->outbytes[last]; + if(m->mfs_stats->hint == last) + m->mfs_stats->hint = i; + } + } +#endif /* CONFIG_MOSIX_FS */ + + ms = ticks_to_ms(current->times.tms_utime + m->uttime) - m->dctime; +#ifdef
CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DECAY) + printk("decaying %s by adding %d/%d of %d to %d\n", + desc_mostask(m), DECAY_QUOTIENT - d, DECAY_QUOTIENT, + (int)ms, (int)m->dctime); +#endif /* CONFIG_MOSIX_DEBUG */ + if(ms > 0) + m->dctime += ms * (DECAY_QUOTIENT - d) / DECAY_QUOTIENT; + unlock_mosix(); +} diff -urN linux-2.4.17/mos/deputy.c linux_umopenmosix/mos/deputy.c --- linux-2.4.17/mos/deputy.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/deputy.c Fri Jun 28 00:32:15 2002 @@ -0,0 +1,1481 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +int x86_udelay_tsc = 0; + +static inline int +HAS_ASYNC(struct task_struct *p) +{ + process_ack(p, DREQ_CHECKSTAY); + return(p->mosix.prequest || p->mosix.whereto || + process_told(p, (DREQ_CAPCNG|DREQ_DFSASYNC|DREQ_NICECNG| + DREQ_INFOCNG|DREQ_UPDOVERHEADS|DREQ_URGENT)) || + (p->mosix.stay & DSTAY) || + (process_told(p, DREQ_CHECKCONF) && i_am_in_a_wrong_place()) + ); +} + +void +mosix_inform_remote_of_info(void) +{ + struct disclosure_h d; + struct task_struct *p = current; + + d.disclosure = p->mosix.disclosure; + d.uid = p->uid; + d.gid = p->gid; + d.pgrp = p->pgrp; + d.session = p->session; + d.tgid = p->tgid; + memcpy(d.comm, p->comm, sizeof(d.comm)); + if(deputy_request(DEP_INFO, &d, sizeof(d), NULL, 0, 0, NULL, 0)) + 
deputy_die_on_communication(); +} + +void +deputy_async_requests(void) +{ + register struct task_struct *p = current; + register struct mosix_task *m = &p->mosix; + int w; + + check_again: + process_ack(p, DREQ_CHECKSTAY); + while(m->prequest) + process_requests(); + if(process_told(p, DREQ_URGENT)) + { + process_ack(p, DREQ_URGENT); + comm_take_urgent(); + if(deputy_request(DEP_TAKEURGENT, NULL, 0, NULL, 0, 0, + (void **)&w, -sizeof(w))) + goto check_again; + if(w) + mosix_add_to_whereto(p, w); + } + if(m->whereto) + { + follow_whereto(); + if(!(m->dflags & DDEPUTY)) + return; + goto check_again; + } + if(process_told(p, DREQ_CAPCNG)) + sync_caps(); +#ifdef CONFIG_MOSIX_DFSA + if(process_told(p, DREQ_DFSASYNC)) + deputy_resync_dfsa(); +#endif /* CONFIG_MOSIX_DFSA */ + if(process_told(p, DREQ_NICECNG)) + { + process_ack(p, DREQ_NICECNG); + mosix_inform_remote_of_nice(); + goto check_again; + } + if(process_told(p, DREQ_INFOCNG)) + { + process_ack(p, DREQ_INFOCNG); + mosix_inform_remote_of_info(); + goto check_again; + } + if(process_told(p, DREQ_UPDOVERHEADS)) + { + process_ack(p, DREQ_UPDOVERHEADS); + deputy_inform_remote_of_overheads(); + goto check_again; + } + if((m->stay & DSTAY) || + ((process_told(p, DREQ_CHECKCONF)) && i_am_in_a_wrong_place())) + { + if(!mosix_go_home(1)) + { + printk("Process %s, uid=%d, killed because it failed to migrate back here\n", + desc_mostask(m), p->uid); + printk("(possible reasons are exhaustion of swap-space, or network disconnection)\n"); + send_sig(SIGKILL, p, 1); + } + done_checking_conf(); + return; + } +} + +static inline void +deputy_communication_failed_msg(void) +{ + printk("Process %s, uid=%d, killed because it lost communication\n" + "with the remote site where it was running\n", + desc_mostask(NULL), current->uid); +} + +void +deputy_communication_failed(void) +{ + deputy_communication_failed_msg(); + force_sig(SIGKILL, current); +} + +NORET_TYPE void +deputy_die_on_communication(void) +{ + 
deputy_communication_failed_msg(); + do_exit(SIGKILL); +#ifdef CONFIG_MOSIX_DIAG + panic("do_exit returned"); +#endif /* CONFIG_MOSIX_DIAG */ +} + +extern asmlinkage int FASTCALL(do_signal(struct pt_regs *, sigset_t *)); + +void +deputy_main_loop() +{ + register struct task_struct *p = current; + extern asmlinkage void do_syscall_trace(void); + void *head; + int hlen, type = 0; /* compiler bug requires the =0 */ + int delay_sigs = (p->mosix.dflags & (DTRACESYS1|DTRACESYS2)) != 0; + int has; + +#ifdef CONFIG_MOSIX_DIAG + if(!(p->mosix.dflags & DSYNC)) + panic("deputy start unsynced"); +#endif /* CONFIG_MOSIX_DIAG */ + p->mosix.dflags &= ~(DTRACESYS1|DTRACESYS2); + while(p->mosix.dflags & DDEPUTY) + if(p->mosix.dflags & DSYNC) + { + if(HAS_ASYNC(p)) + deputy_async_requests(); + else if(!delay_sigs && signal_pending(p)) + do_signal(mos_to_regs(&p->mosix), 0); + else + { + p->mosix.pass_regs = ALL_REGISTERS; + comm_send(DEP_USERMODE|USERMODE, NULL, 0, NULL, 0, 0); + p->mosix.dflags &= ~DSYNC; + } + } + else + { + has = comm_wait(); + if(p->mosix.dflags & DFAKESIGNAL) + { + p->mosix.dflags &= ~DFAKESIGNAL; + evaluate_pending_signals_in_mosix_context(); + } + if(!has) + { +#ifdef CONFIG_MOSIX_DIAG + int was_checkconf = process_told(p, DREQ_CHECKCONF); +#endif /* CONFIG_MOSIX_DIAG */ + if(!HAS_ASYNC(p)) + { + if(!signal_pending(p)) + { + /* so why did we wake? 
+ * the only good reason would + * be a DREQ_CHECKCONF while + * we are NOT in a wrong place */ +#ifdef CONFIG_MOSIX_DIAG + if(was_checkconf) + continue; + printk("%s: no packet and no event\n", + desc_mostask(NULL)); + mosix_panic("no packet+no event"); +#endif /* CONFIG_MOSIX_DIAG */ + continue; + } + if(delay_sigs) + { + spin_lock_irq(&p->sigmask_lock); + p->sigpending = 0; + spin_unlock_irq(&p->sigmask_lock); + continue; /* back to sleep */ + } + } + p->mosix.dflags |= DSYNC; + if(comm_send(DEP_SYNC, NULL, 0, NULL, 0, 0)) + deputy_die_on_communication(); + } + p->mosix.dflags |= DSYNC; + if(delay_sigs) + evaluate_pending_signals_in_mosix_context(); + if((type = comm_recv(&head, &hlen)) < 0) + deputy_die_on_communication(); + if(type & ANYTIME) + { + if(deputy_handle_interim_request(type, head, hlen)) + deputy_die_on_communication(); + } + else switch(type) + { + case REM_NULLMSG: + comm_free(head); + continue; + case REM_ASIG: + deputy_analyse_remote_signals((struct asig_h *)head); + continue; + case REM_SYSCALL_TRACE: + delay_sigs ^= 1; + deputy_add_rusage((struct rusage *)head); + comm_free(head); + do_syscall_trace(); + if(!(p->mosix.dflags & DDEPUTY)) + break; + if(deputy_reply(REM_SYSCALL_TRACE, NULL, 0, NULL, 0, 0, + delay_sigs)) + deputy_die_on_communication(); + continue; + case REM_SYSCALL: + deputy_syscall(head, delay_sigs); + p = current; /* for "fork" son */ + continue; + case REM_BRING_ME_HOME: + case REM_MUST_COME_HOME: + mosix_go_home(type == REM_MUST_COME_HOME ? 
2 : 1); + if((p->mosix.dflags & DDEPUTY) && + comm_send(type|REPLY, NULL, 0, NULL, 0, 0)) + deputy_die_on_communication(); + continue; + default: + printk("Process %s, uid=%d, killed because it received" + " an unrecognized\n" "message type (0x%x) from" + " the remote site where it was running\n", + desc_mostask(NULL), current->uid, type); + do_exit(SIGKILL); + } + } +} + +int +deputy_reply(int type, void *hd, int hlen, void *dat, int dlen, int um, int delay_sigs) +{ + register struct task_struct *p = current; + int err; + + type |= REPLY; + if(!(p->mosix.dflags & DNESTED) && (delay_sigs == 2 || + (!HAS_ASYNC(p) && (delay_sigs || !signal_pending(p))))) + { + p->mosix.pass_regs = ALL_REGISTERS; + type |= USERMODE; + } + err = comm_send(type, hd, hlen, dat, dlen, um); + if(type & USERMODE) + p->mosix.dflags &= ~DSYNC; + return(err); +} + +void +mosix_inform_remote_of_nice(void) +{ + if(deputy_request(DEP_NICE, &current->nice, sizeof(current->nice), + NULL, 0, 0, NULL, 0)) + deputy_die_on_communication(); +} + +void +deputy_inform_remote_of_overheads(void) +{ + if(deputy_request(DEP_OPCOSTS, (caddr_t)deputy_here, + sizeof(deputy_here), NULL, 0, 0, NULL, 0)) + deputy_die_on_communication(); +} + +void +mosix_deputy_rusage(int final) +{ + struct rusage *r; + + if(deputy_request(DEP_RUSAGE, &final, sizeof(final), NULL, 0, 0, + (void **)&r, sizeof(*r))) + { + if(!(current->mosix.dflags & DFINISHED)) + deputy_communication_failed(); + return; + } + deputy_add_rusage(r); + if(final) + current->mosix.exit_mem = r->ru_maxrss; + comm_free(r); +} + +int +mosix_deputy_restore_sigcontext(struct sigcontext *frame, int *eax) +{ + struct restore_sigcontext_ret_h r; + + current->mosix.pass_regs = ALL_REGISTERS; + if(deputy_request(DEP_RESTORESIGCONTEXT, &frame, sizeof(frame), NULL, + 0, 0, (void **)&r, -sizeof(r))) + r.result = 1; + *eax = r.eax; + return(r.result); +} + +void +mosix_deputy_setup_frame(unsigned long sig, struct k_sigaction *ka, + siginfo_t info, sigset_t *oldset) +{ +
struct setupframe_parameters_h s; + + s.sig = sig; + s.flags = ka->sa.sa_flags; + s.handler = ka->sa.sa_handler; + s.restorer = ka->sa.sa_restorer; + s.set = *oldset; + s.ss_sp = current->sas_ss_sp; + s.ss_size = current->sas_ss_size; + if(ka->sa.sa_flags & SA_SIGINFO) + s.info = info; + current->mosix.pass_regs = ALL_REGISTERS; + if(deputy_request(DEP_SETUPFRAME, &s, sizeof(s), NULL, 0, 0, NULL, 0)) + deputy_die_on_communication(); +} + +int +deputy_handle_interim_request(int type, void *head, int hlen) +{ + switch(type) + { + case REM_NOTHING: + comm_free(head); + return(comm_send(REM_NOTHING|REPLY, NULL, 0, NULL, 0, 0)); + case REM_PAGE: + return(deputy_bring_page((struct bring_page_h *)head)); + case REM_GETTSC: + return(deputy_tsc()); + case REM_MORESTRINGS: + return(deputy_more_strings((struct execve_more_strings_h *)head)); + case REM_BRING_ME_REGS: + return(deputy_bring_me_regs((unsigned long *)head)); + case REM_GETALOAD: + return(send_local_aload(REM_GETALOAD|REPLY)); + default: + printk("Process %s, uid=%d, received an unexpected " + "urgent request\n" "(type 0x%x) from the " + "remote site where it was running\n", + desc_mostask(NULL), current->uid, type); +#ifdef CONFIG_MOSIX_UDB + mosix_panic("deputy_handle_interim_request"); +#endif /* CONFIG_MOSIX_UDB */ + comm_free(head); + return(-EDIST); + } +} + +int +deputy_wait(int expect, void **head, int *hlen) +{ + int type, err; + + while((type = comm_recv(head, hlen)) != expect) + if(type < 0) + return(type); + else if(type & ANYTIME) + { + if((err = deputy_handle_interim_request(type, *head, *hlen))) + return(err); + } + else + { + printk("DEPUTY: Unexpected non-urgent request type %x\n", type); + mosix_panic("deputy_wait"); + comm_free(*head); + comm_flushdata(COMM_ALLDATA); + } + return(0); +} + +int +deputy_request(int type, void *header, int hlen, void *data, int dlen, + int uspace, void **result, int reslen) +{ + register struct task_struct *p = current; + int wasnested = (p->mosix.dflags & 
DNESTED); + int error; + void *head; + int rhlen; + + p->mosix.dflags |= DNESTED; + if((error = comm_send(type, header, hlen, data, dlen, uspace))) + goto out; + if((error = deputy_wait(type|REPLY, &head, &rhlen))) + goto out; + if(reslen > 0) + { + if(rhlen > reslen) + goto lenerr; + if(result) + *result = head; + } + else + { + if(rhlen > -reslen && reslen) + { + comm_free(head); + lenerr: + printk("deputy_request type %x: %d instead of %d\n", + type, rhlen, reslen); + error = -EDIST; + goto out; + } + if(reslen) + memcpy(result, head, rhlen); + comm_free(head); + } + out: + if(!wasnested) + p->mosix.dflags &= ~DNESTED; + return(error); +} + +unsigned long +mosix_deputy_mmap(struct file *fp, unsigned long addr, int fixed, + unsigned long len, unsigned long flags, unsigned long off, off_t isize, + nopage_t nopage) +{ + struct mmap_parameters_h mp; + int error; + unsigned long result; + + mp.addr = addr; + mp.fixed = fixed; + mp.len = len; + mp.flags = flags; + mp.pgoff = off; + mp.origin = PE; + if((mp.fp = fp)) + { + mp.dp = fp->f_dentry; + mp.uniq = mp.fp->f_dentry->d_inode->i_unique; + mp.isize = mp.fp->f_dentry->d_inode->i_size; + } + mp.nopage = nopage; + if((error = deputy_request(DEP_MMAP, &mp, sizeof(mp), NULL, 0, 0, + (void **)&result, -sizeof(result)))) + return(error); + return(result); +} + +long +mosix_deputy_brk(unsigned long addr, unsigned long len) +{ + struct brk_parameters_h b; + long error; + long result; + + b.addr = addr; + b.len = len; + if((error = deputy_request(DEP_BRK, &b, sizeof(b), NULL, 0, 0, + (void **)&result, -sizeof(result)))) + result = error; + return(result); +} + +int +deputy_munmap(unsigned long addr, size_t len) +{ + struct munmap_parameters_h mp; + int error; + int result; + + mp.addr = addr; + mp.len = len; + if((error = deputy_request(DEP_MUNMAP, &mp, sizeof(mp), NULL, 0, 0, + (void **)&result, -sizeof(result)))) + result = error; + return(result); +} + +int +deputy_mprotect(unsigned long addr, size_t len, unsigned long 
prot) +{ + struct mprotect_parameters_h mp; + int error; + int result; + + mp.addr = addr; + mp.len = len; + mp.prot = prot; + if((error = deputy_request(DEP_MPROTECT, &mp, sizeof(mp), NULL, 0, 0, + (void **)&result, -sizeof(result)))) + result = error; + return(result); +} + +void +deputy_analyse_remote_signals(struct asig_h *s) +{ + register int i; + siginfo_t info; + int back = 0; + + while(s->nforced--) + { + if(comm_copydata(&info, sizeof(info), 0)) + deputy_die_on_communication(); + force_sig_info(info.si_signo, &info, current); + } + while(s->sigs) + { + switch(i = ffz(~s->sigs) + 1) + { + case FATAL_SIGSEGV: + force_sig(SIGSEGV, current); + break; + case REMOTE_FILE_RELEASED: + mosix_rebuild_file_list(); + break; + case SIGSEGV: + case SIGKILL: + case SIGVTALRM: + case SIGPROF: + case SIGBUS: + case SIGIOT: + case SIGFPE: + case SIGILL: + case SIGXCPU: + case SIGXFSZ: + case SIGPWR: + case SIGTRAP: + send_sig(i, current, 1); + break; + case SIGTERM: + back = 1; + break; + default: + printk("%s - How did signal #%d arrive at REMOTE???\n", + desc_mostask(NULL), i); + } + s->sigs &= ~(1 << (i-1)); + } + if(current->mosix.dflags & DINCOMING) + return; + comm_free(s); + if(back) + mosix_go_home(2); +} + +#define MAX_COPY_FROM_USER (PAGE_SIZE) + +unsigned long +deputy_copy_from_user(void *to, void *from, unsigned long size, int verify) +{ + struct user_copy_h u; + int result, non_cache; + + while(size > 0) + { + if((result = copy_from_cache(to, (unsigned long)from, size, + &non_cache))) + { + if(!(size -= result)) + break; + ((char *)from) += result; + ((char *)to) += result; + if(!non_cache) + continue; + } + u.addr = from; + u.size = non_cache > MAX_COPY_FROM_USER ? 
MAX_COPY_FROM_USER : non_cache; + u.verify = verify; + if(deputy_request(DEP_COPY_FROM_USER, &u, sizeof(u), NULL, 0, 0, + (void **)&result, -sizeof(result))) + break; + if(result) + { + if(result < u.size && + comm_copydata(to, u.size - result, 0)) + result = u.size; + size += result - u.size; + break; + } + if(comm_copydata(to, u.size, 0)) + break; + size -= u.size; + ((char *)from) += u.size; + ((char *)to) += u.size; + } + return(size); +} + +#define MAX_COPY_TO_USER (2*PAGE_SIZE) + +/* Copy `size` bytes from `from` to the user address `to`: copy_to_cache() first, then DEP_COPY_TO_USER requests of at most MAX_COPY_TO_USER bytes for the part the cache reports as non-cached. Returns the number of bytes left uncopied (0 when everything was copied). */ +unsigned long +deputy_copy_to_user(void *to, void *from, unsigned long size, int verify) +{ + struct user_copy_h u; + int result; + int non_cache; + + while(size > 0) + { + if((result = copy_to_cache((unsigned long)to, from, size, + &non_cache))) + { + if(!(size -= result)) + break; + ((char *)from) += result; + ((char *)to) += result; + if(!non_cache) + continue; + } + u.addr = to; + u.size = non_cache > MAX_COPY_TO_USER ? MAX_COPY_TO_USER : non_cache; + u.verify = verify; + if(deputy_request(DEP_COPY_TO_USER, &u, sizeof(u), + from, u.size, 0, (void **)&result, -sizeof(result))) + break; + if(result) + { + size += result - u.size; + break; + } + size -= u.size; + ((char *)from) += u.size; + ((char *)to) += u.size; + } + return(size); +} + +/* Zero `size` bytes at the user address `to`: zero_cache() first, then a DEP_CLEAR_USER request for the part it reports as non-cached. Returns the number of bytes left uncleared, or the negative value received in the reply. */ +unsigned long +deputy_clear_user(void *to, int size, int verify) +{ + struct user_copy_h u; + int result; + int non_cache; + + while(size > 0) + { + if((result = zero_cache(to, size, &non_cache))) + { + if(!(size -= result)) + break; + ((char *)to) += result; /* was u.size, which is not set yet on this path */ + if(!non_cache) + continue; + } + u.addr = to; + u.size = non_cache; + u.verify = verify; + if(deputy_request(DEP_CLEAR_USER, &u, sizeof(u), NULL, 0, 0, + (void **)&result, -sizeof(result))) + break; + if(result < 0) + { + size = result; + break; + } + size -= non_cache; + if(result) + { + size += result; + break; + } + /* step past the range just cleared by the request, as the copy routines above do */ + ((char *)to) += non_cache; + } + return(size); +} + +long +deputy_strncpy_from_user(char *to, char *from, int max, int verify) +{ + struct user_copy_h u; + int result = 0; /* stays defined when the copy loop never runs (max <= 0) */ + int non_cache; + int read = 0; +
char *oto = to; + int omax = max; + + while(max > 0) + { + if((result = copy_from_cache(to, (unsigned long)from, -max, + &non_cache))) + { + if(non_cache == -1) + result--; + read += result; + if(!(max -= result)) + break; + to += result; + from += result; + if(!non_cache) + continue; + } + if(non_cache == -1) /* 0 encountered */ + break; + u.addr = from; + u.size = non_cache; + u.verify = verify; + if(deputy_request(DEP_STRNCPY_FROM_USER, &u, sizeof(u), NULL, 0, 0, + (void **)&result, -sizeof(result))) + { + result = -EFAULT; + break; + } +#ifdef CONFIG_MOSIX_DIAG + if(result > non_cache) + { + printk("%s: deputy_strncpy_from_user: %d > %d\n", + desc_mostask(NULL), result, max); + result = -EFAULT; + break; + } +#endif /* CONFIG_MOSIX_DIAG */ + if(result < 0) + break; + if(result && comm_copydata(to, result, 0)) + { + result = -EFAULT; + break; + } + from += result; + to += result; + read += result; + max -= result; + if(result != non_cache) + break; + } + if(result >= 0) + { + result = read; + if(omax && read < omax) + oto[read] = '\0'; + } + return(result); +} + +long +deputy_strnlen_user(char *from, long len) +{ + long result; + long read = 0; + int non_cache; + struct strnlen_user_h h; + int error = 0; + + while(1) + { + result = strlen_cache(from, &non_cache); + read += result; + if(non_cache == -1) /* 0 encountered */ + break; + from += result; + if(read > len) + break; + if(!non_cache) + continue; + h.addr = from; + h.len = len - read; + if(deputy_request(DEP_STRNLEN_USER, (void *)&h, sizeof(h), + NULL, 0, 0, (void **)&result, -sizeof(result)) || + result == 0) + { + error = 1; + break; + } + if(result > non_cache) + result = non_cache; + read += result; + if(read > len) + break; + from += result; + } + result = error ? 0 : (read > len ? 
len+1 : read); + return(result); +} + +int +deputy_verify_write(void *addr, unsigned long size) +{ + struct user_copy_h u; + int result; + + u.addr = addr; + u.size = size; + if(all_in_cache((unsigned long)addr, size, 1)) + result = 1; + else if(deputy_request(DEP_VERIFY_WRITE, &u, sizeof(u), NULL, 0, 0, + (void **)&result, -sizeof(result))) + result = 0; + return(result); +} + +unsigned int +deputy_csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, + int *err_ptr) +{ + struct user_csum_copy_h u; + struct user_csum_copy_ret_h r; + + /* must not play around with partial results, because unlike + * "deputy_copy_from_user", we may not break the buffer + * (at least not into non-4-byte-aligned segments, for now). + */ + if(any_in_cache((unsigned long)src, len)) + { + if(deputy_copy_from_user(dst, (char *)src, len, 1)) + { + *err_ptr |= 1; + return(sum); + } + return(csum_partial(dst, len, sum)); + } + while(len > 0) + { + u.addr = (void *)src; + u.len = len > MAX_COPY_FROM_USER ? 
MAX_COPY_FROM_USER : len; + u.sum = sum; + if(deputy_request(DEP_CSUM_COPY_FROM_USER, &u, sizeof(u), + NULL, 0, 0, (void **)&r, -sizeof(r))) + { + *err_ptr |= 1; + return(0); + } + sum = r.newsum; + if(r.error || comm_copydata(dst, len, 0)) + { + *err_ptr |= 1; + return(sum); + } + len -= u.len; + ((char *)src) += u.len; + dst += u.len; + } + return(sum); +} + +unsigned int +deputy_csum_partial_copy_to_user(const char *src, char *dst, int len, int sum, + int *err_ptr) +{ + unsigned int newsum = csum_partial(src, len, sum); + + if(deputy_copy_to_user(dst, (char *)src, len, 1)) + *err_ptr = -EINVAL; + return(newsum); +} + +struct int6 {int x[6];}; + +void +deputy_syscall(struct syscall_h *s, int delay_sigs) +{ + register struct task_struct *p = current; + extern void *sys_call_table[]; + long (*func)(struct int6); + struct syscall_ret_h r; + int n = s->n; + struct pt_regs regs; + char *cdata, *tofree; + int l; + + if(p->mosix.deputy_regs) + regs = *(mos_to_regs(&p->mosix)); + memcpy((caddr_t)®s, (caddr_t)s->args, sizeof(s->args)); + open_ucache_envelope(s); + comm_free(s); + switch(n) + { + /* Add to this list all machine-dependent system-calls that + * take the parameters' address, assuming it is the process' + * registers: Such routines are all in "arch/i386/ * / *" + */ + case __NR_fork: + case __NR_vfork: + case __NR_clone: + case __NR_execve: + case __NR_iopl: + case __NR_ioperm: + case __NR_sigsuspend: + case __NR_rt_sigsuspend: + case __NR_sigreturn: + case __NR_rt_sigreturn: + case __NR_sigaltstack: + case __NR_vm86old: + case __NR_vm86: + if(delay_sigs) + p->mosix.dflags |= DTRACESYS1; + /* (for the son to set "delay_sigs") */ + +// disabled by Matt r.ret = call_with_regs((void *)sys_call_table[n], ®s, ®s); + + p->mosix.dflags &= ~DTRACESYS1; + if(p->mosix.deputy_regs || + !(current->mosix.dflags & DDEPUTY)) + /* sorry, cannot keep it in "regs" after we return */ + *(mos_to_regs(&p->mosix)) = regs; + break; + default: + func = (__typeof__(func)) 
sys_call_table[n]; + r.ret = func(*((struct int6 *)&regs)); /* the leading words of regs are passed by value as the call's arguments */ + p = current; + break; + } + if(!(p->mosix.dflags & DDEPUTY)) + { + /* no longer a deputy: store the result in the saved registers instead of replying */ + mos_to_regs(&current->mosix)->eax = r.ret; + return; + } + cdata = deputy_pack_read_cache_data(&l, &r, &tofree); + cli(); + r.deputytime = p->mosix.deputytime; + p->mosix.deputytime = 0; + sti(); + if(deputy_reply(REM_SYSCALL, &r, sizeof(r), cdata, l, 0, delay_sigs)) + { + if(tofree) + kfree(tofree); + deputy_die_on_communication(); + } + if(tofree) + kfree(tofree); +} + +/* Send a resource limit in a DEP_RLIMIT request; the outcome is not checked. */ +void +mosix_deputy_rlimit(int resource, struct rlimit r) +{ + struct rlimit_h l; + + l.resource = resource; + l.limit = r; + deputy_request(DEP_RLIMIT, &l, sizeof(l), NULL, 0, 0, NULL, 0); +} + +/* Serve a page request: call the nopage routine named in the request for b->offset and answer with a REM_PAGE reply carrying the page (or -ENOMEM / -EFBIG and no data). The request header is freed here. */ +int +deputy_bring_page(struct bring_page_h *b) +{ + struct file *fp = b->fp; + struct vm_area_struct v; + struct page_ret_h r; + int err; + int address = b->offset; + struct page *page; + register struct task_struct *p = current; + + v.vm_start = v.vm_pgoff = 0; + v.vm_end = address + PAGE_SIZE; + v.vm_mm = p->mm; /* (eg. NULL) */ + v.vm_file = fp; + v.vm_flags = 0; /* anything really, but VM_SHARED */ + + deeper_sleep(); + page = b->nopage(&v, address, 0); + lighter_sleep(); + if(page == NOPAGE_OOM) + { + r.ret = -ENOMEM; + page = 0; + } + else + r.ret = page ? 0 : -EFBIG; + comm_free(b); + cli(); + r.deputytime = p->mosix.deputytime; + p->mosix.deputytime = 0; + sti(); + err = deputy_reply(REM_PAGE, &r, sizeof(r), + page ? kmap(page) : NULL, page ?
PAGE_SIZE : 0, 0, 2); + if(page) + { + kunmap(page); + __free_page(page); + } + return(err); +} + +int +deputy_tsc(void) +{ + u64 r; + + rdtsc(*((unsigned long *)&r), *(((unsigned long *)&r) + 1)); + return(deputy_reply(REM_GETTSC|REPLY, &r, sizeof(r), NULL, 0, 0, 2)); +} + +void +deputy_add_rusage(struct rusage *r) +{ + register struct task_struct *p = current; + int64_t ut = r->ru_utime.tv_sec * (int64_t)HZ + + r->ru_utime.tv_usec*HZ/MILLION; + int64_t st = r->ru_stime.tv_sec * (int64_t)HZ + + r->ru_stime.tv_usec*HZ/MILLION; + + cli(); + p->times.tms_utime += ut; + p->times.tms_stime += st; + sti(); + p->min_flt += r->ru_minflt; + p->maj_flt += r->ru_majflt; + p->nswap += r->ru_nswap; +} + +void +mosix_deputy_count_args(char **argp, char **envp, int *argc, int *envc) +{ + struct execve_counts_h e; + struct execve_counts_ret_h r; + + e.argp = argp; + e.envp = envp; + if(deputy_request(DEP_EXECVE_COUNTS, &e, sizeof(e), NULL, 0, 0, + (void **)&r, -sizeof(r))) + *argc = -EFAULT; + else + { + *argc = r.argc; + *envc = r.envc; + } +} + +static inline int +fill_strings_page(int pgno, int len) +{ + struct linux_binprm *bprm; + struct page *page; + char *kaddr; + int ret; + + if(!pgno || len == 0) + return(0); + if(!(bprm = current->mosix.inexec) || pgno < 0 || + pgno > MAX_ARG_PAGES || + len < 0 || len > PAGE_SIZE || bprm->page[pgno-1]) + { + printk("%s - Improper strings-page(%d:%d)\n", + desc_mostask(NULL), pgno, len); + comm_flushdata(COMM_ALLDATA); + return(-EFAULT); + } + if(!(page = alloc_page(GFP_HIGHUSER))) + { + comm_flushdata(COMM_ALLDATA); + return(0); /* error will be detected by missing the page */ + } + bprm->page[pgno-1] = page; + kaddr = kmap(page); + ret = comm_copydata(kaddr + (PAGE_SIZE - len), len, 0); + kunmap(page); + return(ret); +} + +int +mosix_deputy_bring_strings(struct linux_binprm *bprm, char *filename, + char **envp, char **argp) +{ + struct execve_bring_strings_h b; + struct execve_bring_strings_ret_h r; + int i; + + b.p = bprm->p; + 
b.filename = filename; + b.envp = envp; + b.envc = bprm->envc; + b.argp = argp; + b.argc = bprm->argc; + if(deputy_request(DEP_BRING_STRINGS, &b, sizeof(b), NULL, 0, 0, + (void **)&r, -sizeof(r))) + return(-ENOMEM); + if(r.result) + { + comm_flushdata(COMM_ALLDATA); + return(r.result); + } + bprm->p = r.p; + bprm->exec = r.exec; + fill_strings_page(r.pgno, r.len); + for(i = r.p / PAGE_SIZE ; i < MAX_ARG_PAGES ; i++) + if(!bprm->page[i]) + /* "fill_strings_page" or one of the "deputy_more_strings" failed */ + return(-EAGAIN); + return(0); +} + +int +deputy_more_strings(struct execve_more_strings_h *m) +{ + int result = fill_strings_page(m->pgno, m->len); + + comm_free(m); + if(result == -EAGAIN) + result = 0; + return(result); +} + +int +deputy_bring_me_regs(unsigned long *bring) +{ + current->mosix.pass_regs |= *bring & current->mosix.deputy_regs; + comm_free(bring); + return(comm_send(REM_BRING_ME_REGS|REPLY, NULL, 0, NULL, 0, 0)); +} + +int +mosix_deputy_setup_args(int how, unsigned long *start_stack_p) +{ + struct execve_setup_args_h e; + struct execve_setup_args_ret_h r; + register struct linux_binprm *bprm = current->mosix.inexec; + int i, err; + struct page *page; + char *kaddr = NULL; + + e.create = 1; + for(i = 0 ; i < MAX_ARG_PAGES-1 ; i++) + if((page = bprm->page[i])) + { + e.how = SETUP_ARGS_NOTYET; + e.pgno = i+1; + e.len = (i+1) * PAGE_SIZE - bprm->p; + if(e.len > PAGE_SIZE) + e.len = PAGE_SIZE; + kaddr = kmap(page); + err = comm_send(DEP_SETUP_ARGS, &e, sizeof(e), + kaddr + (PAGE_SIZE - e.len), e.len, 0); + kunmap(page); + if(err) + return(-EDIST); + e.create = 0; + } + e.exec = bprm->exec; + e.argc = bprm->argc; + e.envc = bprm->envc; + e.personality = current->personality; + e.how = how; + if((page = bprm->page[MAX_ARG_PAGES-1])) + { + e.pgno = MAX_ARG_PAGES; + e.len = MAX_ARG_PAGES * PAGE_SIZE - bprm->p; + if(e.len > PAGE_SIZE) + e.len = PAGE_SIZE; + kaddr = kmap(page); + } + else + e.len = e.pgno = 0; + e.p = bprm->p; + err = 
deputy_request(DEP_SETUP_ARGS, &e, sizeof(e), + e.len ? kaddr + (PAGE_SIZE - e.len) : NULL, e.len, 0, + (void **)&r, -sizeof(r)); + if(page) + kunmap(page); + if(err) + return(-EDIST); + bprm->loader = r.loader; + bprm->exec = r.exec; + bprm->p = r.p; + for(i = 0 ; i < MAX_ARG_PAGES ; i++) + if(bprm->page[i]) + { + __free_page(bprm->page[i]); + bprm->page[i] = 0; + } + if(start_stack_p) + *start_stack_p = r.start_stack; + return(r.reply); +} + +/* Flush the user cache and issue DEP_EXEC_MMAP; on a zero reply all held files of the current task are cleared as well. Returns the reply value, or -ENOMEM if the request failed. */ +int +mosix_deputy_exec_mmap(char *filename) +{ + int r; + + flush_ucache(); + if(deputy_request(DEP_EXEC_MMAP, NULL, 0, NULL, 0, 0, + (void **)&r, -sizeof(r))) + return(-ENOMEM); + if(!r) + mosix_clear_all_held_files(current); + return(r); +} + +/* DEP_DUMP_THREAD: the reply is copied straight into *u; returns deputy_request()'s result. */ +int +mosix_deputy_dump_thread(struct user *u) +{ + return(deputy_request(DEP_DUMP_THREAD, NULL, 0, NULL, 0, 0, + (void **)u, -sizeof(struct user))); +} + +/* Send *ex in a DEP_INIT_AOUT_MM request; the outcome is not checked. */ +void +mosix_deputy_init_aout_mm(struct exec *ex) +{ + deputy_request(DEP_INIT_AOUT_MM, ex, sizeof(*ex), NULL, 0, 0, NULL, 0); +} + +/* Pack the ELF set-up parameters into one DEP_ELF_SETUP request; the exec_e_* fields are filled in only when `exec` is non-NULL. */ +unsigned long +mosix_deputy_elf_setup(char *p, int argc, int envc, struct elfhdr *exec, + unsigned long addr, unsigned long load_bias, + unsigned long interp_load_addr, int ibcs, + int add_arg_start, unsigned long elf_brk, unsigned long end_code, + unsigned long start_code, unsigned long start_data, + unsigned long end_data, unsigned long elf_bss, + unsigned long personality, struct elf_tables_extras *extras) +{ + struct execve_elf_setup_h e; + unsigned long r; + + e.p = p; + e.argc = argc; + e.envc = envc; + if((e.hasexec = (exec != NULL))) + { + e.exec_e_phoff = exec->e_phoff; + e.exec_e_phnum = exec->e_phnum; + e.exec_e_entry = exec->e_entry; + } + e.addr = addr; + e.load_bias = load_bias; + e.interp_load_addr = interp_load_addr; + e.ibcs = ibcs; + e.extras = *extras; + e.add_arg_start = add_arg_start; + e.elf_brk = elf_brk; + e.end_code = end_code; + e.start_code = start_code; + e.end_data = end_data; + e.start_data = start_data; + e.elf_bss = elf_bss; + e.personality = personality; + +
if(deputy_request(DEP_ELF_SETUP, &e, sizeof(e), NULL, 0, 0, + (void **)&r, -sizeof(r))) + { + if(current->mosix.dflags & DDEPUTY) + deputy_communication_failed(); + r = (unsigned long)e.p; /* we did nothing, keep same */ + } + return(r); +} + +/* Send the three sizes in a DEP_FIX_ELF_AOUT request; the outcome is not checked. */ +void +mosix_deputy_fix_elf_aout_interp(unsigned bss, unsigned data, unsigned text) +{ + struct execve_fix_elf_aout_h f; + + f.bss = bss; + f.data = data; + f.text = text; + deputy_request(DEP_FIX_ELF_AOUT, &f, sizeof(f), NULL, 0, 0, NULL, 0); +} + +/* Fetch the VMA list with DEP_LIST_VMAS. On success returns the number of entries, stores a kmalloc'ed array in *vmasp (NULL when there are none) and fills *argstartp / *argendp. Returns -1 on failure, with nothing left allocated. */ +int +mosix_deputy_list_vmas(struct vmalist **vmasp, unsigned long *argstartp, + unsigned long *argendp) +{ + struct list_vmas_ret_h r; + + if(deputy_request(DEP_LIST_VMAS, NULL, 0, NULL, 0, 0, (void **)&r, + -sizeof(r)) || r.n < 0) + return(-1); + if(r.n == 0) + *vmasp = NULL; + else + { + if(!(*vmasp = kmalloc(r.n*sizeof(struct vmalist), GFP_KERNEL))) + return(-1); /* NOTE(review): the reply data stays unread here; other routines in this file call comm_flushdata(COMM_ALLDATA) in this situation -- confirm */ + if(comm_copydata(*vmasp, r.n * sizeof(struct vmalist), 0)) + { + /* do not leak the array, nor leave a stale pointer behind, on a failed transfer */ + kfree(*vmasp); + *vmasp = NULL; + return(-1); + } + } + *argstartp = r.argstart; + *argendp = r.argend; + return(r.n); +} + +/* + * migrated away - shed memory etc.
+ */ +void +deputy_startup(void) +{ + register struct task_struct *p = current; + + exit_mm(p); + p->mosix.pages_i_bring = 0; + read_lock(&tasklist_lock); + p->mosix.ancesstor = NULL; + read_unlock(&tasklist_lock); +#ifdef CONFIG_MOSIX_DFSA + tell_process(p, DREQ_NOTUPTODATE); +#endif /* CONFIG_MOSIX_DFSA */ +} + +/* Send `personality` in a DEP_PERSONALITY request; returns the int sent back, or -1 if the request failed. */ +int +mosix_deputy_personality(unsigned long personality) +{ + int result; + + if(deputy_request(DEP_PERSONALITY, &personality, sizeof(personality), + NULL, 0, 0, (void **)&result, -sizeof(result))) + return(-1); + return(result); +} + +/* DEP_DUMP_FPU: when the value in the reply is non-zero, the FPU state that follows it is read into *f. Returns that value, or 0 on any failure. */ +int +mosix_deputy_dump_fpu(struct user_i387_struct *f) +{ + int r; + + if(deputy_request(DEP_DUMP_FPU, NULL, 0, NULL, 0, 0, + (void **)&r, -sizeof(r))) + return(0); + if(r && comm_copydata(f, sizeof(*f), 0)) + return(0); + return(r); +} + +/* Send DEP_PLEASE_FORK on behalf of `son` and service whatever arrives on the link until the final DEP_PLEASE_FORK reply (or a request to come home) is seen. */ +int +mosix_deputy_fork(struct task_struct *son, int migrated, unsigned long usp) +{ + struct mosix_task *m = &current->mosix; + void *head; + int hlen; + int type; + int where; + mosix_link *sonsock = NULL; + struct fork_h f; + int n; + + f.usp = usp; + f.do_forkmigrate = !migrated; + f.pid = son->pid; + resend: + m->pass_regs = ALL_REGISTERS & ~BIT_OF_REGISTER(eax); + if(comm_send(DEP_PLEASE_FORK, &f, sizeof(f), NULL, 0, 0)) + { + bad: + if(sonsock) + comm_close(sonsock); + deputy_communication_failed(); + return(-EDIST); + } + son->mosix.deputy_regs = m->deputy_regs; + while(1) + switch(type = comm_recv(&head, &hlen)) + { + default: + if(type < 0) + goto bad; + if(!(type & ANYTIME)) + { + printk("Unexpected reply (0x%x) to fork request" + "\n", type); + goto bad; + } + if(deputy_handle_interim_request(type, head, hlen)) + goto bad; + break; + case REM_MUST_COME_HOME: + case REM_BRING_ME_HOME: + /* comm_free(head); not needed - it's empty */ + go_home_first: + if(sonsock) + { + comm_close(sonsock); + sonsock = NULL; + } + if(passto((type == REM_MUST_COME_HOME) ?
+ MUSTGOHOME : GOBACKHOME, 0)) + return(-EAGAIN); + came_home: + if(sonsock) + { + comm_close(sonsock); + sonsock = NULL; + } + son->mosix.whereami = 0; + undeputy(son); + return(0); + case REM_BRING_ME_TO: + where = *((int *)head); + comm_free(head); + if(sonsock) + { + comm_close(sonsock); + sonsock = NULL; + } + if(!passto(where, 0)) + { + if(!(m->dflags & DDEPUTY)) + goto came_home; + son->mosix.whereami = m->whereami; + f.do_forkmigrate = 0; + goto resend; + } + if(comm_send(REM_BRING_ME_TO|REPLY, NULL, 0, NULL, 0,0)) + goto bad; + break; + case REM_CONSIDERING: + /* comm_free(head); not needed - it's empty */ + if(sonsock) + { + comm_close(sonsock); + sonsock = NULL; + } + n = m->nmigs; + coordinate(BALANCE, 1); + f.do_forkmigrate = 0; /* once only */ + if(!(m->dflags & DDEPUTY)) + goto came_home; + son->mosix.whereami = m->whereami; + if(m->nmigs != n) + goto resend; + break; + case REM_CONNECT_TO: + if(sonsock) + mosix_panic("Dup REM_CONNECT_TO"); + sonsock = comm_open(COMM_TOADDR, (mosix_addr *)head, + comm_connect_timo); + comm_free(head); + n = (sonsock != NULL); + if(comm_send(REM_CONNECT_TO|REPLY, &n, sizeof(n), + NULL, 0, 0)) + goto bad; + break; + case DEP_PLEASE_FORK|REPLY: + n = *((int *)head); + comm_free(head); + if(n) + goto go_home_first; + if(!sonsock) + { + mosix_panic("deputy_fork: no sonsock"); + goto go_home_first; + } + son->mosix.dflags |= (DDEPUTY|DSYNC| + (m->dflags & DDUMPABLE)); + comm_use(son, sonsock); + return(0); + } +} diff -urN linux-2.4.17/mos/dfsa.c linux_umopenmosix/mos/dfsa.c --- linux-2.4.17/mos/dfsa.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/dfsa.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,2803 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. 
NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DFSA +#include + +#define CLEAR_ALL(fls,fld) memset(fls->fld, 0, fls->max_fdset/8) + +struct dfsatab *dfsatab; +struct vfsmount *dfsa_mounts[MAXDFSAFS]; +struct +{ + struct vfsmount *mnt; + struct dentry *dentry; +} dfsa_links[MAXDFSALINKS]; +char dfsa_being_cancelled[MAXDFSAFS]; +char dfsa_recheck_mount[MAXDFSAFS]; +struct dentry *bad_deputy_dfsaentry; +int dfsa_disabled = 1; +DECLARE_MUTEX(dfsa_modify_lock); + +extern struct inode_operations all_bad_inode_operations; +extern struct file_operations all_bad_file_operations; + +void +dfsa_init(void) +{ + struct inode *ip; + + if(!(dfsatab = kmalloc(sizeof(struct dfsatab), GFP_KERNEL))) + panic("dfsa_init"); + dfsatab->len = sizeof(struct dfsatab); + dfsatab->ndefs = 0; + if(!(bad_deputy_dfsaentry = + d_alloc(NULL, &(const struct qstr) { "/", 1, 0 }))) + panic("dfsa_init: d_alloc"); + if(!(ip = get_empty_inode())) + panic("dfsa_init: empty inode"); + ip->i_mode = S_IFREG; + ip->i_op = &all_bad_inode_operations; + ip->i_fop = &all_bad_file_operations; + bad_deputy_dfsaentry->d_inode = ip; + bad_deputy_dfsaentry->d_parent = bad_deputy_dfsaentry; + dfsa_disabled = 0; +} + +int +dfsa_option(char *data, int *result) +{ + int n; + int has = 0; + + if(!data) + return(0); + while(1) + { + while(*data && (*data != 'd' || *(data+1) != 'f' || + *(data+2) != 's' || *(data+3) != 'a' || + *(data+4) != '=')) + data++; + if(!*data) + return(has); + data += 5; + if(*data < '0' || *data > '9') + { + printk("WARNING: Invalid 'dfsa=' value <%s>\n", data); + continue; + } + for(n = 0 ; *data >= '0' && *data <= '9' ; data++) + { + n = n * 10 + *data - '0'; + if(n > 32767) + break; + } + if(n <= 32767) + { + has = 
1; + *result = n; + } + else + printk("DFSA-number option overflow!\n"); + } + return(has); +} + +void +dfsa_is_not_up_to_date(void) +{ + tell_process(current, DREQ_NOTUPTODATE); +} + +void +dfsa_pwd_changed(void) +{ + current->mosix.dupdates |= DFSA_UPDCDIR; +} + +#define DFSA_MOUNT_MOUNTED 1 +#define DFSA_MOUNT_RDONLY 2 +#define DFSA_MOUNT_SYNC 4 +#define DFSA_MOUNT_NOATIME 8 +#define DFSA_MOUNT_NODIRATIME 16 +#define DFSA_MOUNT_SYMLINK 32 + +int +important_fs_flags(unsigned long flags) +{ + int f = DFSA_MOUNT_MOUNTED; + + if(flags & MS_RDONLY) + f |= DFSA_MOUNT_RDONLY; + if(flags & MS_SYNCHRONOUS) + f |= DFSA_MOUNT_SYNC; + if(flags & MS_NOATIME) + f |= DFSA_MOUNT_NOATIME; + if(flags & MS_NODIRATIME) + f |= DFSA_MOUNT_NODIRATIME; + return(f); +} + +/* + * 0 = same + * 1 = first string comes first + * -1 = second string comes first + * normally sorted alphabetically - + * but if one string begins with the other, the LONGER comes first. + */ +static inline int +dfsa_name_compare(char *n1, char *n2) +{ + register unsigned char *u1, *u2; + int a, b; + + for(u1 = n1 , u2 = n2 ; *u1 && *u1 == *u2 ; u1++ , u2++) + ; + a = *u1; + b = *u2; + if(!a) + return(b ? 
-1 : 0); + if(!b || a < b) + return(1); + return(-1); +} + +static inline char * +mntpath(struct vfsmount *mnt) +{ + char *page = (char *)__get_free_page(GFP_KERNEL); + struct vfsmount *rootmnt; + struct dentry *root; + + if(!page) + return(NULL); + read_lock(&init_task.fs->lock); + rootmnt = mntget(init_task.fs->rootmnt); + root = dget(init_task.fs->root); + read_unlock(&init_task.fs->lock); + spin_lock(&dcache_lock); + page = __d_path(mnt->mnt_root, mnt, root, rootmnt, page, PAGE_SIZE); + spin_unlock(&dcache_lock); + dput(root); + mntput(rootmnt); + return(page); +} + +static inline void +dfsa_set_dfsa(struct vfsmount *mnt, int now, int prev) +{ + int differ = (mnt->mnt_dfsano != now); + + MOSIX_LOCKED; + mnt->mnt_dfsaprev = prev; + mnt->mnt_dfsano = now; + if(differ && mnt->mnt_sb->s_op->dfsa_changed) + mnt->mnt_sb->s_op->dfsa_changed(mnt, now); +} + +int +dfsa_setmnt(struct vfsmount *mnt, int num, unsigned long flags) +{ + int ind, sz, oldnum = 0; + register int i, j; + char *x, *y; + struct dfsatab *new, *old; + int error = 0; + int c, done; + char *dir; + struct super_block *sb; + + if(num < 0 || num > MAXDFSAFS) + { + printk("Invalid DFSA index (%d), range is 1-%d " + "(or 0 to cancel)\n", num, MAXDFSAFS); + return(-EINVAL); + } + sb = mnt->mnt_sb; + if(num && (!sb->s_op || !sb->s_op->identify || !sb->s_op->reconstruct)) + { + printk("%s: Not a DFSA-compatible file-system\n", + sb->s_type->name); + return(-EPERM); + } + if(!(dir = mntpath(mnt))) + return(-ENOMEM); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("dfsa_setmnt(<%s>,%d)\n", dir, num); +#endif /* CONFIG_MOSIX_DEBUG */ + error = -EBUSY; + if(down_interruptible(&dfsa_modify_lock)) + goto freedir; + lock_mosix(); + if(dfsa_disabled) + { + if(num == 0) + error = 0; + goto out; + } + sz = sizeof(struct dfsatab); + ind = -1; + for(i = 0 ; i < dfsatab->ndefs ; i++) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + if(!strcmp(dir, x)) + { + oldnum = dfsatab->dfsdef[i].defno; + 
if(oldnum == num && dfsatab->dfsdef[i].mounted == + important_fs_flags(flags)) + { + error = 0; /* nothing to do */ + goto out; + } + if(oldnum > MAXDFSAFS) + { + printk("Conflict: mount-point <%s> is declared " + "as a DFSA symbolic link\n", dir); + error = -EBUSY; + goto out; + } + ind = i; + } + else if(dfsatab->dfsdef[i].defno == num) + { + printk("DFSA entry #%d already held for <%s>\n",num, x); + error = -EBUSY; + goto out; + } + else + sz += sizeof(struct dfsdef) + strlen(x) + 1; + } + if(ind == -1 && num == 0) + { + error = 0; /* nothing to do */ + goto out; + } + if(num) + sz += sizeof(struct dfsdef) + strlen(dir) + 1; + unlock_mosix(); + new = (sz >= 65535) ? NULL : kmalloc(sz, GFP_KERNEL); + lock_mosix(); + if(!new) + { + error = -ENOMEM; + goto out; + } + if(ind != -1) + { + if(num) + printk("Changing DFSA index of <%s> from %d to %d\n", + dir, oldnum, num); + else + printk("Cancelling DFSA entry #%d (was <%s>)\n", + oldnum, dir); + dfsa_being_cancelled[oldnum-1] = 1; + dfsa_set_dfsa(mnt, 0, mnt->mnt_dfsano); + unlock_mosix(); + error = dfsa_sync(1); + lock_mosix(); + if(error) + { + printk("DFSA interrupted: not %s <%s>\n", + num ? 
"Changing" : "Cancelling", dir); + dfsa_set_dfsa(mnt, mnt->mnt_dfsaprev, 0); + dfsa_being_cancelled[oldnum-1] = 0; + dfsa_sync(0); + kfree(new); + goto out; + } + mntput(dfsa_mounts[oldnum-1]); + dfsa_mounts[oldnum-1] = NULL; + dfsa_set_dfsa(mnt, num, 0); + dfsa_being_cancelled[oldnum-1] = 0; + } + else + { + printk("Adding <%s> as DFSA entry #%d\n", dir, num); + dfsa_set_dfsa(mnt, num, 0); + } + /* compile the new table */ + new->len = sz; + new->ndefs = dfsatab->ndefs - (ind != -1) + (num != 0); + y = (char *)&new->dfsdef[(int)new->ndefs]; + done = 0; + for(j = i = 0 ; i < dfsatab->ndefs ; i++) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + if(!done && (c = dfsa_name_compare(dir, x)) <= 0) + { + done = 1; + if(num) + { + new->dfsdef[j].defno = num; + new->dfsdef[j].mounted = + important_fs_flags(flags); + new->dfsdef[j].offset = y - (char *)new; + strcpy(y, dir); + while(*y++) + ; + j++; + } + if(c == 0) + continue; + } + new->dfsdef[j] = dfsatab->dfsdef[i]; + new->dfsdef[j].offset = y - (char *)new; + while(*x) + *y++ = *x++; + *y++ = '\0'; + j++; + } + if(num && !done) + { + new->dfsdef[j].defno = num; + new->dfsdef[j].mounted = important_fs_flags(flags); + new->dfsdef[j].offset = y - (char *)new; + strcpy(y, dir); + } + old = dfsatab; + dfsatab = new; + if(num) + dfsa_mounts[num-1] = mntget(mnt); + kfree(old); + dfsa_sync(0); + error = 0; + out: + unlock_mosix(); + up(&dfsa_modify_lock); + freedir: + free_page(((unsigned long)dir) & PAGE_MASK); + return(error); +} + +int +dfsa_addlink(char *link) +{ + int i = strlen(link), j; + char *x, *y; + int ret; + struct nameidata nd; + struct vfsmount *mnt; + struct dentry *dp; + struct dfsatab *new, *old; + int sz = sizeof(struct dfsatab) + sizeof(struct dfsdef) + i + 1; + int linkno; + int added = 0; + umode_t mode; + + if(i > MAXDFSALINKLEN) + return(-ENAMETOOLONG); + if(*link != '/') + { + printk("DFSA: <%s> must be an absolute path-name.\n", link); + return(-EINVAL); + } + if (path_init(link, 
LOOKUP_POSITIVE, &nd) && (ret=path_walk(link, &nd))) + return(ret); + ret = -EINVAL; + if(!S_ISLNK(nd.dentry->d_inode->i_mode)) + { + printk("DFSA: <%s> is not a link!\n", link); + goto release; + } + path_release(&nd); + if (path_init(link, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd) && + (ret = path_walk(link, &nd))) + { + printk("DFSA: <%s> symlink is loose (errno=%d)\n", link, ret); + return(-EINVAL); + } + mode = nd.dentry->d_inode->i_mode; + if(!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) + { + printk("DFSA: <%s> points to a special file!\n", link); + goto release; + } + if(!nd.mnt->mnt_sb->s_op->identify) + { + printk("<%s> points to a file-system not supported by DFSA.\n", + link); + goto release; + } + ret = -EINTR; + if(down_interruptible(&dfsa_modify_lock)) + goto release; + lock_mosix(); + for(i = 0 ; i < dfsatab->ndefs ; i++) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + if(!strcmp(x, link)) + { + if(dfsatab->dfsdef[i].defno <= MAXDFSAFS) + { + printk("Conflict: DFSA link <%s> is declared " + "as a mount-point\n", link); + ret = -EBUSY; + goto out; + } + linkno = dfsatab->dfsdef[i].defno - (MAXDFSAFS+1); + mnt = dfsa_links[linkno].mnt; + dp = dfsa_links[linkno].dentry; + ret = 0; + if(mnt == nd.mnt && dp->d_inode == nd.dentry->d_inode) + goto out; + printk("Updating DFSA link <%s>\n", link); + printk("[But please, next time delete the DFSA-link " + "BEFORE changing the symbolic-link]!\n"); + dfsa_links[linkno].mnt = mntget(nd.mnt); + dfsa_links[linkno].dentry = dget(nd.dentry); + unlock_mosix(); + mntput(mnt); + dput(dp); + lock_mosix(); + goto sync; + } + sz += sizeof(struct dfsdef) + strlen(x) + 1; + } + ret = -EMLINK; + for(linkno = 0 ; linkno < MAXDFSALINKS ; linkno++) + if(!dfsa_links[linkno].mnt) + break; + if(linkno == MAXDFSALINKS) + goto out; + unlock_mosix(); + new = (struct dfsatab *)kmalloc(sz, GFP_KERNEL); + lock_mosix(); + ret = -ENOMEM; + if(!new) + goto out; + printk("Adding DFSA link <%s>\n", link); + dfsa_links[linkno].mnt 
= mntget(nd.mnt); + dfsa_links[linkno].dentry = dget(nd.dentry); + ret = 0; + new->len = sz; + new->ndefs = dfsatab->ndefs + 1; + y = (char *)&new->dfsdef[(int)new->ndefs]; + for(j = i = 0 ; i < dfsatab->ndefs ; i++) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + if(!added && dfsa_name_compare(link, x) < 0) /* link sorts before x: insert it here so the table stays sorted */ + { + added = 1; + new->dfsdef[j].defno = linkno + MAXDFSAFS + 1; + new->dfsdef[j].mounted = 0; + new->dfsdef[j].offset = y - (char *)new; + strcpy(y, link); + while(*y++) + ; + j++; + } + new->dfsdef[j] = dfsatab->dfsdef[i]; + new->dfsdef[j].offset = y - (char *)new; + while(*x) + *y++ = *x++; + *y++ = '\0'; + j++; + } + if(!added) /* link sorts after every existing entry: append it */ + { + new->dfsdef[j].defno = linkno + MAXDFSAFS + 1; + new->dfsdef[j].mounted = 0; + new->dfsdef[j].offset = y - (char *)new; + strcpy(y, link); + while(*y++) + ; + } + old = dfsatab; + dfsatab = new; + kfree(old); + sync: + dfsa_sync(0); + out: + unlock_mosix(); + up(&dfsa_modify_lock); + release: + path_release(&nd); + return(ret); +} + +int +dfsa_dellink(char *link) +{ + int i, j; + char *x, *y; + int ret; + struct vfsmount *mnt; + struct dentry *dp; + struct dfsatab *new, *old; + int sz = sizeof(struct dfsatab); + int ind = -1; + int linkno = 0; + + if(down_interruptible(&dfsa_modify_lock)) + return(-EINTR); + lock_mosix(); + for(i = 0 ; i < dfsatab->ndefs ; i++) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + if(dfsatab->dfsdef[i].defno <= MAXDFSAFS || strcmp(x, link)) + sz += sizeof(struct dfsdef) + strlen(x) + 1; + else + { + ind = i; + linkno = dfsatab->dfsdef[i].defno - MAXDFSAFS - 1; + } + } + ret = -EINVAL; + if(ind == -1) + goto out; + unlock_mosix(); + new = (struct dfsatab *)kmalloc(sz, GFP_KERNEL); + lock_mosix(); + ret = -ENOMEM; + if(!new) + goto out; + ret = 0; + printk("Deleting DFSA link <%s>\n", link); + new->len = sz; + new->ndefs = dfsatab->ndefs - 1; + y = (char *)&new->dfsdef[(int)new->ndefs]; + for(j = i = 0 ; i < dfsatab->ndefs ; i++) + if(i != ind) + { + x = &((char 
*)dfsatab)[dfsatab->dfsdef[i].offset]; + new->dfsdef[j] = dfsatab->dfsdef[i]; + new->dfsdef[j].offset = y - (char *)new; + while(*x) + *y++ = *x++; + *y++ = '\0'; + j++; + } + old = dfsatab; + dfsatab = new; + unlock_mosix(); + i = dfsa_sync(1); + lock_mosix(); + if(i) + { + printk("Clearing of DFSA links interrupted!\n"); + dfsatab = old; + kfree(new); + ret = -EINTR; + goto out; + } + kfree(old); + mnt = dfsa_links[linkno].mnt; + dfsa_links[linkno].mnt = NULL; + dp = dfsa_links[linkno].dentry; + dfsa_links[linkno].dentry = NULL; + unlock_mosix(); + mntput(mnt); + dput(dp); + lock_mosix(); + out: + unlock_mosix(); + up(&dfsa_modify_lock); + return(ret); +} + +int +dfsa_clearlinks(void) +{ + int i, j; + char *x, *y; + int ret; + struct vfsmount *mnt; + struct dentry *dp; + struct dfsatab *new = NULL, *old; + int sz = sizeof(struct dfsatab); + int found = 0; + + if(down_interruptible(&dfsa_modify_lock)) + return(-EINTR); + lock_mosix(); + for(i = 0 ; i < dfsatab->ndefs ; i++) + { + if(dfsatab->dfsdef[i].defno <= MAXDFSAFS) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + sz += sizeof(struct dfsdef) + strlen(x) + 1; + } + else + found++; + } + if(!found) + { + printk("No DFSA links to clear\n"); + ret = 0; + goto out; + } + ret = -ENOMEM; + unlock_mosix(); + new = (struct dfsatab *)kmalloc(sz, GFP_KERNEL); + lock_mosix(); + if(!new) + goto out; + printk("Clearing all DFSA links\n"); + ret = 0; + new->len = sz; + new->ndefs = dfsatab->ndefs - found; + y = (char *)&new->dfsdef[(int)new->ndefs]; + for(j = i = 0 ; i < dfsatab->ndefs ; i++) + if(dfsatab->dfsdef[i].defno <= MAXDFSAFS) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + new->dfsdef[j] = dfsatab->dfsdef[i]; + new->dfsdef[j].offset = y - (char *)new; + while(*x) + *y++ = *x++; + *y++ = '\0'; + j++; + } + old = dfsatab; + dfsatab = new; + unlock_mosix(); + i = dfsa_sync(1); + lock_mosix(); + if(i) + { + printk("Clearing of DFSA links interrupted!\n"); + dfsatab = old; + kfree(new); + ret = 
-EINTR; + goto out; + } + kfree(old); + for(i = 0 ; i < MAXDFSALINKS ; i++) + if((mnt = dfsa_links[i].mnt)) + { + dp = dfsa_links[i].dentry; + dfsa_links[i].mnt = NULL; + dfsa_links[i].dentry = NULL; + unlock_mosix(); + mntput(mnt); + dput(dp); + lock_mosix(); + } + out: + unlock_mosix(); + up(&dfsa_modify_lock); + return(ret); +} + +char * +dfsa_showlinks(void) +{ + int i, sz; + char *x; + char *links = NULL, *p; + + lock_mosix(); + again: + for(sz = 1 , i = 0 ; i < dfsatab->ndefs ; i++) + if(dfsatab->dfsdef[i].defno > MAXDFSAFS) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + sz += strlen(x) + 1; + } + unlock_mosix(); + if(!(links = kmalloc(sz, GFP_KERNEL))) + return(NULL); + lock_mosix(); + p = links; + for(i = 0 ; i < dfsatab->ndefs ; i++) + if(dfsatab->dfsdef[i].defno > MAXDFSAFS) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + while(*x) + { + *p++ = *x++; + if(--sz < 2) + { + kfree(links); + goto again; + } + } + *p++ = '\n'; + sz--; + } + *p = '\0'; + unlock_mosix(); + return(links); +} + +int +disable_dfsa(void) +{ + int i; + int ret; + struct vfsmount *mnt; + struct dentry *dp; + + if(down_interruptible(&dfsa_modify_lock)) + return(-EINTR); + lock_mosix(); + ret = 0; + if(dfsa_disabled++) + goto out; + printk("Disabling DFSA\n"); + for(i = 0 ; i < MAXDFSAFS ; i++) + { + dfsa_being_cancelled[i] = 1; + if((mnt = dfsa_mounts[i])) + dfsa_set_dfsa(mnt, 0, mnt->mnt_dfsa); + } + if((ret = dfsa_sync(1))) + { + printk("Disabling DFSA interrupted\n"); + for(i = 0 ; i < MAXDFSAFS ; i++) + if((mnt = dfsa_mounts[i])) + dfsa_set_dfsa(mnt, mnt->mnt_dfsaprev, 0); + unlock_mosix(); + enable_dfsa(); + goto out2; + } + dfsatab->ndefs = 0; + dfsatab->len = sizeof(struct dfsatab); + for(i = 0 ; i < MAXDFSAFS ; i++) + if((mnt = dfsa_mounts[i])) /* i is the 0-based slot here, as in the two loops above */ + { + dfsa_mounts[i] = NULL; + dfsa_set_dfsa(mnt, 0, 0); + unlock_mosix(); + mntput(mnt); + lock_mosix(); + } + for(i = 0 ; i < MAXDFSALINKS ; i++) + if((mnt = dfsa_links[i].mnt)) + { + dp = 
dfsa_links[i].dentry; + dfsa_links[i].mnt = NULL; + dfsa_links[i].dentry = NULL; + unlock_mosix(); + mntput(mnt); + dput(dp); + lock_mosix(); + } + out: + unlock_mosix(); + out2: + up(&dfsa_modify_lock); + return(ret); +} + +void +enable_dfsa() +{ + int i; + + lock_mosix(); + if(--dfsa_disabled == 0) + { + for(i = 0 ; i < MAXDFSAFS ; i++) + dfsa_being_cancelled[i] = 0; + dfsa_sync(0); + } + unlock_mosix(); +} + +char * +dfsa_getdir(int num, int *tofree) +{ + register int i, l; + char *x, *copy; + char *ret; + + *tofree = 0; + lock_mosix(); + again: + ret = "\n"; + for(i = 0 ; i < dfsatab->ndefs ; i++) + if(dfsatab->dfsdef[i].defno == num) + { + x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]; + /* try to copy - just in case of changes during copy_to_user: */ + l = strlen(x); + unlock_mosix(); + copy = kmalloc(l+2, GFP_KERNEL); + lock_mosix(); + if(!copy) + { + ret = "Sorry: no memory\n"; + break; + } + /* paranoid test - did the table change while unlocked? retry only if it did */ + if(!dfsatab || + i >= dfsatab->ndefs || dfsatab->dfsdef[i].defno != num || + strlen(x = &((char *)dfsatab)[dfsatab->dfsdef[i].offset]) > l) + { + kfree(copy); + goto again; + } + sprintf(copy, "%s\n", x); + ret = copy; + *tofree = 1; + break; + } + unlock_mosix(); + return(ret); +} + +DECLARE_WAIT_QUEUE_HEAD(wait_for_all_to_dfsasync); + +int +dfsa_all_know(void) +{ + register struct task_struct *p; + + read_lock(&tasklist_lock); + for_each_task(p) + if(process_told(p, DREQ_DFSASYNC)) + break; + read_unlock(&tasklist_lock); + return(p == &init_task); +} + +int +dfsa_sync(int wait) +{ + struct task_struct *p; + int any = 0; + int ret = 0; + + read_lock(&tasklist_lock); + for_each_task(p) + if((p->mosix.dflags&(DDEPUTY|DREMOTE)) && !(p->mosix.dflags&DFINISHED)) + { + any = 1; + tell_process(p, DREQ_DFSASYNC); + wake_up_mosix(p); + } + read_unlock(&tasklist_lock); + if(!any) + wake_up(&wait_for_all_to_dfsasync); + if(wait) + { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&wait_for_all_to_dfsasync, &wait); + 
while(1) + { + set_current_state(TASK_INTERRUPTIBLE); + if(dfsa_all_know()) + break; + schedule(); + if(signal_pending(current)) + { + ret = -EINTR; + break; + } + } + remove_wait_queue(&wait_for_all_to_dfsasync, &wait); + set_current_state(TASK_RUNNING); + } + return(ret); +} + +void +clear_dfsasync(void) +{ + if(process_told(current, DREQ_DFSASYNC)) + { + process_ack(current, DREQ_DFSASYNC); + if(dfsa_all_know()) /* we were the last to ack */ + wake_up(&wait_for_all_to_dfsasync); + tell_process(current, DREQ_NOTUPTODATE); + } +} + +/* + * keep only those entries in "*tab" that also appear in our main "dfsatab" + * and have the same [important] mount-flags. + * both tables are already sorted. + */ +void +dfsa_adapt(struct dfsatab *tab) +{ + int i, j = 0, k = 0, n1 = tab->ndefs, n2; + struct dfsdef *def1, *def2; + char *x, *y; + + lock_mosix(); + n2 = dfsatab->ndefs; + def2 = &dfsatab->dfsdef[0]; + if(n2) + for(def1 = tab->dfsdef , i = 0 ; i < n1 && j < n2 ; i++ , def1++) /* stop once the main table is exhausted: def2 must not be read past its end */ + { + x = &((char *)tab)[def1->offset]; + next2: + y = &((char *)dfsatab)[def2->offset]; + switch(dfsa_name_compare(x, y)) + { + case 0: + if(def1->mounted == def2->mounted) + { + if(k != i) + tab->dfsdef[k] = tab->dfsdef[i]; + k++; + j++; + def2++; + } /* fall through */ + case -1: + continue; + case 1: + j++; + def2++; + if(j == n2) + break; /* leaves only the switch; the loop condition then ends the scan */ + goto next2; + } + } + unlock_mosix(); + tab->ndefs = k; +} + +int +name_starts_in_dfsa(char **nam, struct nameidata *nd) +{ + register struct dfsatab *tab; + register int i; + char *n, *x; + struct vfsmount *mnt; + int ind; + + lock_mosix(); + tab = (current->mosix.dflags & DREMOTE) ? current->mosix.ttab : dfsatab; + if(tab && !dfsa_disabled) + for(i = 0 ; i < tab->ndefs ; i++) + { + n = *nam; + x = &((char *)tab)[tab->dfsdef[i].offset]; + while(*n && *n == *x) + { + if(*n == '/') + { + while(*x == '/') + x++; + if(*x == '\0') + break; + while(*n == '/') + { + n++; + if(*n == '.' 
&& (*(n+1) == '\0' || + *(n+1) == '/')) + n++; + } + } + else + { + n++; + x++; + } + } + if(*x || (*n && *n != '/')) + continue; + if((nd->flags & LOOKUP_PARENT) || (!tab->dfsdef[i].mounted && + !*n && !(nd->flags & LOOKUP_FOLLOW))) + { + while(*n == '/') + n++; + if(!*n) + continue; + } + else while(*n == '/') + n++; + ind = tab->dfsdef[i].defno - 1; + if(ind < MAXDFSAFS) + { + if(dfsa_being_cancelled[ind] || + !(mnt = dfsa_mounts[ind])) + break; + nd->mnt = mntget(mnt); + nd->dentry = dget(mnt->mnt_root); + } + else + { + ind -= MAXDFSAFS; + if(!(mnt = dfsa_links[ind].mnt)) + break; + if(!mnt->mnt_dfsano) + break; + nd->mnt = mntget(mnt); + nd->dentry = dget(dfsa_links[ind].dentry); + } + *nam = n; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: Found a DFSA base: %s|%s\n", + desc_mostask(NULL), + &((char *)tab)[tab->dfsdef[i].offset], n); +#endif /* CONFIG_MOSIX_DEBUG */ + unlock_mosix(); + return(1); + } + unlock_mosix(); + return(0); +} + +int +within_dfsa(struct nameidata *nd) +{ + int dfsano; + struct dfsatab *tab; + struct dfsdef *def; + int i; + int ret; + + lock_mosix(); + if(dfsa_disabled || !(dfsano = nd->mnt->mnt_dfsano) || + dfsa_being_cancelled[dfsano-1] || !(dfsa_mounts[dfsano-1])) + ret = 0; + else if(!(current->mosix.dflags & DREMOTE)) + ret = 1; + else if(!(tab = current->mosix.ttab) || nd->mnt->mnt_dfsaprev) + ret = 0; + else + { + ret = 0; + for(def = tab->dfsdef , i = tab->ndefs ; i > 0 ; i-- , def++) + if(def->defno == dfsano) + { + ret = 1; + break; + } + } + unlock_mosix(); + return(ret); +} + +void +dfsa_close_file(int fd) +{ + register struct task_struct *p = current; + register struct files_struct *files = current->files; + + if(atomic_read(&files->users) > 1) + { + tell_process(p, DREQ_NOTUPTODATE); + return; + } + if(fd >= files->maxclosed) + files->maxclosed = fd + 1; + FD_SET(fd, files->closed); + p->mosix.dupdates |= DFSA_UPDCLOSE; +} + +void +dfsa_open_file(int fd) +{ + struct task_struct *p = current; + 
register struct files_struct *files = p->files; + struct file *f; + int n; + + if(atomic_read(&files->users) > 1) + { + tell_process(p, DREQ_NOTUPTODATE); + return; + } + /* since nobody shares our files, no locking is necessary */ + if(!can_dfsa_file(fd)) + { + p->mosix.dupdates |= DFSA_UPDUSED; + return; + } + p->mosix.dupdates |= DFSA_UPDOPEN; + f = files->fd[fd]; + if((n = file_count(f)) > 1) + { + /* must find all DUPs: */ + register int i, l, li; + struct file **dups = files->fd; + unsigned long *bits = files->open_fds->fds_bits; + int lim = files->max_fds, llim; + + if(lim > files->max_fdset) + lim = files->max_fdset; + llim = (lim + BITS_PER_LONG - 1) / BITS_PER_LONG; + for(i = l = 0 ; l < llim ; l++) + if(*bits++) + { + li = i + BITS_PER_LONG; + if(li > lim) + li = lim; + for(; i < li ; i++) + if(dups[i] == f) + { + if(i >= files->maxopened) + files->maxopened = i + 1; + FD_SET(i, files->opened); + FD_CLR(i, files->modified); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s-dfsa_open_file(%d): " + "opened %d\n", + desc_mostask(NULL), fd, i); +#endif /* CONFIG_MOSIX_DEBUG */ + if(--n == 0) + return; + } + } + else + i += BITS_PER_LONG; + } + else + { + if(fd >= files->maxopened) + files->maxopened = fd + 1; + FD_SET(fd, files->opened); + FD_CLR(fd, files->modified); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s-dfsa_open_file(%d): opened\n" + , desc_mostask(NULL), fd); +#endif /* CONFIG_MOSIX_DEBUG */ + } +} + +void +dfsa_touch_file(int fd) +{ + struct task_struct *p = current; + register struct files_struct *files = p->files; + struct file *f; + int n; + + if(atomic_read(&files->users) > 1) + { + tell_process(p, DREQ_NOTUPTODATE); + return; + } + /* since nobody shares our files, no locking is necessary */ + if(!can_dfsa_file(fd)) + return; + p->mosix.dupdates |= DFSA_UPDMODS; + f = files->fd[fd]; + if((n = file_count(f)) > 1) + { + /* must find all DUPs: */ + register int i, l, li; + struct file **dups = files->fd; 
+ unsigned long *bits = files->open_fds->fds_bits; + int lim = files->max_fds; + int llim; + + if(lim > files->max_fdset) + lim = files->max_fdset; + llim = (lim + BITS_PER_LONG - 1) / BITS_PER_LONG; + for(i = l = 0 ; l < llim ; l++) + if(*bits++) + { + li = i + BITS_PER_LONG; + if(li > lim) + li = lim; + for(; i < li ; i++) + if(dups[i] == f) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s-dfsa_touch_file(%d): " + "touched %d\n", + desc_mostask(NULL), fd, i); +#endif /* CONFIG_MOSIX_DEBUG */ + if(FD_ISSET(i, files->opened)) + continue; + if(i >= files->maxmod) + files->maxmod = i + 1; + FD_SET(i, files->modified); + if(--n == 0) + return; + } + } + else + i += BITS_PER_LONG; + } + else + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s-dfsa_touch_file(%d): touched\n", + desc_mostask(NULL), fd); +#endif /* CONFIG_MOSIX_DEBUG */ + if(FD_ISSET(fd, files->opened)) + return; + if(fd >= files->maxmod) + files->maxmod = fd + 1; + FD_SET(fd, files->modified); + } +} + +/* + * callers to "can_dfsa_file" must first make sure (perhaps with "CAN_DFSA") + * that their files_struct is not shared with clones: + * hence, neither the caller nor "can_dfsa_file" need to lock the files + * (which could deadlock the mosix-lock) + */ +int +can_dfsa_file(int fd) +{ + register int i, l, li; + struct file *file = fcheck(fd); + struct vfsmount *mnt; + int fs, mode, n; + + if(!file) + return(0); + mnt = file->f_vfsmnt; + lock_mosix(); + if(current->mosix.dflags & DREMOTE) + { + /* well, since it is open, it MUST be a DFSA file! 
*/ + if(!(fs = mnt->mnt_dfsaprev) && !(fs = mnt->mnt_dfsano)) + panic("remote loose DFSA fd"); + unlock_mosix(); + return(fs); + } + if(!(fs = mnt->mnt_dfsano) || dfsa_being_cancelled[fs-1]) + { + unlock_mosix(); + return(0); + } + unlock_mosix(); + mode = file->f_dentry->d_inode->i_mode; + if(!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) + return(0); + n = file_count(file); + if(n != 1) + { + /* Are all links merily our own DUPs? */ + struct file **files = current->files->fd; + unsigned long *bits = current->files->open_fds->fds_bits; + int lim = current->files->max_fds; + int llim; + + if(lim > current->files->max_fdset) + lim = current->files->max_fdset; + llim = (lim + BITS_PER_LONG - 1) / BITS_PER_LONG; + for(i = l = 0 ; l < llim ; l++) + if(*bits++) + { + li = i + BITS_PER_LONG; + if(li > lim) + li = lim; + for(; i < li ; i++) + if(files[i] == file && --n == 0) + return(fs); + } + else + i += BITS_PER_LONG; + return(0); + } + return(fs); +} + +void +dfsa_tinit(void) +{ + struct task_struct *p = current; + struct files_struct *files = p->files; + + CLEAR_ALL(files, closed); + files->maxclosed = 0; + CLEAR_ALL(files, modified); + files->maxmod = 0; + p->mosix.dupdates = 0; + p->fs->pwd_changed = 0; + p->mosix.ttab = NULL; +} + +/* + * Is it OK to accept and open a file/directory opened on the other side? + * A DEPUTY must always accept. + * A REMOTE is more reserved. 
+ */ +int +ok_to_accept_in_dfsa(int dfsano) +{ + struct task_struct *p = current; + struct vfsmount *mnt; + struct dfsatab *tab; + struct dfsdef *def; + int i; + + MOSIX_LOCKED; + if(dfsano < 1 || dfsano > MAXDFSAFS || !(mnt = dfsa_mounts[dfsano-1])) + return(0); + if(!(p->mosix.dflags & DREMOTE)) + return(1); + if(dfsa_disabled) + return(0); + if(!(tab = p->mosix.ttab)) + return(0); + if(dfsa_being_cancelled[dfsano-1]) + return(0); + if(mnt->mnt_dfsaprev) + return(0); + for(def = tab->dfsdef , i = 0 ; i < tab->ndefs ; i++ , def++) + if(def->defno == dfsano) + return(1); + return(0); +} + +void +reconstruct(int dfsano, void *info, int infolen, struct vfsmount **mnt, + struct dentry **dp) +{ + char comma; + + lock_mosix(); + if(!ok_to_accept_in_dfsa(dfsano)) + { + unlock_mosix(); + goto bad; + } + *mnt = mntget(dfsa_mounts[dfsano-1]); + unlock_mosix(); + *dp = (*mnt)->mnt_sb->s_op->reconstruct(*mnt, info, infolen); + if(*dp) + return; + mntput(*mnt); + bad: + if(current->mosix.dflags & DREMOTE) + { + *mnt = NULL; + *dp = NULL; + return; + } + printk("%s: Failed to reopen DFSA file (dfsano=%d, info", + desc_mostask(NULL), dfsano); + comma = '='; + while(infolen >= (int)sizeof(int)) /* signed compare: a negative infolen must not be promoted to a huge unsigned value */ + { + printk("%c%X", comma, *((int *)info)); + info = (char *)info + sizeof(int); /* step one whole int; "(int *)info++" advanced the void pointer by a single byte */ + infolen -= sizeof(int); + comma=','; + } + printk(")\n"); + *mnt = mntget(current->fs->rootmnt); + *dp = dget(bad_deputy_dfsaentry); +} + +void +dfsa_file_install(int fd, mode_t mode, loff_t pos, unsigned int flags, + int dfsano, void *info, int infolen) +{ + struct vfsmount *mnt; + struct dentry *dp; + register struct file *fp; + register struct inode *ip; + struct files_struct *files = current->files; + int failure; + + reconstruct(dfsano, info, infolen, &mnt, &dp); + if(!dp) /* (can only happen on REMOTE) */ + return; + if(atomic_read(&files->users) != 1) + panic("dfsa_file_install: sharing"); + write_lock(&files->file_lock); + failure = (fd >= files->max_fdset && expand_fdset(files, fd)); + write_unlock(&files->file_lock); + if(failure) + 
goto bad1; + if((fp = get_empty_filp())) + { + fp->f_dentry = dp; + fp->f_vfsmnt = mnt; + ip = dp->d_inode; + fp->f_op = fops_get(ip->i_fop); + if((fp->f_op && fp->f_op->open && fp->f_op->open(ip, fp))) + { + fput(fp); + goto bad; + } + flags &= ~O_NOWRITEACCESS; + if(mode & FMODE_WRITE) + { + if(get_write_access(ip)) + flags |= O_NOWRITEACCESS; + } + fp->f_flags = flags; + fp->f_pos = pos; + fp->f_mode = mode; + fp->f_reada = 0; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: Installed fd #%d, pos=%d, mode=%d, flags=0x%x\n", + desc_mostask(NULL), fd, (int)pos, mode, flags); +#endif /* CONFIG_MOSIX_DEBUG */ + write_lock(&files->file_lock); + files->fd[fd] = fp; + FD_SET(fd, files->open_fds); + FD_CLR(fd, files->close_on_exec); + write_unlock(&files->file_lock); + /* the following "should not" happen, but it could due to an + * inconsistency in the file-system - at least we must now + * instruct our REMOTE to stop using that file! + */ + if((current->mosix.dflags & DDEPUTY) && !can_dfsa_file(fd)) + dfsa_close_file(fd); + } + else + { + bad1: + dput(dp); + mntput(mnt); + bad: + if(!(current->mosix.dflags & DREMOTE)) + printk("%s: Failed to allocate DFSA file for reopen\n", + desc_mostask(NULL)); + } +} + +void +deputy_resync_dfsa(void) +{ + register struct task_struct *p = current; + char *update; + int sz; + +#ifdef CONFIG_MOSIX_DIAG + if(!(p->mosix.dflags & DSYNC)) + panic("deputy_resync_dfsa: not synced"); +#endif /* CONFIG_MOSIX_DIAG */ + again: + p->mosix.dupdates |= DFSA_UPDATE; + clear_dfsasync(); + if(CAN_DFSA(p) && (update = deputy_pack_dfsa_changes(&sz))) + { + if(deputy_request(DEP_DFSA_CHANGES, &sz, sizeof(sz), + update, sz, 0, NULL, 0)) + deputy_communication_failed(); + kfree(update); + if(process_told(p, DREQ_DFSASYNC)) /* a new one (race) */ + goto again; + } + else if(deputy_request(DEP_DFSA_CLEAR, NULL, 0, NULL, 0, 0, NULL, 0)) + deputy_communication_failed(); +} + +void +remote_clear_dfsa(void) +{ + register struct task_struct 
*p = current; + extern void close_files(struct files_struct *); + + if(p->mosix.ttab) + { + kfree(p->mosix.ttab); + p->mosix.ttab = NULL; + } + close_files(p->files); + CLEAR_ALL(p->files, open_fds); + p->files->next_fd = 0; + if(p->fs->pwd) + { + dput(p->fs->pwd); + p->fs->pwd = NULL; + mntput(p->fs->pwdmnt); + p->fs->pwdmnt = NULL; + } +} + +int +dfsa_optimized_read(struct file *f, loff_t off, unsigned long addr, + unsigned long len) +{ + struct task_struct *p = current; + struct read_yourself_h r; + struct vfsmount *mnt; + char data[MAX_IDENT_RECORD_LEN]; + int result; + + if(!(p->mosix.dflags & DDEPUTY) || !CAN_DFSA(p)) + goto read; + mnt = f->f_vfsmnt; + lock_mosix(); + if(!(r.dfsano = mnt->mnt_dfsano) || dfsa_being_cancelled[r.dfsano-1]) + { + unlock_mosix(); + goto read; + } + unlock_mosix(); + if(!(r.infolen = mnt->mnt_sb->s_op->identify(f->f_dentry, data))) + goto read; + r.off = off; + r.addr = addr; + r.len = len; + if(deputy_request(DEP_READ_YOURSELF, &r, sizeof(r), data, r.infolen, 0, + (void **)&result, -sizeof(result))) + { + deputy_communication_failed(); + return(-EIO); + } + if(result == -ENOENT) + { + read: + return(f->f_op->read(f, (char *)addr, len, &off)); + } + return(result); +} + +int +remote_read_yourself(struct read_yourself_h *r) +{ + int result = -ENOENT; + char data[MAX_IDENT_RECORD_LEN]; + struct dentry *dp; + struct vfsmount *mnt; + struct file file; + struct inode *ip; + + if(r->infolen > MAX_IDENT_RECORD_LEN || + verify_area(VERIFY_WRITE, (char *)r->addr, r->len)) + { + comm_flushdata(COMM_ALLDATA); + goto reply; + } + if(comm_copydata(data, r->infolen, 0)) + goto reply; + reconstruct(r->dfsano, data, r->infolen, &mnt, &dp); + if(!dp) + goto reply; + ip = dp->d_inode; + if(ip->i_fop && ip->i_fop->read && !init_private_file(&file, dp, 1)) + { + result = file.f_op->read(&file, (char *)r->addr, r->len, + &r->off); + if(file.f_op->release) + file.f_op->release(ip, &file); + } + dput(dp); + mntput(mnt); + reply: +#ifdef 
CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("remote_read_yourself: off=%ld, addr=0x%lx, len=0x%lx, " + "result=%d\n", (long)r->off, r->addr, r->len, result); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_free(r); + result = comm_send(DEP_READ_YOURSELF|REPLY, &result, sizeof(int), + NULL, 0, 0); + return(result); +} + +int +enter_remote_dfsa_mode(void) +{ + struct task_struct *p = current; + + if(!p->mosix.ttab) + return(0); + spin_lock_irq(&runqueue_lock); /* ensures DREQ_EXITDFSA not lost */ + p->mosix.dflags |= DREMOTEDFSA; + spin_unlock_irq(&runqueue_lock); + if(remote_deputy_has_something_for_us(p) || + remote_need_while_asleep(p) || + (process_told(p, (DREQ_DFSASYNC|DREQ_NOTUPTODATE))) || + (p->mosix.dflags & DFAKESIGNAL)) + { + leave_remote_dfsa_mode(); + return(0); + } + return(1); +} + +void +leave_remote_dfsa_mode(void) +{ + register struct task_struct *p = current; + + spin_lock_irq(&runqueue_lock); /* ensures no new "DREQ_EXITDFSA" */ + p->mosix.dflags &= ~(DREMOTEDFSA|DFAKESIGNAL); + spin_unlock_irq(&runqueue_lock); + process_ack(p, DREQ_EXITDFSA); + spin_lock_irq(&p->sigmask_lock); + p->sigpending = 0; + spin_unlock_irq(&p->sigmask_lock); +} + +void +dfsa_syscall_on_file(int fd, int touching) +{ + register struct task_struct *p = current; + static unsigned char rand; + + if(!(p->mosix.dflags & (DDEPUTY|DREMOTE)) && CAN_DFSA(p) && + (fd >= p->files->max_fds || can_dfsa_file(fd))) + p->mosix.dflags |= DSTATSDOWN; + + /* consider a remote process that with regard to this file only runs a + * combination of fstat(), fcntl(F_GETFL), fchdir(), fchmod(), fchown(), + * fsync() and ftrunc() - but does lots of the above: if its parent + * initially held the file open, but not any more, there is no other + * way that the son will become aware of the fact that it may now do + * all those things remotely. Adding a file-flag on close is not very + * helpful because a chain of relatives may be constantly closing this + * file (or simply exiting). 
On the other hand, running CAN_DFSA + + * can_dfsa_file + dfsa_touch_file (including its communication + * overhead) is expensive. + * The compromise is to run them at random, once in about 256 times, + * and eventually the process will come to know. + */ + if(touching || ((p->mosix.dflags & DDEPUTY) && !rand++ && + CAN_DFSA(p) && can_dfsa_file(fd))) + dfsa_touch_file(fd); +} + +void +dfsa_exit(void) +{ + struct task_struct *p = current; + + if(process_told(p, DREQ_DFSASYNC)) + clear_dfsasync(); + if(p->mosix.ttab) + { + kfree(p->mosix.ttab); + p->mosix.ttab = NULL; + } +} + +/*** PACKING/UNPACKING UPDATES SECTION ***/ + +enum +{ + /* general info: */ + REC_CREDITS, REC_UMASK, REC_SELECT, REC_DFSATAB, + /* current directory */ + REC_CDIR, + /* file table */ + REC_MAX, REC_USED, + /* individual files */ + REC_CLOSED, REC_NONDFSA, REC_FILE, REC_MOD, REC_DUP +}; + +struct credits_rec +{ + uid_t uid, euid, suid, fsuid; + gid_t gid, egid, sgid, fsgid; + int ngroups; + gid_t groups[NGROUPS]; +}; + +/* the following is defined even if MFS is not configured, + * because it may possibly be configured by the peer node! 
*/ +struct select_rec +{ + int selected; + int lastexec; + int lastmagic; +}; + +struct cdir_rec +{ + int dfsano; + int infolen; +}; + +struct max_rec +{ + int max_fds; + int max_fdset; +}; + +struct file_rec +{ + short fd; + mode_t mode; + loff_t pos; + unsigned int flags; + int dfsano; + int infolen; +}; + +struct mod_rec +{ + short fd; + mode_t mode; + loff_t pos; + unsigned int flags; +}; + +struct dup_rec +{ + short fd; + short orig; +}; + +static char * +add_changes_record(char *old, char type, void *add, int addlen) +{ + char *rec; + int sz, minsz; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: adding DFSA changes-record type %d, len %d\n", + desc_mostask(NULL), type, addlen); +#endif /* CONFIG_MOSIX_DEBUG */ + again: + if(old) + { + minsz = ((int *)old)[1] + sizeof(type) + addlen; + if(((int *)old)[0] < minsz) + { + sz = 2 * minsz; + if((sz & PAGE_MASK) >= minsz) + sz &= PAGE_MASK; + if(!(rec = kmalloc(sz, GFP_KERNEL))) + { + sz = minsz; + if(!(rec = kmalloc(sz, GFP_KERNEL))) + { + kfree(old); + return(NULL); + } + } + memcpy(rec, old, ((int *)old)[1]); + ((int *)rec)[0] = sz; + kfree(old); + old = rec; + if(type == REC_DFSATAB)/* make sure it is still valid */ + { + if(addlen < dfsatab->len) + goto again; + add = dfsatab; + addlen = dfsatab->len; + } + } + old[((int *)old)[1]] = type; + memcpy(old+((int *)old)[1] + sizeof(type), (char *)add, addlen); + ((int *)old)[1] += addlen + sizeof(type); + return(old); + } + else + { + minsz = 2 * sizeof(int) + sizeof(type) + addlen; + sz = 2 * sizeof(int) + 3 * (addlen + sizeof(type)); + if((sz & PAGE_MASK) >= minsz) + sz &= PAGE_MASK; + if(!(rec = kmalloc(sz, GFP_KERNEL))) + { + sz = minsz; + if(!(rec = kmalloc(sz, GFP_KERNEL))) + return(NULL); + } + ((int *)rec)[0] = sz; + if(type == REC_DFSATAB) /* make sure it is still valid */ + { + if(addlen < dfsatab->len) + { + kfree(rec); + goto again; + } + add = dfsatab; + addlen = dfsatab->len; + } + ((int *)rec)[1] = 2 * sizeof(int) + 
sizeof(type) + addlen; + rec[2*sizeof(int)] = type; + memcpy(rec+2*sizeof(int) + sizeof(type), (char *)add, addlen); + return(rec); + } +} + +static void * +get_record(char *buf, int *type) +{ + int pos = ((int *)buf)[0]; + int tot = ((int *)buf)[1]; + void *ret; + + if(pos == 0) /* "all done" mark */ + { + all_done: + kfree(buf); + return(NULL); + } + if(pos >= tot) /* first call */ + pos = 2 * sizeof(int); + *type = buf[pos++]; + ret = buf + pos; + switch(*type) + { + case REC_CREDITS: + pos += sizeof(struct credits_rec); + break; + case REC_UMASK: + pos += sizeof(int); + break; + case REC_SELECT: + pos += sizeof(struct select_rec); + break; + case REC_DFSATAB: + pos += ((struct dfsatab *)ret)->len; + break; + case REC_CDIR: + pos += sizeof(struct cdir_rec) + + ((struct cdir_rec *)ret)->infolen; + break; + case REC_MAX: + pos += sizeof(struct max_rec); + break; + case REC_CLOSED: + case REC_NONDFSA: + pos += sizeof(int); + break; + case REC_FILE: + pos += sizeof(struct file_rec) + + ((struct file_rec *)ret)->infolen; + break; + case REC_MOD: + pos += sizeof(struct mod_rec); + break; + case REC_DUP: + pos += sizeof(struct dup_rec); + break; + case REC_USED: + if((current->mosix.dflags & DDEPUTY) || + current->mosix.ttab) + pos += current->files->max_fdset / 8; + else + /* we can't be sure about size - and it does not matter + anyway: an expansion must have failed earlier */ + goto all_done; + break; + default: + printk("buf = 0x%x, type=%d\n", (int)buf, *type); + panic("get_record"); + } + ((int *)buf)[0] = (pos >= tot) ? 
0 : pos; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s-get_record: type=%d, len=%d\n", + desc_mostask(NULL), *type, (buf+pos) - (char *)ret); +#endif /* CONFIG_MOSIX_DEBUG */ + return(ret); +} + +int +add_closed_files(char **bufp) +{ + int i; + unsigned long bit = 1; + register struct files_struct *files = current->files; + unsigned long *mask = files->closed->fds_bits; + unsigned long bits = *mask; + char *buf = *bufp; + + *mask = 0; + for(i = 0 ; i < files->maxclosed ; i++) + { + if((bits & bit) && !(buf = + add_changes_record(buf, REC_CLOSED, &i, sizeof(i)))) + { + *bufp = NULL; + return(-ENOMEM); + } + bits &= ~bit; + if(bits) + bit <<= 1; + else + { + i |= (__NFDBITS-1); /* last fd of this word: the loop's i++ then lands on the first fd of the next word, matching bit = 1 */ + bit = 1; + while(1) + { + if(i >= files->maxclosed - 1) + break; + bits = *(++mask); + *mask = 0; + if(bits) + break; + i += __NFDBITS; + } + } + } + files->maxclosed = 0; + *bufp = buf; + return(0); +} + +int +add_opened_files(char **bufp) +{ + int i, j, dfsano; + struct task_struct *p = current; + register struct files_struct *files = p->files; + unsigned long *mask = files->opened->fds_bits; + unsigned long bits = *mask; + unsigned long bit = 1; + struct + { + struct file_rec f; + char info[MAX_IDENT_RECORD_LEN]; + } fplus; + struct dup_rec d; + struct file **fds = files->fd; + char *buf = *bufp; + struct dentry *dp; + + *mask = 0; + for(i = 0 ; i < files->maxopened ; i++) + { + if(!(bits & bit)) + goto file_done; + if(!fds[i]) + { + if(p->mosix.dflags & DREMOTE) + /* tell DEPUTY that REMOTE needs full file-info */ + goto non_dfsa; + goto file_done; + } + if(!(dfsano = can_dfsa_file(i))) + { + non_dfsa: + if(!(buf = add_changes_record(buf, REC_NONDFSA, &i, + sizeof(i)))) + { + fail: + return(-ENOMEM); + } + goto file_done; + } + if(file_count(fds[i]) > 1) + for(j = 0 ; j < i ; j++) + if(fds[j] == fds[i]) + { + d.fd = i; + d.orig = j; + buf = add_changes_record(buf, REC_DUP, &d, sizeof(d)); + if(!buf) + goto fail; + goto file_done; + } + dp = 
fds[i]->f_dentry;
+		fplus.f.infolen = fds[i]->f_vfsmnt->mnt_sb->s_op->identify(
+					fds[i]->f_dentry, &fplus.info);
+		if(!fplus.f.infolen)
+			goto fail;
+		fplus.f.fd = i;
+		fplus.f.mode = fds[i]->f_mode;
+		fplus.f.pos = fds[i]->f_pos;
+		fplus.f.flags = fds[i]->f_flags;
+		fplus.f.dfsano = dfsano;
+		/* the identification info travels right behind the file_rec */
+		if(!(buf = add_changes_record(buf, REC_FILE, &fplus,
+			sizeof(struct file_rec) + fplus.f.infolen)))
+			goto fail;
+	file_done:
+		bits &= ~bit;
+		if(bits)
+			bit <<= 1;
+		else
+		{
+			/*
+			 * Nothing left in this word: park i on the LAST fd of
+			 * the word, so that the loop's i++ lands on the first
+			 * fd of the next word, in step with bit == 1.
+			 */
+			i = ((i + __NFDBITS) & ~(__NFDBITS-1)) - 1;
+			bit = 1;
+			while(1)
+			{
+				/*
+				 * this scan walks the "opened" set, so it is
+				 * bounded by maxopened (not by maxmod)
+				 */
+				if(i >= files->maxopened - 1)
+					break;
+				bits = *(++mask);
+				*mask = 0;
+				if(bits)
+					break;
+				i += __NFDBITS;
+			}
+		}
+	}
+	/*
+	 * The "opened" set has been consumed.  maxmod must be left alone:
+	 * add_modified_files() still needs it to scan the "modified" set.
+	 */
+	files->maxopened = 0;
+	*bufp = buf;
+	return(0);
+}
+
+/*
+ * Scan the fds flagged in files->modified (below files->maxmod) and append
+ * a REC_MOD record (fd, mode, pos, flags) for each open one to *bufp; an fd
+ * for which can_dfsa_file() fails gets a REC_NONDFSA record instead.
+ * Returns 0 on success or -ENOMEM when add_changes_record() fails.
+ */
+int
+add_modified_files(char **bufp)
+{
+	int i, j;
+	struct task_struct *p = current;
+	register struct files_struct *files = p->files;
+	unsigned long *mask = files->modified->fds_bits;
+	unsigned long bits = *mask;
+	unsigned long bit = 1;
+	struct mod_rec m;
+	struct file **fds = files->fd;
+	char *buf = *bufp;
+
+	*mask = 0;
+	for(i = 0 ; i < files->maxmod ; i++)
+	{
+		if(!(bits & bit) || !fds[i])
+			goto file_done;
+		if(!can_dfsa_file(i))
+		{
+			if(!(buf = add_changes_record(buf, REC_NONDFSA,
+				&i, sizeof(i))))
+			{
+			fail:
+				return(-ENOMEM);
+			}
+			goto file_done;
+		}
+		if(file_count(fds[i]) > 1)
+			for(j = 0 ; j < i ; j++)
+				if(fds[j] == fds[i]) /* touched one - touched all!
*/
+					goto file_done;
+		m.fd = i;
+		m.mode = fds[i]->f_mode;
+		m.pos = fds[i]->f_pos;
+		m.flags = fds[i]->f_flags;
+		if(!(buf = add_changes_record(buf, REC_MOD, &m, sizeof(m))))
+			goto fail;
+	file_done:
+		bits &= ~bit;
+		if(bits)
+			bit <<= 1;
+		else
+		{
+			/*
+			 * Nothing left in this word: park i on the LAST fd of
+			 * the word, so that the loop's i++ lands on the first
+			 * fd of the next word, in step with bit == 1.
+			 * (Parking on the boundary itself made every fd of the
+			 * following words come out one too high.)
+			 */
+			i = ((i + __NFDBITS) & ~(__NFDBITS-1)) - 1;
+			bit = 1;
+			while(1)
+			{
+				/* i + 1 is the next fd to look at */
+				if(i >= files->maxmod - 1)
+					break;
+				bits = *(++mask);
+				*mask = 0;
+				if(bits)
+					break;
+				i += __NFDBITS;
+			}
+		}
+	}
+	files->maxmod = 0;
+	*bufp = buf;
+	return(0);
+}
+
+/*
+ * Build the buffer of change records for the update bits pending in
+ * current->mosix.dupdates (DFSA_UPD*).  Returns the buffer, or NULL with
+ * *retsz == 0 when nothing is pending, or NULL with *retsz == -ENOMEM when
+ * a record could not be added.
+ */
+char *
+deputy_pack_dfsa_changes(int *retsz)
+{
+	char *buf = NULL;
+	register struct task_struct *p = current;
+	register struct files_struct *files = p->files;
+	register int i;
+	register uint32_t updates;
+
+	if(files->count_dropped_to_one || current->fs->count_dropped_to_one ||
+	   (process_told(p, DREQ_NOTUPTODATE)))
+	{
+		/* so this is the first time after a while that CAN_DFSA */
+		/* was TRUE: changes might have occurred by our "partners" */
+		/* so we must assume that our REMOTE does not know anything */
+		/* (or its knowledge is obsolete) */
+
+		files->count_dropped_to_one = 0;
+		p->fs->count_dropped_to_one = 0;
+		process_ack(p, DREQ_NOTUPTODATE);
+		p->mosix.dupdates |= DFSA_UPDATE;
+	}
+	if(!(updates = (p->mosix.dupdates & DFSA_UPDATE)))
+		goto out;
+	if(updates & DFSA_UPDCRED)
+	{
+		struct credits_rec c;
+
+		c.uid = p->uid;
+		c.euid = p->euid;
+		c.suid = p->suid;
+		c.fsuid = p->fsuid;
+		c.gid = p->gid;
+		c.egid = p->egid;
+		c.sgid = p->sgid;
+		c.fsgid = p->fsgid;
+		/* dirty compatibility-keeper: */
+		c.ngroups = p->ngroups;
+		for(i = 0 ; i < c.ngroups ; i++)
+			c.groups[i] = p->groups[i];
+		if(!(buf = add_changes_record(buf, REC_CREDITS, &c, sizeof(c))))
+		{
+		fail:
+			*retsz = -ENOMEM;
+			return(NULL);
+		}
+	}
+	if(updates & DFSA_UPDUMASK)
+	{
+		if(!(buf = add_changes_record(buf, REC_UMASK, &p->fs->umask,
+			sizeof(int))))
+			goto fail;
+	}
+#ifdef CONFIG_MOSIX_FS
+	if(updates & DFSA_UPDSEL)
+	{
+		struct select_rec s;
+
+		s.selected = p->mosix.selected;
+		s.lastexec =
p->mosix.lastexec; + s.lastmagic = p->mosix.lastmagic; + if(!(buf = add_changes_record(buf, REC_SELECT, &s, sizeof(s)))) + goto fail; + } +#endif /* CONFIG_MOSIX_FS */ + if(updates & DFSA_UPDTABLE) + { + if(!(buf = add_changes_record(buf, REC_DFSATAB, dfsatab, + dfsatab->len))) + goto fail; + /* this causes the REMOTE to flush all it knows, so: */ + updates |= (DFSA_UPDCDIR|DFSA_UPDMAX|DFSA_UPDMODS|DFSA_UPDUSED); + memcpy(files->modified, files->open_fds, files->max_fdset / 8); + files->maxmod = (files->max_fds > files->max_fdset) ? + files->max_fdset : files->max_fds; + updates &= ~DFSA_UPDCLOSE; /* all will be closed anyway */ + CLEAR_ALL(files, closed); + files->maxclosed = 0; + } + if(updates & DFSA_UPDCDIR) + { + struct + { + struct cdir_rec c; + char info[MAX_IDENT_RECORD_LEN]; + } cplus; + struct vfsmount *mnt = p->fs->pwdmnt; + + if((cplus.c.dfsano = mnt->mnt_dfsano)) + cplus.c.infolen = mnt->mnt_sb->s_op->identify( + p->fs->pwd, cplus.info); + else + cplus.c.infolen = 0; + buf = add_changes_record(buf, REC_CDIR, &cplus, + sizeof(cplus.c) + cplus.c.infolen); + if(!buf) + goto fail; + } + if(updates & DFSA_UPDMAX) + { + struct max_rec m; + + m.max_fds = files->max_fds; + m.max_fdset = files->max_fdset; + if(!(buf = add_changes_record(buf, REC_MAX, &m, sizeof(m)))) + goto fail; + } + if(updates & DFSA_UPDCLOSE) + { + if(add_closed_files(&buf)) + goto fail; + } + if(updates & DFSA_UPDOPEN) + { + if(add_opened_files(&buf)) + goto fail; + } + if(updates & DFSA_UPDMODS) + { + if(add_modified_files(&buf)) + goto fail; + } + if(updates & DFSA_UPDUSED) + { + if(!(buf = add_changes_record(buf, REC_USED, + files->open_fds, files->max_fdset / 8))) + goto fail; + } + current->mosix.dupdates &= ~DFSA_UPDATE; + out: + *retsz = buf ? 
((int *)buf)[1] : 0; + return(buf); +} + +static void +close_remote_fd(int fd, int code) +{ + register struct files_struct *files = current->files; + register struct file *fp = files->fd[fd]; + + /* no locking is required, since files->users is always 1 */ + if(fp) + { + files->fd[fd] = NULL; + filp_close(fp, files); + } + if(code == REC_CLOSED) + { + FD_CLR(fd, files->open_fds); + if(fd < files->next_fd) + files->next_fd = fd; + } + else + FD_SET(fd, files->open_fds); +} + +void +remote_unpack_dfsa_changes(char *changes) +{ + int type; + void *rec; + register struct task_struct *p = current; + register struct files_struct *files = p->files; + register int i, j; + int dfsano; + struct file *fp; + struct dfsatab *tab; + mode_t mode; + loff_t pos; + unsigned int flags; + void *info; + int infolen; + gid_t *grp; + + deeper_sleep(); +#ifdef CONFIG_MOSIX_DIAG + if(p->mosix.dupdates & DFSA_UPDATE) + mosix_panic("remote_unpack: has update(s)"); +#endif /* CONFIG_MOSIX_DIAG */ + while((rec = get_record(changes, &type))) + switch(type) + { + case REC_CREDITS: + p->uid = ((struct credits_rec *)rec)->uid; + p->euid = ((struct credits_rec *)rec)->euid; + p->suid = ((struct credits_rec *)rec)->suid; + p->fsuid = ((struct credits_rec *)rec)->fsuid; + p->gid = ((struct credits_rec *)rec)->gid; + p->egid = ((struct credits_rec *)rec)->egid; + p->sgid = ((struct credits_rec *)rec)->sgid; + p->fsgid = ((struct credits_rec *)rec)->fsgid; + p->ngroups = ((struct credits_rec *)rec)->ngroups; + grp = ((struct credits_rec *)rec)->groups; + for(i = 0 ; i < p->ngroups ; i++) + p->groups[i] = *grp++; + break; + case REC_UMASK: + p->fs->umask = *((int *)rec); + break; + case REC_SELECT: +#ifdef CONFIG_MOSIX_FS + p->mosix.selected = + ((struct select_rec *)rec)->selected; + p->mosix.lastexec = + ((struct select_rec *)rec)->lastexec; + p->mosix.lastmagic = + ((struct select_rec *)rec)->lastmagic; +#endif /* CONFIG_MOSIX_FS */ + break; + case REC_DFSATAB: + remote_clear_dfsa(); + i = ((struct 
dfsatab *)rec)->len; + if(!(tab = kmalloc(i, GFP_KERNEL))) + break; + memcpy((caddr_t)tab, (caddr_t)rec, i); + dfsa_adapt(tab); + p->mosix.ttab = tab; + process_ack(p, DREQ_NOTUPTODATE); + break; + case REC_CDIR: + if(p->fs->pwd) + { + dput(p->fs->pwd); + p->fs->pwd = NULL; + mntput(p->fs->pwdmnt); + p->fs->pwdmnt = NULL; + } + if(!(dfsano = ((struct cdir_rec *)rec)->dfsano)) + break; + info = (void *)(((char *)rec)+sizeof(struct cdir_rec)); + infolen = ((struct cdir_rec *)rec)->infolen; + reconstruct(dfsano, info, infolen, &p->fs->pwdmnt, + &p->fs->pwd); + break; + case REC_MAX: + i = ((struct max_rec *)rec)->max_fdset; + j = ((struct max_rec *)rec)->max_fds; + write_lock(&files->file_lock); + i = ((i > files->max_fdset && expand_fdset(files, i-1)) + || + (j > files->max_fds && expand_fd_array(files, j-1))); + write_unlock(&files->file_lock); + if(i) + { + remote_clear_dfsa(); + tell_process(p, DREQ_NOTUPTODATE); + } + break; + case REC_CLOSED: + case REC_NONDFSA: + if(current->mosix.ttab) + close_remote_fd(*((int *)rec), type); + break; + case REC_FILE: + if(!current->mosix.ttab) + break; + i = ((struct file_rec *)rec)->fd; + mode = ((struct file_rec *)rec)->mode; + pos = ((struct file_rec *)rec)->pos; + flags = ((struct file_rec *)rec)->flags; + if(p->files->fd[i]) + { + close_remote_fd(i, REC_FILE); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: FD %d found already open " + "(closing it first)\n", + desc_mostask(NULL), i); +#endif /* CONFIG_MOSIX_DEBUG */ + } + dfsano = ((struct file_rec *)rec)->dfsano; + info = (void *)(((char *)rec)+sizeof(struct file_rec)); + infolen = ((struct file_rec *)rec)->infolen; + dfsa_file_install(i, mode, pos, flags, dfsano, info, + infolen); + /* even if we failed, file was still opened on DEPUTY */ + FD_SET(i, files->open_fds); + break; + case REC_MOD: + if(!current->mosix.ttab) + break; + i = ((struct file_rec *)rec)->fd; + if(!(fp = p->files->fd[i])) + { + /* asking DEPUTY to send REC_FILE */ + 
p->mosix.dupdates |= DFSA_UPDOPEN; + if(i >= files->maxopened) + files->maxopened = i + 1; + FD_SET(i, files->opened); + break; + } + mode = ((struct file_rec *)rec)->mode; + pos = ((struct file_rec *)rec)->pos; + flags = ((struct file_rec *)rec)->flags; + fp->f_mode = mode; + fp->f_pos = pos; + fp->f_flags = flags; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: Updated fd #%d, pos=%d, mode=%d, " + "flags=0x%x\n", desc_mostask(NULL), i, + (int)pos, mode, flags); +#endif /* CONFIG_MOSIX_DEBUG */ + break; + case REC_DUP: + if(!current->mosix.ttab) + break; /* previous expanding-failure */ + i = ((struct dup_rec *)rec)->fd; + close_remote_fd(i, REC_DUP); + if((fp = files->fd[((struct dup_rec *)rec)->orig])) + { + get_file(fp); + current->files->fd[i] = fp; + } + break; + case REC_USED: + if(!current->mosix.ttab) + break; /* previous expanding-failure */ + memcpy((caddr_t)files->open_fds, (caddr_t)rec, + files->max_fdset / 8); + break; +#ifdef CONFIG_MOSIX_DIAG + default: + printk("record type = %d\n", type); + panic("remote_unpack_dfsa_changes"); +#endif /* CONFIG_MOSIX_DIAG */ + } + lighter_sleep(); +} + +char * +remote_pack_dfsa_changes(int *retsz) +{ + char *buf = NULL; + register struct task_struct *p = current; + register uint32_t updates; + + if(!(updates = (p->mosix.dupdates & DFSA_UPDATE))) + goto out; + /* NOTE: we do not allow max_fds or max_fdset to grow REMOTELY, + * so we do not act on this flag here */ +#ifdef CONFIG_MOSIX_FS + if(updates & DFSA_UPDSEL) + { + struct select_rec s; + + s.selected = p->mosix.selected; + s.lastexec = p->mosix.lastexec; + s.lastmagic = p->mosix.lastmagic; + if(!(buf = add_changes_record(buf, REC_SELECT, &s, sizeof(s)))) + goto fail; + } +#endif /* CONFIG_MOSIX_FS */ + if((updates & DFSA_UPDCDIR) && p->fs->pwd) + { + struct + { + struct cdir_rec c; + char info[MAX_IDENT_RECORD_LEN]; + } cplus; + struct vfsmount *mnt = p->fs->pwdmnt; + + lock_mosix(); + if(!(cplus.c.dfsano = mnt->mnt_dfsaprev ? 
: mnt->mnt_dfsano))
+			panic("REMOTE non-DFSA cdir");
+		unlock_mosix();
+		cplus.c.infolen = mnt->mnt_sb->s_op->identify(p->fs->pwd,
+			cplus.info);
+		buf = add_changes_record(buf, REC_CDIR, &cplus,
+			sizeof(cplus.c) + cplus.c.infolen);
+		if(!buf)
+			goto fail;
+	}
+	if(updates & DFSA_UPDCLOSE)
+	{
+		if(add_closed_files(&buf))
+		{
+		fail:
+			*retsz = -ENOMEM;
+			return(NULL);
+		}
+	}
+	if(updates & DFSA_UPDOPEN)
+	{
+		if(add_opened_files(&buf))
+			goto fail;
+	}
+	if(updates & DFSA_UPDMODS)
+	{
+		if(add_modified_files(&buf))
+			goto fail;
+	}
+	p->mosix.dupdates &= ~DFSA_UPDATE;
+ out:
+	*retsz = buf ? ((int *)buf)[1] : 0;
+	return(buf);
+}
+
+/*
+ * Close descriptor "fd" of the current process, if it is open: the slot,
+ * its open_fds and close_on_exec bits and next_fd are updated under
+ * files->file_lock, and the file itself is closed after the lock has been
+ * dropped.
+ */
+static void
+close_deputy_fd(int fd)
+{
+	register struct files_struct *files = current->files;
+	register struct file *fp;
+
+	/* files->users must be 1, but we lock against /proc/{mypid}/fd */
+	write_lock(&files->file_lock);
+	if((fp = files->fd[fd]))
+	{
+		files->fd[fd] = NULL;
+		FD_CLR(fd, files->open_fds);
+		FD_CLR(fd, files->close_on_exec);
+		if(fd < files->next_fd)
+			files->next_fd = fd;
+	}
+	write_unlock(&files->file_lock);
+	/*
+	 * filp_close() may block, so it must not run with the lock held:
+	 * the slot is already detached from the table at this point.
+	 */
+	if(fp)
+		filp_close(fp, files);
+}
+
+void
+deputy_unpack_dfsa_changes(char *changes)
+{
+	int type;
+	void *rec;
+	register struct task_struct *p = current;
+	register struct files_struct *files = p->files;
+	register int i;
+	int dfsano;
+	struct file *fp;
+	void *info;
+	int infolen;
+	mode_t mode;
+	loff_t pos;
+	unsigned int flags;
+
+	deeper_sleep();
+	while((rec = get_record(changes, &type)))
+	switch(type)
+	{
+	case REC_SELECT:
+#ifdef CONFIG_MOSIX_FS
+		p->mosix.selected =
+			((struct select_rec *)rec)->selected;
+		p->mosix.lastexec =
+			((struct select_rec *)rec)->lastexec;
+		p->mosix.lastmagic =
+			((struct select_rec *)rec)->lastmagic;
+#endif /* CONFIG_MOSIX_FS */
+		break;
+	case REC_CDIR:
+		dfsano = ((struct cdir_rec *)rec)->dfsano;
+		info = (void *)(((char *)rec)+sizeof(struct cdir_rec));
+		infolen = ((struct cdir_rec *)rec)->infolen;
+		write_lock(&p->fs->lock);
+		dput(p->fs->pwd);
+		p->fs->pwd =
NULL; + mntput(p->fs->pwdmnt); + p->fs->pwdmnt = NULL; + reconstruct(dfsano, info, infolen, &p->fs->pwdmnt, + &p->fs->pwd); + write_unlock(&p->fs->lock); + break; + case REC_CLOSED: + i = *((int *)rec); + close_deputy_fd(i); + break; + case REC_FILE: + i = ((struct file_rec *)rec)->fd; + mode = ((struct file_rec *)rec)->mode; + pos = ((struct file_rec *)rec)->pos; + flags = ((struct file_rec *)rec)->flags; + if((fp = files->fd[i])) + /* only a change (or we would have to close first!) */ + { +#ifdef CONFIG_MOSIX_DIAG + if((fp->f_mode ^ mode) & FMODE_WRITE) + panic("how did FMODE_WRITE change REMOTELY?"); +#endif /* CONFIG_MOSIX_DIAG */ + close_deputy_fd(i); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: FD %d found already open " + "(closing it first)\n", + desc_mostask(NULL), i); +#endif /* CONFIG_MOSIX_DEBUG */ + } + dfsano = ((struct file_rec *)rec)->dfsano; + info = (void *)(((char *)rec)+sizeof(struct file_rec)); + infolen = ((struct file_rec *)rec)->infolen; + dfsa_file_install(i, mode, pos, flags, dfsano, info, + infolen); + break; + case REC_NONDFSA: + /* this is a request for fuller info */ + i = *((int *)rec); + p->mosix.dupdates |= DFSA_UPDOPEN; + if(i >= files->maxopened) + files->maxopened = i + 1; + FD_SET(i, files->opened); + FD_CLR(i, files->modified); + break; + case REC_MOD: + i = ((struct file_rec *)rec)->fd; + mode = ((struct file_rec *)rec)->mode; + pos = ((struct file_rec *)rec)->pos; + flags = ((struct file_rec *)rec)->flags; + if(!(fp = files->fd[i])) + break; +#ifdef CONFIG_MOSIX_DIAG + if((fp->f_mode ^ mode) & FMODE_WRITE) + panic("how did FMODE_WRITE change REMOTELY?"); +#endif /* CONFIG_MOSIX_DIAG */ + fp->f_mode = mode; + fp->f_pos = pos; + fp->f_flags = flags; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_DFSA) + printk("%s: Updated fd #%d, pos=%d, mode=%d, " + "flags=0x%x\n", desc_mostask(NULL), i, + (int)pos, mode, flags); +#endif /* CONFIG_MOSIX_DEBUG */ + break; + case REC_DUP: + i = ((struct dup_rec 
*)rec)->fd; + close_deputy_fd(i); + if((fp = files->fd[((struct dup_rec *)rec)->orig])) + { + write_lock(&files->file_lock); + FD_SET(i, files->open_fds); + get_file(fp); + files->fd[i] = fp; + write_unlock(&files->file_lock); + } + else + { + printk("%s: DFSA told that FD #%d is a DUP " + "of FD #%d... which is NULL\n", + desc_mostask(NULL), i, + ((struct dup_rec *)rec)->orig); + dfsa_file_install(i, 0, 0, 0, 0, NULL, 0); + } + break; + default: + printk("record type = %d\n", type); + panic("deputy_unpack_dfsa_changes"); + } + lighter_sleep(); +} + +void +dfsa_check_comm_send(int *type, char **result, int *reslen) +{ + register struct task_struct *p = current; + register struct mosix_task *m = &p->mosix; + + if(*type & DFSA_NOUPDATE) + { + *result = NULL; + *reslen = 0; + return; + } + if((m->dflags & (DREMOTE|DPSYNC|DINCOMING)) == DREMOTE) + { + if((*result = remote_pack_dfsa_changes(reslen))) + *type |= COMM_MFDFSA; + if(!m->ttab || (process_told(p, DREQ_NOTUPTODATE))) + *type |= COMM_MFNODFSA; +#ifdef CONFIG_MOSIX_DEBUG + if(*reslen < 0) + printk("dfsa_check_comm_send: result=%d\n", *reslen); +#endif /* CONFIG_MOSIX_DEBUG */ + return; + } + *result = NULL; + *reslen = 0; + if(!(*type & USERMODE)) + return; +#ifdef CONFIG_MOSIX_DIAG + if((m->dflags & (DDEPUTY|DSYNC)) != (DDEPUTY|DSYNC)) + mosix_panic("dfsa_check_comm_send: not DEPUTY|DSYNC"); +#endif /* CONFIG_MOSIX_DIAG */ + if(process_told(p, DREQ_DFSASYNC)) + clear_dfsasync(); + if(process_told(p, DREQ_NOTUPTODATE)) + m->dupdates |= DFSA_UPDATE; + if(!(m->dupdates & DFSA_UPDATE)) + return; + again: + if(!CAN_DFSA(p)) + goto no_dfsa; + *result = deputy_pack_dfsa_changes(reslen); + if(process_told(p, DREQ_DFSASYNC)) /* a new one (race) */ + { + clear_dfsasync(); + m->dupdates |= DFSA_UPDATE; + if(*result) + kfree(*result); + goto again; + } + if(*result) + { + *type |= COMM_MFDFSA; + return; + } + if(*reslen == 0) + return; + no_dfsa: + *type |= COMM_MFNODFSA; + *reslen = 0; +} + +void +dfsa_comm_recv(int type, 
char *changes)
+{
+	register struct task_struct *p = current;
+#ifdef CONFIG_MOSIX_DIAG
+	if(type & DFSA_NOUPDATE)
+	{
+		mosix_panic("dfsa_comm_recv: inhibited-type got through");
+		tell_process(p, DREQ_NOTUPTODATE);
+		return;
+	}
+#endif /* CONFIG_MOSIX_DIAG */
+	if(type & COMM_MFNODFSA)
+	{
+		if(p->mosix.dflags & DREMOTE)
+			remote_clear_dfsa();
+		tell_process(p, DREQ_NOTUPTODATE);
+	}
+	if(type & COMM_MFDFSA)
+	{
+		if(p->mosix.dflags & DREMOTE)
+			remote_unpack_dfsa_changes(changes);
+		else
+			deputy_unpack_dfsa_changes(changes);
+	}
+}
+
+/* print "title (max=N):" followed by every fd below max that is set in set */
+static void printset(char *title, int max, fd_set *set)
+{
+	register int i;
+
+	printk("%s (max=%d): ", title, max);
+	for(i = 0 ; i < max ; i++)
+		if(FD_ISSET(i, set))
+			printk(" %d", i);
+	printk("\n");
+}
+
+/*
+ * Debugging aid: print the DFSA state of a process with printk().
+ * pid == 0 dumps the current process; otherwise the task list is searched
+ * for a task whose pid, or whose task_struct address, equals "pid".
+ */
+void
+dfsa_dump(int pid)
+{
+	register struct task_struct *p = current;
+	register struct files_struct *files = p->files;
+	register int i, fs;
+	struct file *f;
+
+	if(pid)
+	{
+		read_lock(&tasklist_lock);
+		for_each_task(p)
+			if(p->pid == pid || (int)p == pid)
+				break;
+		read_unlock(&tasklist_lock);
+		/* the walk ended back on init_task: no task matched */
+		if(p == &init_task)
+		{
+			printk("Pid %d not found.\n", pid);
+			return;
+		}
+		files = p->files;
+	}
+	printk("%s : CAN_DFSA=%d\n", desc_mostask(&p->mosix), CAN_DFSA(p));
+	printk("Update flags:");
+	/* every DFSA_UPD* bit, in the order deputy_pack_dfsa_changes() tests them */
+	if(p->mosix.dupdates & DFSA_UPDCRED)
+		printk(" UPDCRED");
+	if(p->mosix.dupdates & DFSA_UPDUMASK)
+		printk(" UPDUMASK");
+	if(p->mosix.dupdates & DFSA_UPDSEL)
+		printk(" UPDSEL");
+	if(p->mosix.dupdates & DFSA_UPDTABLE)
+		printk(" UPDTABLE");
+	if(p->mosix.dupdates & DFSA_UPDCDIR)
+		printk(" UPDCDIR");
+	if(p->mosix.dupdates & DFSA_UPDMAX)
+		printk(" UPDMAX");
+	if(p->mosix.dupdates & DFSA_UPDCLOSE)
+		printk(" UPDCLOSE");
+	if(p->mosix.dupdates & DFSA_UPDOPEN)
+		printk(" UPDOPEN");
+	if(p->mosix.dupdates & DFSA_UPDMODS)
+		printk(" UPDMODS");
+	if(p->mosix.dupdates & DFSA_UPDUSED)
+		printk(" UPDUSED");
+	printk("\n");
+	printset("Files to close", files->maxclosed, files->closed);
+	printset("Files to modify", files->maxmod, files->modified);
+	if(p->mosix.ttab)
+		printk("DFSA table at 0x%x of size %d\n",
+			(int)p->mosix.ttab, p->mosix.ttab->len);
+ else + printk("No DFSA table.\n"); + printset("Open files", p->files->max_fdset, p->files->open_fds); + if(p == current) + { + printk("DFSA files:\n"); + for(i = 0 ; i < files->max_fds ; i++) + if((fs = can_dfsa_file(i))) + { + printk("%d: #%d/%d\n", i, fs, + (int)files->fd[i]->f_dentry->d_inode->i_ino); + } + } + else + { + printk("file/count/dfsano/ino:"); + for(i = 0 ; i < files->max_fds ; i++) + if((f = files->fd[i])) + { + printk(" %d:%d/#%d/%d", i, file_count(f), + f->f_vfsmnt->mnt_dfsano ? : -9, + (int)f->f_dentry->d_inode->i_ino); + } + printk("\n"); + } +} + +#endif /* CONFIG_MOSIX_DFSA */ diff -urN linux-2.4.17/mos/div.c linux_umopenmosix/mos/div.c --- linux-2.4.17/mos/div.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/div.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */
+/* word [0] / word [1] of a 64-bit object: low() is the word stored first */
+#define low(x) (((unsigned int *)&x)[0])
+#define high(x) (((unsigned int *)&x)[1])
+
+/* the slow, division by 64-bit is only provided for completeness */
+/* but is not expected to be actually ever required by the kernel*/
+/*
+ * Unsigned 64-bit shift-and-subtract division: returns a / b rounded down.
+ * A zero divisor returns all-ones instead of looping forever.
+ */
+unsigned long long
+__slowdivdi3(unsigned long long a, unsigned long long b)
+{
+	unsigned long long result = 0;
+	unsigned long long bit = 1;
+
+	if(!b)
+		return(~0ULL);
+	/*
+	 * scale the divisor up to the largest b * 2^k that is still <= a,
+	 * stopping before a shift could push its top bit out
+	 */
+	while(!(b >> 63) && (b << 1) <= a)
+	{
+		b <<= 1;
+		bit <<= 1;
+	}
+	/* walk back down, producing one quotient bit per step */
+	while(bit)
+	{
+		if(a >= b)
+		{
+			a -= b;
+			result |= bit;
+		}
+		b >>= 1;
+		bit >>= 1;
+	}
+	return(result);
+}
+
+/* signed 64-bit division: returns a / b */
+long long
+__divdi3(long long a, long long b)
+{
+	int sign;
+	long long result;
+	unsigned int al, ah, dl, dh;
+	int hd;
+	long long bit;
+	int i;
+
+	if(!high(b) && !high(a)) /* by far the most common case */
+		return(low(a) / low(b));
+	sign = 1;
+	if(high(b) & 0x80000000)
+	{
+		b = -b;
+		if(!high(b) && !high(a))
+			/* negate the widened quotient, not the unsigned low word */
+			return(-(long long)(low(a) / low(b)));
+		sign = -1;
+	}
+	if(high(a) & 0x80000000)
+	{
+		sign = -sign;
+		a = -a;
+		if(!high(a) && !high(b))
+			/* widen first: int * unsigned would wrap in 32 bits */
+			return(sign * (long long)(low(a) / low(b)));
+	}
+	dh = high(b);
+	if(dh)
+		return(sign * __slowdivdi3(a, b));
+	dl = low(b);
+	if(!dl)
+		dh /= dl; /* divide by 0!
*/ + al = low(a); + ah = high(a); + for(i = 0 ; dh < ah || (dh == ah && dl <= al) ; i++) + { + dh += dh; + if(dl & 0x80000000) + dh++; + dl += dl; + if(i) + { + high(b) <<= 1; + if(low(b) & 0x80000000) + high(b)++; + low(b) <<= 1; + } + } + if(i == 0) + return(0); + if(i <= 32) + { + low(bit) = 1 << (i-1); + high(bit) = 0; + } + else + { + low(bit) = 0; + high(bit) = 1 << (i-33); + } + result = 0; + while(i--) + { + hd = high(a) - high(b); + if((hd = high(a) - high(b)) >= 0 && + (low(a) >= low(b) || hd--)) + { + high(a) = hd; + low(a) = low(a) - low(b); + result |= bit; + } + low(bit) >>= 1; + if(high(bit) == 1) + low(bit) = 0x80000000; + high(bit) >>= 1; + low(b) >>= 1; + if(high(b) & 1) + low(b) |= 0x80000000; + high(b) >>= 1; + } + if(sign < 0) + result = -result; + return(result); +} diff -urN linux-2.4.17/mos/export.c linux_umopenmosix/mos/export.c --- linux-2.4.17/mos/export.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/export.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include + +EXPORT_SYMBOL(deputy_copy_to_user); +EXPORT_SYMBOL(deputy_copy_from_user); +EXPORT_SYMBOL(deputy_clear_user); +EXPORT_SYMBOL(deputy_strncpy_from_user); +EXPORT_SYMBOL(deputy_strnlen_user); +EXPORT_SYMBOL(deputy_verify_write); +EXPORT_SYMBOL(mosix_deputy_init_aout_mm); +EXPORT_SYMBOL(mosix_obtain_registers); +EXPORT_SYMBOL(mosix_deputy_setup_args); +EXPORT_SYMBOL(mosix_deputy_dump_thread); +EXPORT_SYMBOL(mosix_config_get_table); +EXPORT_SYMBOL(adjust_task_mosix_context); + +#ifdef CONFIG_MOSIX_DFSA +EXPORT_SYMBOL(dfsa_open_file); +EXPORT_SYMBOL(dfsa_touch_file); +EXPORT_SYMBOL(dfsa_close_file); +#endif /* CONFIG_MOSIX_DFSA */ + +#ifdef CONFIG_MOSIX_UDB +EXPORT_SYMBOL(mosix_debugger); +#endif /* CONFIG_MOSIX_UDB */ + +#if !defined(DO_NOT_FIX_LINUX_BUGS) +#include +EXPORT_SYMBOL(lookup_exec_domain); +#endif + +#ifdef CONFIG_DRM_I810_MODULE +extern asmlinkage long sys_munmap(unsigned long,size_t); +EXPORT_SYMBOL(mosix_go_home); +EXPORT_SYMBOL(sys_munmap); +#endif /* CONFIG_DRM_I810_MODULE */ diff -urN linux-2.4.17/mos/freemem.c linux_umopenmosix/mos/freemem.c --- linux-2.4.17/mos/freemem.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/freemem.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,461 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int pages_to_keep_free; +int latest_free_mem; +static int sort_age_maxchunk; +static int marker; + +static inline int +current_free_mem(void) +{ + return(latest_free_mem); +} + +int +export_mem(void) +{ + register int i, n; + struct task_struct *p; + + n = current_free_mem() - pages_to_keep_free; + read_lock(&tasklist_lock); + for_each_task(p) + if((i = p->mosix.pages_i_bring) > 0) + n -= i; + read_unlock(&tasklist_lock); + if(n < 0) + n = 0; + return(n); +} + +int +memory_badly_required(void) +{ + register int n, s = 0; + struct task_struct *p; + struct mm_struct *mm; + + n = current_free_mem() - pages_to_keep_free; + write_lock_irq(&tasklist_lock); + ++marker; + for_each_task(p) + { + n -= p->mosix.pages_i_bring; + task_lock(p); + if((mm = p->mm) && mm->mark != marker && mm->last_memsort) + { + s += mm->swapped; + mm->mark = marker; + } + task_unlock(p); + } + write_unlock_irq(&tasklist_lock); + if(n < 0 && n > -MIN_EXPECTED_PROC_SIZE) + n = -MIN_EXPECTED_PROC_SIZE; + if(n < 0) + n -= s; + return(-n); +} + +#define MDP_HIGHEST_QUALITY (MIN_EXPECTED_PROC_SIZE * 10) + +/* "memory_relief_quality" is called with tasklist_lock read-locked */ +int +memory_relief_quality(struct task_struct *p, int need) +{ + int q; + int used, unused, swapped; /* (dirty pages only) */ + struct mm_struct *mm; + + read_lock(&tasklist_lock); + task_lock(p); + if(!(mm = p->mm) || !mm->last_memsort) + { + task_unlock(p); + read_unlock(&tasklist_lock); + return(0); + } + used = mm->used; + unused = mm->unused; + swapped = mm->swapped; + task_unlock(p); + read_unlock(&tasklist_lock); + if (used >= need && used < 2 * need) + q = MDP_HIGHEST_QUALITY * (150 - used * 50 / need) / 100; + else if(used >= need) + q = MDP_HIGHEST_QUALITY * need / (1 + used - need); + else if (used < need) + { + if(need > MDP_HIGHEST_QUALITY/2) + q = MDP_HIGHEST_QUALITY / 
2 * used / need; + else + q = used; + } + if(swapped) + q = q * used / (used + swapped); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MEM) + printk("relief_quality(need=%d,used=%d,unused=%d,swapped=%d)" + "=%d\n", need, used, unused, swapped, q); +#endif /* CONFIG_MOSIX_DEBUG */ + return(q); +} + +/* + * The memory-sorting daemon is only loosely connected with MOSIX: + * it should have been part of Linux, but Linux does not age pages + * belonging to processes (unless memory is under pressure) and in + * MOSIX we do need to know how much memory is really free ALL THE TIME. + * The code here is largely stolen from "mm/vmscan.c". + */ + +void +compute_freemem(void) +{ + int count; + static int trouble_time; + int kernel, inactive, tasked; + + if(acpuse < MF * smp_num_cpus * UTIL_TOLLERANCE) + trouble_time = 11; + tasked = 0; + if(trouble_time && --trouble_time > 0) + kernel = inactive = 0; + else + { + struct task_struct *p; + struct mm_struct *mm; + + kernel = (dentry_stat.nr_unused * sizeof(struct dentry) + + inodes_stat.nr_unused * sizeof(struct inode)) + >> PAGE_SHIFT; + inactive = nr_inactive_pages; + write_lock_irq(&tasklist_lock); + ++marker; + for_each_task(p) + { + task_lock(p); + if((mm = p->mm) && mm->mark != marker && + time_before(jiffies, mm->last_memsort + 15*HZ)) + { + tasked += mm->private_unused; + mm->mark = marker; + } + task_unlock(p); + } + write_unlock_irq(&tasklist_lock); + } + count = nr_free_pages() + kernel + inactive + tasked; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MEM) + printk("free=%d,ker=%d,inac=%d,tasked=%d,count=%d/%lu\n", + nr_free_pages(), kernel, inactive, tasked, count, + num_physpages); +#endif /* CONFIG_MOSIX_DEBUG */ + if(count >= num_physpages) /* whatever went wrong... */ + count = num_physpages - 1; + if(count < 0) + count = 0; + latest_free_mem = count; +} + +static inline int +sort_interval(int initial) +{ + if(latest_free_mem < pages_to_keep_free) + return(initial ? 
0 : 3*HZ); + else if(latest_free_mem < 2 * pages_to_keep_free) + return(initial ? 2*HZ : 4*HZ); + else if(latest_free_mem < 4 * pages_to_keep_free) + return(initial ? 4*HZ : 8*HZ); + else + return(10*HZ); +} + +/* + * sort pages into 3 piles: "used", "unused" and "swapped" + * plus another distinction for unused pages not otherwise accounted for + */ +void +sort_and_age_pages(struct mm_struct *mm) +{ + int used = 0, unused = 0, swapped = 0, private_unused = 0; + int need_flush = 0; + register struct vm_area_struct *vma; + pgd_t *pgdir; + pmd_t *pmdir; + pte_t *pte; + struct page *page; + unsigned long addr, start, pgd_end, pmd_end; + static int let_go = -1; + int interval = sort_interval(1); + + if(!mm->last_memsort && interval) + { + if(!mm->used) + mm->used = (jiffies & 0xffff) ? : 0xffff; + if((u16)((jiffies & 0xffff) - mm->used) < interval) + return; + } + addr = 0; + loop: + if(let_go == -1) + let_go = sort_age_maxchunk; + spin_lock(&mm->page_table_lock); + for(vma = mm->mmap ; vma != NULL ; vma = vma->vm_next) + if(addr < vma->vm_end) + { + start = vma->vm_start; + if(start < addr) + start = addr; + pgdir = pgd_offset(mm, start); + for(addr = start ; addr < vma->vm_end ; + addr = pgd_end , pgdir++) + { + if(let_go-- == 0) + { + spin_unlock(&mm->page_table_lock); + current->policy |= SCHED_YIELD; + schedule(); + goto loop; + } + pgd_end = (addr + PGDIR_SIZE) & PGDIR_MASK; + if(pgd_end > vma->vm_end) + pgd_end = vma->vm_end; + if(pgd_none(*pgdir) || pgd_bad(*pgdir)) + continue; + pmdir = pmd_offset(pgdir, addr); + for(; addr < pgd_end ; addr = pmd_end , pmdir++) + { + pmd_end = (addr + PMD_SIZE) & PMD_MASK; + if(pmd_end > pgd_end) + pmd_end = pgd_end; + if(pmd_none(*pmdir) || pmd_bad(*pmdir)) + continue; + pte = pte_offset(pmdir, addr); + page = pte_page(*pte); + for(; addr < pmd_end ; addr += PAGE_SIZE, pte++) + { + if(pte_none(*pte)) + continue; + if(!pte_present(*pte)) + { + swapped++; + continue; + } + page = pte_page(*pte); + if(!VALID_PAGE(page) || 
PageReserved(page)) + continue; + if(PageActive(page)) + { + used++; + continue; + } + if(!PageActive(page)) + { + unused++; + continue; + } + if(time_before(jiffies, page->last_young + OLD_SECONDS * HZ)) + { + used++; + continue; + } + if(ptep_test_and_clear_young(pte)) + { + page->young = 1; + page->last_young = jiffies; + need_flush = 1; + used++; + continue; + } + unused++; + private_unused++; + } + } + } + } + if(need_flush) + flush_tlb_mm(mm); + spin_unlock(&mm->page_table_lock); + write_lock_irq(&tasklist_lock); + mm->used = used; + mm->unused = unused; + mm->swapped = swapped; + mm->private_unused = private_unused; + mm->last_memsort = jiffies ? : 1; + write_unlock_irq(&tasklist_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_SUPERMEM) + printk("memory: used=%d, unused=%d (%d private), swapped=%d\n", + used, unused, private_unused, swapped); +#endif /* CONFIG_MOSIX_DEBUG */ +} + +#define SORT_AGE_MAGIC 2490368 + +int +mosix_mem_daemon(void *nothing) +{ + struct task_struct *p; + struct mm_struct *mm; + struct mm_struct **mms, **newmms; + int i, n, mmno = 100, need_increase = 0; + int interval; + + common_daemon_setup("memsorter", 0); + neutralize_my_load(1); + if(!(mms = (struct mm_struct **)kmalloc(mmno * sizeof(mms[0]), + GFP_KERNEL))) + panic("no mem for mms"); + pages_to_keep_free = num_physpages / KEEP_FREE_PORTION; + if(pages_to_keep_free > MAX_PAGES_TO_KEEP_FREE) + pages_to_keep_free = MAX_PAGES_TO_KEEP_FREE; + compute_freemem(); + + /* "sort_and_age_pages" can take very long for large memories: + * we must therefore allow other processes to take over at least + * about every millisecond. + */ + sort_age_maxchunk = loops_per_jiffy / SORT_AGE_MAGIC; + if(!sort_age_maxchunk) /* very slow processors */ + sort_age_maxchunk = 1; + while (1) + { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(mosadmin_mode_quiet ? 
100 * HZ : HZ); + if(current->sigpending) + flush_signals(current); + loop: + if (!PE) + wait_for_mosix_configuration(NULL); + if(mosadmin_mode_quiet) + continue; + compute_freemem(); + interval = sort_interval(0); + n = 0; + write_lock_irq(&tasklist_lock); + ++marker; + for_each_task(p) + if(!(p->mosix.stay & DSTAY_PER_MM)) + { + task_lock(p); + mm = p->mm; + if((mm = p->mm) && mm != &init_mm && mm->mark != marker + && time_before(mm->last_memsort + interval, + jiffies)) + { + if(n >= mmno) + need_increase = 1; + else + { + atomic_inc(&mm->mm_users); + mms[n++] = mm; + mm->mark = marker; + } + } + task_unlock(p); + } + write_unlock_irq(&tasklist_lock); + for(i = 0 ; i < n ; i++) + { + mm = mms[i]; + if(atomic_read(&mm->mm_realusers) > 0) + sort_and_age_pages(mm); + mmput(mm); + } + if(need_increase) + { + if((newmms = (struct mm_struct **)kmalloc( + (mmno + 50) * sizeof(mms[0]), GFP_KERNEL))) + { + kfree(mms); + mms = newmms; + mmno += 50; + } + need_increase = 0; + goto loop; + } + wake_up_interruptible(&kswapd_wait); + } +} + +#if 0 +/* needed once-only to calibrate the "sort_age_maxchunk" calculation */ +/* not bothering using locks here, since we are only called from the debugger */ +int +calibrate_sort_age(int pid) +{ + int sv = sort_age_maxchunk; + struct task_struct *p = find_task_by_pid(pid); + struct mm_struct *mm; + int chunks = 0; + register struct vm_area_struct *vma; + unsigned long addr, pgd_end; + __typeof__ (jiffies) before; + int milli, magic; + int i; + + if(!p || !(mm = p->mm)) + { + printk("No PID %d\n", pid); + return(0); + } + + for(vma = mm->mmap ; vma != NULL ; vma = vma->vm_next) + for(addr = vma->vm_start ; addr < vma->vm_end ; addr = pgd_end) + { + chunks++; + pgd_end = (addr + PGDIR_SIZE) & PGDIR_MASK; + if(pgd_end > vma->vm_end) + pgd_end = vma->vm_end; + } + printk("%d chunks: starting 1000 times...", chunks); + sort_age_maxchunk = 0x7fffffff; /* unlimit */ + if(!mm->last_memsort) + mm->last_memsort = 1; + sti(); + before = jiffies; + 
for(i = 0 ; i < 1000 ; i++) + sort_and_age_pages(mm); + milli = (jiffies - before) * 1000 / HZ; + printk("%d miliseconds, ", milli); + sort_age_maxchunk = sv; + if(milli < 100) + { + printk("That was too quick -- try a larger process!\n"); + return((int)milli); + } + magic = loops_per_jiffy / (chunks * 1000 / milli); + printk("SORT_AGE_MAGIC=%d\n", magic); + return(magic); +} +#endif /* MOSIX_ONCE_ONLY */ diff -urN linux-2.4.17/mos/info.c linux_umopenmosix/mos/info.c --- linux-2.4.17/mos/info.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/info.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1029 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Amnon Barak, Oren Laadan. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int info_seed1, info_seed2; + +rwlock_t loadinfo_lock = RW_LOCK_UNLOCKED; +spinlock_t uplist_lock = SPIN_LOCK_UNLOCKED; + +struct uplist { + struct uplist *next; + unsigned short pe; + short age; +}; + +static struct uplist uplist[MAXKNOWNUP]; +static struct uplist *uphead, *upfree; +static int info_nup; /* number of processes in uplist */ + +struct loadinfo loadinfo[INFO_WIN]; + +static int info_recv_message(struct infomsg *, int); +static int info_send_message(int, struct infomsg *); +static void update_uplist(struct loadinfo *); +static void update_window(struct loadinfo *); +static void inform(void); +static void not_responding(int); +static void age_uplist(void); +static int rand(int, int); + +static char INFODSTR[] = "infodaemon"; + +#define INFO_BUFSIZE 8192 + +int +mosix_info_daemon(void *nothing) +{ + struct task_struct *p = current; + struct loadinfo *load; + struct infomsg *msg; + static char info_buf[INFO_BUFSIZE]; /* (large) buffer for info load */ + + common_daemon_setup(INFODSTR, 1); + lock_mosix(); + info_proc = current; + unlock_mosix(); + +restart: + wait_for_mosix_configuration(&info_daemon_active); + comm_init_linkpool(); + if (!p->mosix.contact) { + comm_use(p, comm_open(COMM_INFO, 0, 0UL)); + if (!p->mosix.contact) + { + printk("%s: failed comm_open - exiting\n", INFODSTR); + comm_free_linkpool(); + if(p->mosix.contact) + comm_close(NULL); + lock_mosix(); + info_daemon_active = 0; + info_proc = NULL; + unlock_mosix(); + do_exit(0); + } + } + + msg = (struct infomsg *) info_buf; + load = (struct loadinfo *) &msg->load; + + while (1) { + comm_wait(); + + /* if MOSIX was shut down - restart everything */ + if (!PE) { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_INFO) + printk("%s: sleeping after MOSIX shutdown\n", + INFODSTR); +#endif + comm_close(NULL); + 
comm_free_linkpool(); + goto restart; + } + + while (info_recv_message(msg, INFO_BUFSIZE)) + { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("INFO arrived from %d\n", load->pe); +#endif + update_uplist(load); + update_window(load); + if (!mosadmin_mode_quiet) { + load_balance(); + memory_balance(); + } + } + + if(sigismember(&p->pending.signal, SIGALRM)) + { + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + + if(mosadmin_mode_quiet) + continue; + + if(PE) + inform(); + inc_decays(); + comm_age_linkpool(); + age_uplist(); + loadinfo[0].mem = latest_free_mem; + loadinfo[0].rmem = nr_free_pages(); + age_balancing(); + } + } +} + +static void +age_uplist(void) +{ + struct uplist *up, *prev, *cutpoint; + int old = 10 * (NPE-1); + /* (the more nodes around, the less often they are + * likely to come up again, so we hold them longer) */ + + if(old > 32767) + old = 32767; + spin_lock(&uplist_lock); + for (prev = NULL, up = uphead; up; prev = up , up = up->next) + if (++up->age >= old) + { + /* the list is sorted by age, so all the rest are too old! 
*/ + if (prev) + prev->next = NULL; + else + uphead = NULL; + cutpoint = up; + while(1) + { + info_nup--; + if(up->next) + up = up->next; + else + break; + } + prev = upfree; + upfree = cutpoint; + up->next = prev; + break; + } + spin_unlock(&uplist_lock); +} + +static void +update_uplist(struct loadinfo *load) +{ + struct uplist *up, *prev; + + spin_lock(&uplist_lock); + for (prev = NULL , up = uphead ; up ; prev = up , up = up->next) + if (up->pe == load->pe) + { + up->age = 0; + if (prev) /* put it first */ + { + prev->next = up->next; + up->next = uphead; + uphead = up; + } + break; + } + if (!up) { + if (upfree) { + up = upfree; + upfree = upfree->next; + info_nup++; + } else { + for (prev = uphead ; prev->next->next ; + prev = prev->next) + ; /* nothing */ + up = prev->next; + prev->next = NULL; + } + up->pe = load->pe; + up->age = 0; + up->next = uphead; + uphead = up; + } +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) { + printk("UPLIST:"); + for (up = uphead; up; up = up->next) + printk(" %d/%d", up->pe, up->age); + printk("\n"); + } +#endif /* CONFIG_MOSIX_DEBUG */ + spin_unlock(&uplist_lock); +} + + +static void +update_window(struct loadinfo *load) +{ + static int info_slot; /* pointer to next information to fill */ + int i; + + info_slot = (info_slot % (INFO_WIN - 1)) + 1; + loadinfo[info_slot] = *load; + + for (i = 1 ; i < INFO_WIN ; i++) + if (i != info_slot && loadinfo[i].pe == load->pe) + { + write_lock_bh(&loadinfo_lock); + loadinfo[i].pe = 0; + write_unlock_bh(&loadinfo_lock); + break; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) { + printk("INFODATA:"); + for (i = 0; i < INFO_WIN; i++) + if (loadinfo[i].pe) + printk(", %d=%d", + loadinfo[i].pe, (int)loadinfo[i].load); + printk("\n"); + } +#endif +} + +#define speed_adjust(x) ((x) * ((int64_t)STD_SPD) / loadinfo[0].speed) +void +info_update_costs(void) +{ + register int i; + + write_lock_bh(&loadinfo_lock); + for(i = 0 ; i < MAX_MOSIX_TOPOLOGY ; i++) + { +#ifdef 
CONFIG_MOSIX_TOPOLOGY + loadinfo[0].costs[i].page = mosix_cost[i].PAGE_R; + loadinfo[0].costs[i].syscall = mosix_cost[i].SYSCALL_R; + loadinfo[0].costs[i].out = mosix_cost[i].COPYOUT_BASE_R; + loadinfo[0].costs[i].outkb = mosix_cost[i].COPYOUT_PER_KB_R; + loadinfo[0].costs[i].in = mosix_cost[i].COPYIN_BASE_R; + loadinfo[0].costs[i].inkb = mosix_cost[i].COPYIN_PER_KB_R; + loadinfo[0].costs[i].first = mosix_cost[i].first; + loadinfo[0].costs[i].last = mosix_cost[i].last; +#else + remote_here.page = mosix_cost[i].PAGE_R; + remote_here.syscall = mosix_cost[i].SYSCALL_R; + remote_here.out = mosix_cost[i].COPYOUT_BASE_R; + remote_here.outkb = mosix_cost[i].COPYOUT_PER_KB_R; + remote_here.in = mosix_cost[i].COPYIN_BASE_R; + remote_here.inkb = mosix_cost[i].COPYIN_PER_KB_R; +#endif /* CONFIG_MOSIX_TOPOLOGY */ + remote_here_adjusted[i].page = + speed_adjust(mosix_cost[i].PAGE_R); + remote_here_adjusted[i].syscall = + speed_adjust(mosix_cost[i].SYSCALL_R); + remote_here_adjusted[i].out = + speed_adjust(mosix_cost[i].COPYOUT_BASE_R); + remote_here_adjusted[i].outkb = + speed_adjust(mosix_cost[i].COPYOUT_PER_KB_R); + remote_here_adjusted[i].in = + speed_adjust(mosix_cost[i].COPYIN_BASE_R); + remote_here_adjusted[i].inkb = + speed_adjust(mosix_cost[i].COPYIN_PER_KB_R); +#ifdef CONFIG_MOSIX_TOPOLOGY + remote_here_adjusted[i].first = mosix_cost[i].first; + remote_here_adjusted[i].last = mosix_cost[i].last; +#endif /* CONFIG_MOSIX_TOPOLOGY */ + } + write_unlock_bh(&loadinfo_lock); +} + +void +info_update_mfscosts(void) +{ +#ifdef CONFIG_MOSIX_TOPOLOGY + memcpy(loadinfo[0].mfscosts, mfs_cost, sizeof(mfs_cost)); +#endif /* CONFIG_MOSIX_TOPOLOGY */ +} + +void +set_my_cpuspeed(void) +{ + int s = cpuspeed; + + if(sizeof(loadinfo[0].speed) < 4 && s > 65535) + { + printk("Computer Too Fast! 
Time to Update Standard-Speed.\n"); + s = 65535; + } + stable_export = (MF+2) * STD_SPD / (s * smp_num_cpus); + if(stable_export == MF * STD_SPD / (s * smp_num_cpus)) + stable_export++; + write_lock_bh(&loadinfo_lock); + loadinfo[0].speed = s; + write_unlock_bh(&loadinfo_lock); + info_update_costs(); + info_update_mfscosts(); +} + +void +info_init(void) +{ + loadinfo[0].ncpus = smp_num_cpus; + loadinfo[0].tmem = num_physpages; + set_my_cpuspeed(); +} + +void +info_startup(void) +{ + int i; + + info_seed1 = PE; + info_seed2 = PE*PE*PE*PE; + write_lock_bh(&loadinfo_lock); + loadinfo[0].pe = PE; + for (i = 1 ; i < INFO_WIN ; i++) { + loadinfo[i].pe = 0; + loadinfo[i].load = 0xffffffff; + } + write_unlock_bh(&loadinfo_lock); + + spin_lock(&uplist_lock); + upfree = uphead = NULL; + info_nup = 0; + + memset(uplist, 0, sizeof(struct uplist) * MAXKNOWNUP); + for (i = 0; i < MAXKNOWNUP; i++) { + uplist[i].next = upfree; + upfree = &uplist[i]; + } + spin_unlock(&uplist_lock); +} + +void +info_reconfig() +{ + int i; + struct uplist *up, *prev; + + lock_mosix(); /* because "mos_to_net" does! 
*/ + spin_lock(&uplist_lock); +recheck: + prev = NULL; + for (up = uphead ; up ; prev = up, up = up->next) + if (!mos_to_net(up->pe, NULL)) { + if (prev) + prev->next = up->next; + else + uphead = up->next; + up->next = upfree; + upfree = up; + info_nup--; + goto recheck; + } + spin_unlock(&uplist_lock); + write_lock_bh(&loadinfo_lock); + for (i = 1; i < INFO_WIN; i++) + if (loadinfo[i].pe && !mos_to_net(loadinfo[i].pe, NULL)) + loadinfo[i].pe = 0; + write_unlock_bh(&loadinfo_lock); + unlock_mosix(); +} + +static int +info_recv_message(struct infomsg *info, int bufsize) +{ + static mosix_addr ra; /* reply address */ + struct mosix_link *l = current->mosix.contact; + struct loadinfo *load = &(info->load); + int n; + int sender; + + while (1) { + n = comm_recvfrom(info, bufsize, l, &ra, 0); +#ifdef CONFIG_MOSIX_DIAG + if (!n) + mosix_panic("comm_recvfrom: n == 0"); +#endif + + if (n == -EDIST) + continue; /* message > bufsize */ + + if (n < 0) + return (0); + + if (n < sizeof(*info)) { +#ifdef CONFIG_MOSIX_DEBUG + printk("%s-info_recv_message: data too short\n", + desc_mostask(NULL)); +#endif + continue; + } + + if (info->version != MOSIX_BALANCE_VERSION) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-info_recv_message: wrong MOSIX " + "version (expect %ld, got %ld)\n", + desc_mostask(NULL), + MOSIX_BALANCE_VERSION, info->version); +#endif /* CONFIG_MOSIX_DEBUG */ + continue; + } + if (info->topology != MAX_MOSIX_TOPOLOGY) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-info_recv_message: wrong MOSIX " + "topology: we have %d, they have %d", + desc_mostask(NULL), MAX_MOSIX_TOPOLOGY, + info->topology); +#endif /* CONFIG_MOSIX_DEBUG */ + continue; + } + + sender = load->pe ? 
: load->speed; + if (sender > MAXPE || sender != net_to_mos(&ra)) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-info_recv_message: discarded " + "load from extra-cluster machine #%d\n", + desc_mostask(NULL), sender); +#endif + continue; + } + + if (sender == PE) + { + printk("WARNING: Another computer is masquerading as same MOSIX node as this (%d)!\n", PE); + continue; + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_LOADS) + printk("recv: load at 0x%x : (%d,%d,%d,%d)\n", + (int)load,load->pe,(int)load->load,load->speed, + load->ncpus); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (load->pe) + return (1); + + /* + * Ugly convention: !load->pe ==> this is a GETLOAD request + */ + + lock_mosix(); + write_lock_bh(&loadinfo_lock); + loadinfo[0].status = my_mosix_status(); + loadinfo[0].free_slots = get_free_guest_slots(); + unlock_mosix(); + loadinfo[0].mem = latest_free_mem; + loadinfo[0].rmem = nr_free_pages(); + loadinfo[0].util = acpuse; +#ifdef CONFIG_MOSIX_RESEARCH + loadinfo[0].rio = io_read_rate; + loadinfo[0].wio = io_write_rate; +#endif /* CONFIG_MOSIX_RESEARCH */ + *load = loadinfo[0]; + write_unlock_bh(&loadinfo_lock); + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-info_recv_message: reply query of %d\n", + desc_mostask(NULL), load->pe); +#endif + + comm_sendto(COMM_TOADDR, info, sizeof(*info), l, &ra); + } + return(0); +} + +static int +info_send_message(int mos, struct infomsg *info) +{ +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_INFO) + printk("%s-info_send_message to %d\n", desc_mostask(NULL), mos); + + if(ds_debug & DSDEB_LOADS) + printk("send: load at 0x%x (%d,%d,%d,%d)\n", (int)&info->load, + info->load.pe, (int)info->load.load, info->load.speed, + info->load.ncpus); +#endif + + return(comm_sendto(mos, info, sizeof(*info), current->mosix.contact, + NULL) < 0); +} + +int +load_to_mosix_info(struct loadinfo l, struct mosix_info *info, int touser) +{ + struct mosix_info tmp, *uaddr; + int error; + 
+	if(touser)
+	{
+		uaddr = info;
+		info = &tmp;
+		/*
+		 * "tmp" is an on-stack staging copy that is handed to
+		 * copy_to_user() in full (sizeof(*info) bytes): clear it
+		 * first, so that padding and any member that is not assigned
+		 * below cannot carry stale kernel-stack bytes to user space.
+		 */
+		memset(&tmp, 0, sizeof(tmp));
+	}
+	else
+		uaddr = NULL;	/* pacify angry stupid gcc */
+	/* rescale the load by standard_speed/STD_SPD, using a 64-bit intermediate */
+	info->load = ((int64_t)l.load) * standard_speed / STD_SPD;
+	info->speed = l.speed;
+	info->ncpus = l.ncpus;
+	/* "l" counts memory in pages; the reported figures are in bytes */
+	info->mem = l.mem * PAGE_SIZE;
+	info->rmem = l.rmem * PAGE_SIZE;
+	info->tmem = l.tmem * PAGE_SIZE;
+	info->util = l.util;
+#ifdef CONFIG_MOSIX_RESEARCH
+	info->rio = l.rio;
+	info->wio = l.wio;
+#endif /* CONFIG_MOSIX_RESEARCH */
+
+	info->status = l.status;
+	/* copy_to_user() returns the number of bytes NOT copied: non-zero is a fault */
+	if(touser && (error = copy_to_user((char *)uaddr, (char *)info,
+							sizeof(*info))))
+		return(-EFAULT);
+	return(0);
+}
+
+static int info_serialno = 0;
+static int info_timo;
+static int info_retry;
+static int info_retry_cnt;
+
+#define INFO_TIMO	50000	/* ms */
+#define INFO_RETRY	20000	/* after 20 & 40 ms */
+
+#define SIMULTANEOUS_QUERIES	10
+
+void
+mosinfo_update_gateways(void)
+{
+	/* the following may be further optimized in the future: */
+	info_timo = INFO_TIMO * (mosadmin_gateways + 1);
+#if INFO_RETRY * 3 > 65535
+#error: "timtosend" below must be made "unsigned int".
+#endif
+	info_retry = INFO_RETRY * (mosadmin_gateways + 1);
+	info_retry_cnt = (info_timo + info_retry - 1) / info_retry;
+}
+
+/*
+ * Collect the information of the "num" consecutive nodes that start at node
+ * number "first" into info[0] .. info[num-1].
+ *
+ * "info" is a user-space address when "touser" is non-zero and a kernel
+ * address otherwise ("touser" is passed on to load_to_mosix_info()).
+ *
+ * This node is answered directly from loadinfo[0]; a node for which
+ * mos_to_net() fails is reported with status 0; all other nodes are queried
+ * over a borrowed link, at most SIMULTANEOUS_QUERIES at a time.
+ *
+ * Returns 0 on success or a negative error: -ENOMEM (no memory for the
+ * bitmap), -EDIST (no link could be borrowed), -EINTR (signal while waiting)
+ * or whatever load_to_mosix_info() returned.
+ */
+int
+balance_get_infos(int first, int num, struct mosix_info *info, int touser)
+{
+	char *donebits, fewbits[20];	/* 20*8=160 seems to cover most cases */
+	struct progress
+	{
+		unsigned short node;
+		unsigned short timtosend;
+		unsigned int timo;
+		int serialno;
+	} progress[SIMULTANEOUS_QUERIES];
+	int hint = 0;
+	int ntaken = 0;
+	int ndone = 0;
+	int inpot = 0;
+	int node, from, i, n;
+	int error = 0;
+	int timo;
+	now_t before;
+	mosix_link *mlink;
+	struct loadinfo l;
+	struct infomsg infomsg;
+
+	/* one bit per requested node, set as soon as the node has been taken */
+	if(num <= 8*sizeof(fewbits))
+		donebits = fewbits;
+	else if(!(donebits = (char *)kmalloc((num+7)/8, GFP_KERNEL)))
+		return(-ENOMEM);
+	memset(donebits, 0, (num+7)/8);
+	/*
+	 * Several paths of this function set nothing but "l.status" before
+	 * passing "l" to load_to_mosix_info(), which copies every field out
+	 * (possibly to user space): never let that be uninitialised stack.
+	 */
+	memset(&l, 0, sizeof(l));
+	if (!(mlink = comm_borrow_linkpool()))
+		error = -EDIST;
+ loop:
+	while(!error && ndone < num)
+	{
+		/* start as many new queries as there are free progress slots */
+		while(inpot < SIMULTANEOUS_QUERIES && ntaken < num)
+		{
+			/* find the next index that was not taken yet (wrapping) */
+			while(donebits[hint/8] & (1 << (hint%8)))
+				if(++hint == num)
+					hint = 0;
+			donebits[hint/8] |= (1 << (hint%8));
+			ntaken++;
+			node = first + hint;
+			if (node == PE)
+			{
+				read_lock_bh(&loadinfo_lock);
+				l = loadinfo[0];
+				read_unlock_bh(&loadinfo_lock);
+				l.status = my_mosix_status();
+				l.util = acpuse;
+#ifdef CONFIG_MOSIX_RESEARCH
+				l.rio = io_read_rate;
+				l.wio = io_write_rate;
+#endif /* CONFIG_MOSIX_RESEARCH */
+ ready_immediate:
+				ndone++;
+				if((error = load_to_mosix_info(l, &info[hint],
+								touser)))
+					break;
+				continue;
+			}
+			if (!mos_to_net(node, NULL))
+			{
+				/*
+				 * not a configured node: report an all-zero
+				 * entry rather than whatever "l" last held
+				 * (e.g. this node's own figures).
+				 */
+				memset(&l, 0, sizeof(l));
+				l.status = 0;
+				goto ready_immediate;
+			}
+			progress[inpot].node = node;
+			progress[inpot].timo = info_timo;
+			progress[inpot].timtosend = 0;
+			progress[inpot].serialno = info_serialno++;
+			inpot++;
+		}
+		if(error || ndone == num)
+			break;
+		/* (re)send what is due and find the shortest time to wait */
+		timo = info_retry;
+		for(i = 0 ; i < inpot ; i++)
+		{
+			if(progress[i].timo < timo)
+				timo = progress[i].timo;
+			if(progress[i].timo && progress[i].timtosend <= 0)
+			{
+				progress[i].timtosend = info_retry;
+
infomsg.version = MOSIX_BALANCE_VERSION; + infomsg.serialno = progress[i].serialno; + infomsg.topology = MAX_MOSIX_TOPOLOGY; + infomsg.load.pe = 0; /* eg. GETLOAD */ + infomsg.load.speed = PE; + if(comm_sendto(progress[i].node, &infomsg, + sizeof(infomsg), mlink, NULL) <= 0) + { + node = progress[i].node; + progress[i] = progress[--inpot]; + ndone++; + l.status = DS_MOSIX_DEF; + error = load_to_mosix_info(l, + &info[node - first], touser); + goto loop; + } + } + if(progress[i].timtosend < timo) + timo = progress[i].timtosend; + } + before = time_now(); + n = comm_recvfrompe(&infomsg, sizeof(infomsg), mlink, &from, + info_retry); + if (n == sizeof(infomsg)) + for (i = 0; i < inpot; i++) { + if (from == progress[i].node && + infomsg.load.pe == progress[i].node && + infomsg.serialno == progress[i].serialno) { + ndone++; + node = progress[i].node; + progress[i] = progress[--inpot]; + error = load_to_mosix_info(infomsg.load, + &info[node - first], touser); + break; + } + } else if (signal_pending(current)) + error = -EINTR; + if (error) + break; + + before = time_since(before); + if(before > timo) + before = timo; + if(before) + for(i = 0 ; i < inpot ; i++) + { + progress[i].timo -= before; + if(progress[i].timtosend < before) + progress[i].timtosend = 0; + else + progress[i].timtosend -= before; + } + if(n <= 0) + for(i = 0 ; i < inpot ; i++) + if(progress[i].timo <= 0) /* cannot realy be < */ + { + node = progress[i].node; + progress[i] = progress[--inpot]; + ndone++; + l.status = DS_MOSIX_DEF; + if((error = load_to_mosix_info(l, + &info[node - first], touser))) + break; + i--; + } + } + if (mlink) + comm_return_linkpool(mlink); + if(num > 8*sizeof(fewbits)) + kfree(donebits); + return(error); +} + +int +balance_ask_node(int node, struct infomsg *info) +{ + mosix_link *mlink = NULL; + int from, n, error = 0; + int serialno; + int tries; + now_t before; + int timo; + + if (!(mlink = comm_borrow_linkpool())) + error = -EDIST; + serialno = info_serialno++; + tries = 
info_retry_cnt;
+	timo = info_retry;
+	while (!error && tries--) {
+		/* (re)build the request: "info" also serves as the reply buffer */
+		info->version = MOSIX_BALANCE_VERSION;
+		info->serialno = serialno;
+		info->topology = MAX_MOSIX_TOPOLOGY;
+		info->load.pe = 0;	/* eg. GETLOAD */
+		info->load.speed = PE;	/* a request carries the asker's node number here */
+		error = comm_sendto(node, info, sizeof(*info), mlink, NULL);
+		/*
+		 * The cast is essential: compared against the unsigned result
+		 * of sizeof, a negative error code would be converted to a
+		 * huge unsigned value, the test would be false and a failed
+		 * send would be treated as a success.
+		 */
+		if (error < (int)sizeof(*info))
+		{
+			if(error >= 0)	/* short send: no error code to pass on */
+				error = -EDIST;
+			goto out;
+		}
+		error = 0;
+		before = time_now();
+		n = comm_recvfrompe(info, sizeof(*info), mlink, &from, timo);
+		/* accept only a full-size reply from that node to this very query */
+		if (n == sizeof(*info) && from == node &&
+		    info->load.pe == node && info->serialno == serialno)
+			goto out;
+		before = time_since(before);
+		if(before < timo)
+		{
+			/*
+			 * the wait ended early (presumably on a message that
+			 * did not match): wait for the remainder of the
+			 * period without using up one of the tries.
+			 */
+			timo -= before;
+			tries++;
+		}
+		else
+			timo = info_retry;
+	}
+	error = -EAGAIN;
+out:
+	if (mlink)
+		comm_return_linkpool(mlink);
+	return (error);
+}
+
+/*
+ * Fill "*info" (a kernel address) with the information of node "node";
+ * "node" being 0 or this node's own number means the local node.
+ *
+ * A node for which mos_to_net() fails is reported with status 0 (and 0 is
+ * returned).  When the node cannot be queried, the status is set to
+ * DS_MOSIX_DEF and the error of balance_ask_node() is returned.
+ * Otherwise returns 0.
+ */
+int
+balance_get_info(int node, struct mosix_info *info)
+{
+	struct infomsg infomsg;
+	int error;
+
+	if (node == PE || node == 0)	/* local info */
+	{
+		read_lock_bh(&loadinfo_lock);
+		infomsg.load = loadinfo[0];
+		read_unlock_bh(&loadinfo_lock);
+		/* these are computed afresh rather than taken from the snapshot */
+		infomsg.load.status = my_mosix_status();
+		infomsg.load.util = acpuse;
+#ifdef CONFIG_MOSIX_RESEARCH
+		infomsg.load.rio = io_read_rate;
+		infomsg.load.wio = io_write_rate;
+#endif /* CONFIG_MOSIX_RESEARCH */
+
+		load_to_mosix_info(infomsg.load, info, 0);
+	}
+	else if (!mos_to_net(node, NULL))
+		info->status = 0;
+	else if((error = balance_ask_node(node, &infomsg)))
+	{
+		info->status = DS_MOSIX_DEF;
+		return(error);
+	}
+	else
+		load_to_mosix_info(infomsg.load, info, 0);
+	return(0);
+}
+
+/*
+ * Copy the raw load record of node "node" (0 or this node's own number
+ * meaning the local node) into "*l".
+ * Returns 0 on success, -1 if the node is not configured or did not answer.
+ */
+int
+balance_get_load(int node, struct loadinfo *l)
+{
+	struct infomsg infomsg;
+
+	if (node == PE || node == 0)
+	{
+		/*
+		 * NOTE(review): loadinfo[0] is copied here without taking
+		 * loadinfo_lock, unlike in balance_get_info() above - confirm
+		 * against the callers whether that is intended.
+		 */
+		*l = loadinfo[0];
+		return(0);
+	}
+	else if (!mos_to_net(node, NULL))
+		return(-1);
+	else if(balance_ask_node(node, &infomsg))
+		return(-1);
+	*l = infomsg.load;
+	return(0);
+}
+
+static void
+inform()
+{
+	int to;
+	int i;
+	struct uplist *up;
+	struct infomsg info;
+
+	info.version = MOSIX_BALANCE_VERSION;
+
info.topology = MAX_MOSIX_TOPOLOGY; + info.serialno = 0; /* meaning no serial number */ + write_lock_bh(&loadinfo_lock); + loadinfo[0].free_slots = get_free_guest_slots(); + info.load = loadinfo[0]; + write_unlock_bh(&loadinfo_lock); + info.load.load = export_load; + + /* first select any node, and send the load */ + lock_mosix(); + to = (NPE > 1) ? nth_node(rand(NPE-1, 1)) : 0; + unlock_mosix(); + if(to && info_send_message(to, &info)) + not_responding(to); + + /* then select a node that seems to be up */ + spin_lock(&uplist_lock); + if (info_nup) + { + for (up = uphead , i = rand(info_nup, 0) ; i-- ; up = up->next) + ; /* just stop at random element */ + to = (up->pe == to) ? 0 : up->pe; + } + else + to = 0; + spin_unlock(&uplist_lock); + if (to && info_send_message(to, &info)) + not_responding(to); +} + +void +not_responding(int pe) +{ + int i; + struct uplist *up, *prev; + + spin_lock(&uplist_lock); + prev = NULL; + for (up = uphead ; up ; prev = up , up = up->next) + if (up->pe == pe) + { + if (prev) + prev->next = up->next; + else + uphead = up->next; + up->next = upfree; + upfree = up; + info_nup--; + break; + } + spin_unlock(&uplist_lock); + write_lock_bh(&loadinfo_lock); + for (i = 1; i < INFO_WIN; i++) + if (loadinfo[i].pe == pe) + loadinfo[i].pe = 0; + write_unlock_bh(&loadinfo_lock); +} + +void +this_machine_is_favourite(int which) +{ + register struct uplist *up; + + spin_lock(&uplist_lock); + for(up = uphead ; up ; up = up->next) + if(up->pe == which) + break; + if(!up && upfree) +#ifdef CONFIG_MOSIX_CHEAT_MIGSELF + if(which != PE) +#endif /* CONFIG_MOSIX_CHEAT_MIGSELF */ + { + up = upfree; + upfree = upfree->next; + up->next = uphead; + up->pe = which; + up->age = 0; + uphead = up; + info_nup++; + } + spin_unlock(&uplist_lock); +} + +static int +rand(int modulo, int regen) +{ + if(regen) + { + info_seed2++; + /* alternating even/odd values: */ + info_seed1 = info_seed1*info_seed2 + 1; + return((info_seed1 & 0x7fffffff) % modulo); + } + else + 
return((((info_seed2+1)*info_seed1+1) & 0x7fffffff) % modulo);
+}
+
+/*
+ * we are 99.99% going to migrate and let other processes be migrated,
+ * but not before we adjust the local and remote loads to discourage
+ * further migrations.
+ *
+ * "whereto" is the number of the node that the current process is about
+ * to migrate to.
+ */
+void
+release_migrations(int whereto)
+{
+	register struct mosix_task *m = &current->mosix;
+	register int load, i;
+	/* use the recorded page count if there is one, otherwise count now */
+	int pages = m->migpages ? : count_migrating_pages();
+
+	this_machine_is_favourite(whereto);
+
+	/* Decrease the local load by the load caused by this process,
+	 * to avoid over-migration.
+	 */
+	write_lock_bh(&loadinfo_lock);
+	spin_lock_irq(&runqueue_lock);
+	load = m->load * STD_SPD / 4 / cpuspeed;
+	load /= smp_num_cpus;
+	/* It is ON PURPOSE that `acpuse' is not taken into account */
+	if(loadinfo[0].load < load)	/* should not happen, but ... */
+		load = loadinfo[0].load;
+	load_left += load;
+	spin_unlock_irq(&runqueue_lock);
+
+	loadinfo[0].load -= load;
+#ifdef CONFIG_MOSIX_DEBUG
+	if(ds_debug & (DSDEB_LOADS|DSDEB_CONSIDER))
+		printk("local load down by %d to %d due to leaving (to %d)\n",
+			load, (int)loadinfo[0].load, whereto);
+#endif
+
+	/* increase the receiver's-load */
+	for(i = 1 ; i < INFO_WIN ; i++)
+		if(loadinfo[i].pe == whereto)
+		{
+			/* add slightly more than 1 process worth of load */
+			loadinfo[i].load += MF * 102 * STD_SPD/
+				(loadinfo[i].speed * loadinfo[i].ncpus * 100);
+			loadinfo[i].mem -= pages;
+			/*
+			 * The receiver is about to use one more guest slot.
+			 * This update of the balancing data must not depend
+			 * on CONFIG_MOSIX_DEBUG, or kernels with and without
+			 * debugging would keep different load information.
+			 */
+			if(loadinfo[i].free_slots)
+				loadinfo[i].free_slots--;
+#ifdef CONFIG_MOSIX_DEBUG
+			if(ds_debug & (DSDEB_LOADS|DSDEB_CONSIDER))
+				printk("loadinfo[%d], pe=%d increased to %d due to "
+					"migration; mem decreased to %d\n",
+					i, whereto, (int)loadinfo[i].load,
+					(int)loadinfo[i].mem);
+#endif
+			break;
+		}
+	write_unlock_bh(&loadinfo_lock);
+#ifdef CONFIG_MOSIX_DEBUG
+	/* the loop above ran to its end: "whereto" is not in the load window */
+	if((ds_debug & (DSDEB_LOADS|DSDEB_CONSIDER)) && i == INFO_WIN)
+		printk("release_migrations: Could not adjust machine #%d (not in list)\n", whereto);
+#endif /* CONFIG_MOSIX_DEBUG */
+	m->pages_i_bring = -pages;	/* discourage
'memory_badly_required' */ + unchoose_me(); +} + +void +info_someone_came_in(void) +{ + write_lock_bh(&loadinfo_lock); + coming_in++; + came_lately4 += 4; + export_load += MF * STD_SPD / (smp_num_cpus * cpuspeed); + write_unlock_bh(&loadinfo_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_LOADS) + printk("someone_came_in: coming_in=%d, came_lately4=%d, export_load now %d\n", + coming_in, came_lately4, export_load); +#endif +} + +void +end_coming_in(int error) +{ + write_lock_bh(&loadinfo_lock); + coming_in--; + if(error) + { + if((int)(came_lately4 -= 4) < 0) + came_lately4 = 0; + } + write_unlock_bh(&loadinfo_lock); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_LOADS) + printk("end_coming_in: coming_in=%d, came_lately4=%d\n", + coming_in, came_lately4); +#endif +} diff -urN linux-2.4.17/mos/init.c linux_umopenmosix/mos/init.c --- linux-2.4.17/mos/init.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/init.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +#define STD_LOOPS 9961472 /* loops per jiffy on standard processor */ + +void +init_mosix(void) +{ + extern int x86_udelay_tsc; +#ifdef CONFIG_MOSIX_UDB + extern void config_udb(void); + + config_udb(); +#endif /* CONFIG_MOSIX_UDB */ + cpuspeed = ((int64_t)loops_per_jiffy) * STD_SPD / STD_LOOPS; + if(!x86_udelay_tsc) + cpuspeed *= 2; + info_init(); + proc_update_costs(); +#ifdef CONFIG_MOSIX_FS + init_mfs(); +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_DFSA + dfsa_init(); +#endif /* CONFIG_MOSIX_DFSA */ + kernel_thread(mosix_mig_daemon, NULL, 0); + info_startup(); + mosix_load_init(); + mosinfo_update_gateways(); + kernel_thread(mosix_info_daemon, NULL, 0); + kernel_thread(mosix_mem_daemon, NULL, 0); +} diff -urN linux-2.4.17/mos/kernel.c linux_umopenmosix/mos/kernel.c --- linux-2.4.17/mos/kernel.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/kernel.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1117 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +/************* some MOSIX global variables: *************/ + +spinlock_t mosix_flag = SPIN_LOCK_UNLOCKED; +spinlock_t whereto_lock = SPIN_LOCK_UNLOCKED; +int active_cpus; +int cpuspeed = STD_SPD; +int standard_speed = STD_SPD; +int mosix_running; +struct task_struct *chosen_for_balance, *chosen_for_mdp; + +#ifdef CONFIG_MOSIX_DEBUG +unsigned long ds_debug; /* generic debug mask */ +#endif /* CONFIG_MOSIX_DEBUG */ + +struct mosix_cost mosix_cost[MAX_MOSIX_TOPOLOGY] = +{{ + PAGE_COST_D, PAGE_COST_R, + SYSCALL_COST_D, SYSCALL_COST_R, + COPYOUT_COST_BASE_D, COPYOUT_COST_PER_KB_D, + COPYOUT_COST_BASE_R, COPYOUT_COST_PER_KB_R, + COPYIN_COST_BASE_D, COPYIN_COST_PER_KB_D, + COPYIN_COST_BASE_R, COPYIN_COST_PER_KB_R, + MIGRATION_COST_BASIC, MIGRATION_COST_PER_PAGE +#ifdef CONFIG_MOSIX_TOPOLOGY + , 0, 0 +#endif /* CONFIG_MOSIX_TOPOLOGY */ +}}; + +struct mfs_cost mfs_cost[MAX_MOSIX_TOPOLOGY] = +{{ + MFS_COST_CONN_S, MFS_COST_CONN_C, + MFS_COST_INKB_S, MFS_COST_INKB_C, + MFS_COST_OUTKB_S, MFS_COST_OUTKB_C, +}}; + +struct opcost deputy_here[MAX_MOSIX_TOPOLOGY]; +struct task_struct *info_proc; +int info_daemon_active = 0; + +/**************************************************/ + +int +balpriority(int w) +{ + switch(w) + { + case MUSTGOHOME: + return(7); + case GOBACKHOME: + return(6); + default: + return(5); + case MFSBALANCE: + return(4); + case IOBALANCE: + return(3); + case MEMBALANCE: + return(2); + case BALANCE: + return(1); + case 0: + return(0); + } + /*NOTREACHED*/ +} + +inline void +mosix_do_add_to_whereto(struct task_struct *p, int w) +{ + if(balpriority(p->mosix.whereto) >= balpriority(w)) + return; + p->mosix.whereto = w; + if(w != BALANCE && p == chosen_for_balance) + { + free_task_struct(p); + chosen_for_balance = 
NULL; + } + if(w != MEMBALANCE && p == chosen_for_mdp) + { + free_task_struct(p); + chosen_for_mdp = NULL; + } + if((p->mosix.dflags & (DREMOTE|DSENTURGENT)) != (DREMOTE|DSENTURGENT)) + wake_up_mosix(p); +} + +void +mosix_add_to_whereto(struct task_struct *p, int w) +{ + spin_lock_irq(&whereto_lock); + mosix_do_add_to_whereto(p, w); + spin_unlock_irq(&whereto_lock); +} + +/* + * must = 0: normal priority - if it fails, return. + * must = 1: high priority - but still return if it fails. + * must = 2: high priority and die (never return) on failure. + * otherwise, return 1 on success, 0 on failure. + * WARNING: do not call with "must==2" where resources were allocated. + */ +int +mosix_go_home(int must) +{ + register struct task_struct *p = current; + + if(p->mosix.dflags & DDEPUTY) + { + if(current->mosix.dflags & DINCOMING) + /* (eg. within do_mmap while migrating back) */ + return(1); + passto(must ? MUSTGOHOME : GOBACKHOME, 0); + } + else if(p->mosix.dflags & DREMOTE) + { + remote_request(must ? 
REM_MUST_COME_HOME : REM_BRING_ME_HOME, + NULL, 0, NULL, 0, 0, NULL, 0); + /* should not return, but */ + } + if(p->mosix.dflags & (DDEPUTY|DREMOTE)) + { + printk("%s: Failed to Go Back Home.\n", desc_mostask(NULL)); + if(must < 2) + return(0); + if(p->mosix.dflags & DDEPUTY) + { + printk("Process %s, uid=%d, killed because it failed to migrate back here\n", + desc_mostask(NULL), p->uid); + printk("(possible reasons are exhaustion of swap-space, or network disconnection)\n"); + do_exit(SIGKILL); + } + else + remote_disappear(); + } + return(1); +} + +int +mosix_go_home_for_reason(int must, int reason) +{ + struct task_struct *p = current; + int ret; + + task_lock(p); + if((p->mosix.stay & reason) && (p->mosix.dflags & (DDEPUTY|DREMOTE))) + panic("mosix_go_home_for_reason - why not earlier?"); + p->mosix.stay |= reason; + task_unlock(p); + if(!(ret = mosix_go_home(must))) + { + task_lock(p); + p->mosix.stay &= ~reason; + task_unlock(p); + } + return(ret); +} + +int +stay_me_and_my_clones(uint32_t reasons) +{ + struct task_struct *p, *me = current; + struct mm_struct *mm = me->mm; + + if(me->mosix.dflags & DDEPUTY) + return(!mosix_go_home(1)); /* there are no clones! 
*/ + task_lock(me); + me->mosix.stay |= reasons; + task_unlock(me); + if(atomic_read(&mm->mm_realusers) > 1) + { + read_lock(&tasklist_lock); + for_each_task(p) + if(p->mm == mm && p != me) + { + task_lock(p); + if(p->mm == mm) + p->mosix.stay |= reasons; + task_unlock(p); + } + read_unlock(&tasklist_lock); + } + return(0); +} + +void +unstay_mm(struct mm_struct *mm) +{ + struct task_struct *p; + + if(atomic_read(&mm->mm_realusers) == 1 && mm == current->mm) + { + if(!(current->mosix.dflags & DREMOTE)) + tell_process(current, DREQ_CHECKSTAY); + return; + } + read_lock(&tasklist_lock); + for_each_task(p) + if(p->mm == mm) + tell_process(p, DREQ_CHECKSTAY); + read_unlock(&tasklist_lock); +} + +int +mosix_wakeable(struct task_struct *p) +{ +#ifdef CONFIG_MOSIX_DFSA + /* the following side-effect of this boolean function is allowed + * because it is only called by "wake_up_mosix" and it is a good + * practice to hide such MOSIX details from "wake_up_mosix". + * note that we are already protected by runqueue_lock, + * making DREMOTEDFSA, DINSCHED and DHEAVYSLEEP stable. + */ + if(p->mosix.dflags & DREMOTEDFSA) + { + tell_process(p, DREQ_EXITDFSA); + return(!(p->state & TASK_UNINTERRUPTIBLE) && + !(p->mosix.dflags & (DINSCHED|DHEAVYSLEEP))); + } +#endif /* CONFIG_MOSIX_DFSA */ + if(p->mosix.dflags & (DINSCHED|DFAKESIGNAL|DHEAVYSLEEP|DFINISHED)) + return(0); + if(p->state & TASK_UNINTERRUPTIBLE) + return(0); + if(p->mosix.dflags & DREMOTE) + { + if(!remote_need_while_asleep(p)) + return(0); + } + return(1); +} + +/* + * the following two boolean routines are very sensitive, + * it may not even lock the kernel or mosix, so if there is any need to do so, + * it must return 1. 
+ */ + +int +remote_need_while_asleep(struct task_struct *p) +{ + /* if DREQ_CHECKCONF is set, we can't even call + * "i_am_in_a_wrong_place()", since it uses locks + */ + return((process_told(p, DREQ_CHECKCONF) && + p->mosix.whereto != MUSTGOHOME) || + process_told(p, DREQ_EXITDFSA) || + (URGENT_REMOTE_CONDITIONS(p) && + !(p->mosix.dflags & (DSENTURGENT|DPASSING|DINCOMING)))); +} + +/* + * called from schedule() with the runqueue_lock locked. + */ +int +mosix_need_while_asleep(void) +{ + register struct task_struct *p = current; + register struct mosix_task *m = &p->mosix; + +#ifdef CONFIG_MOSIX_DFSA + if((m->dflags & DREMOTEDFSA) && (process_told(p, DREQ_EXITDFSA))) + { + process_ack(p, DREQ_EXITDFSA); + spin_lock_irq(&p->sigmask_lock); + m->dflags |= DFAKESIGNAL; + if(!(m->dflags & DHEAVYSLEEP)) + { + p->sigpending = 1; + set_current_state(TASK_RUNNING); + } + spin_unlock_irq(&p->sigmask_lock); + return(0); + } +#endif /* CONFIG_MOSIX_DFSA */ + if(m->dflags & (DINSCHED|DFAKESIGNAL|DHEAVYSLEEP)) + return(0); + if(m->dflags & DREMOTE) + return(remote_need_while_asleep(p)); + if(m->dirty_bits & MMAP_MMDOWNED) + return(0); + if(m->prequest) + return(1); + if(process_told(p, (DREQ_NICECNG|DREQ_UPDOVERHEADS|DREQ_CHECKCONF| + DREQ_INFOCNG|DREQ_DFSASYNC|DREQ_CAPCNG|DREQ_CHECKSTAY| + DREQ_URGENT))) + return(1); + if(m->whereto) + return(1); + if((m->dflags & DDEPUTY) && (m->stay & DSTAY)) + return(1); + return(0); +} + +void +remote_run_while_asleep(void) +{ + struct task_struct *p = current; + + if(process_told(p, DREQ_CHECKCONF)) + { + if(i_am_in_a_wrong_place()) + mosix_add_to_whereto(current, MUSTGOHOME); + /* else DREQ_CHECKCONF is cleared */ + } + if(URGENT_REMOTE_CONDITIONS(p) && + !(p->mosix.dflags & (DSENTURGENT|DPASSING|DINCOMING))) + inform_deputy_of_urgent(); +} + +void +deputy_run_while_asleep(void) +{ + register struct task_struct *p = current; + + if(p->mosix.dflags & DSYNC) + deputy_async_requests(); + else + { + p->mosix.dflags |= DFAKESIGNAL; + 
evaluate_pending_signals_in_mosix_context(); + } +} + +void +mosix_run_while_asleep(void) +{ + register struct task_struct *p = current; + register struct mosix_task *m = &p->mosix; + +#ifdef CONFIG_MOSIX_DIAG + if(!(m->dflags & DINSCHED)) + panic("mosix_run_while_asleep: dflags=%x\n", m->dflags); +#endif /* CONFIG_MOSIX_DIAG */ + if(m->dflags & DREMOTE) + { + remote_run_while_asleep(); + return; + } + scan: + if(m->dflags & DDEPUTY) + { + deputy_run_while_asleep(); + if(mosix_need_while_asleep()) + goto scan; + return; + } + /* LOCAL PROCESSES */ + if(m->prequest) + { + process_requests(); + goto scan; + } + process_ack(p, DREQ_NICECNG|DREQ_INFOCNG|DREQ_UPDOVERHEADS|DREQ_URGENT); + if(m->whereto) + { + follow_whereto(); + goto scan; + } + if(process_told(p, DREQ_CHECKCONF)) + { + done_checking_conf(); + goto scan; + } + if(process_told(p, DREQ_CHECKSTAY)) + { + if(p->mm) + down_read(&p->mm->mmap_sem); + mosix_check_for_freedom_to_move(); + if(p->mm) + up_read(&p->mm->mmap_sem); + goto scan; + } + if(process_told(p, DREQ_CAPCNG)) + { + sync_caps(); + goto scan; + } +#ifdef CONFIG_MOSIX_DFSA + if(process_told(p, DREQ_DFSASYNC)) + { + clear_dfsasync(); + goto scan; + } +#endif /* CONFIG_MOSIX_DFSA */ +} + +int +remote_pre_usermode_actions(void) +{ + struct task_struct *p = current; + struct mosix_task *m = &p->mosix; + int i, nfsigs; + siginfo_t *fsigs; + + loop: + do_decay(); + if(remote_deputy_has_something_for_us(p)) + { + wait_for_permission_to_continue(); + goto loop; + } + if(process_told(p, DREQ_CHECKCONF) && i_am_in_a_wrong_place()) + mosix_add_to_whereto(p, MUSTGOHOME); + if(URGENT_REMOTE_CONDITIONS(p) && !(m->dflags & DSENTURGENT)) + { + inform_deputy_of_urgent(); + goto loop; + } + if(m->asig || m->nforced_sigs) + { + spin_lock_irq(&p->sigmask_lock); + i = m->asig; + m->asig = 0; + fsigs = m->forced_sigs; + nfsigs = m->nforced_sigs; + m->forced_sigs = NULL; + m->nforced_sigs = 0; + spin_unlock_irq(&p->sigmask_lock); + transfer_signals_to_deputy((unsigned 
int) i, fsigs, nfsigs); + if(fsigs) + kfree(fsigs); + goto loop; + } + return(0); +} + +int +local_pre_usermode_actions(void) +{ + struct task_struct *p = current; + int any = 0; + + while(1) + { + do_decay(); + if(p->mosix.whereto) + { + follow_whereto(); + any = 1; + if(p->mosix.dflags & DDEPUTY) + return(1); + continue; + } + if(process_told(p, DREQ_CHECKCONF)) + { + done_checking_conf(); + any = 1; + continue; + } + if(process_told(p, DREQ_CAPCNG)) + { + sync_caps(); + any = 1; + continue; + } +#ifdef CONFIG_MOSIX_DFSA + if(process_told(p, DREQ_DFSASYNC)) + { + clear_dfsasync(); + any = 1; + continue; + } +#endif /* CONFIG_MOSIX_DFSA */ + if(process_told(p, DREQ_CHECKSTAY)) + { + down_read(&p->mm->mmap_sem); + mosix_check_for_freedom_to_move(); + up_read(&p->mm->mmap_sem); + any = 1; + continue; + } + process_ack(p, DREQ_NICECNG|DREQ_INFOCNG|DREQ_UPDOVERHEADS| + DREQ_URGENT); + return(any); + } +} + +int +mosix_pre_usermode_actions(struct pt_regs regs) +{ + register struct task_struct *p = current; + int any = 0; + + if((regs.xcs & 3) != 3) /* going back to kernel -- not to user mode */ + return(0); + __sti(); /* could be called after an interrupt */ +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.altregs != (uint32_t *)&regs) + { + task_lock(current); + current->mosix.altregs = (uint32_t *)&regs; + task_unlock(current); + } +#endif /* CONFIG_MOSIX_DIAG */ + + if(p->mosix.dflags & DREMOTE) + return(remote_pre_usermode_actions()); + + while(1) + { + if(p->mosix.dflags & DDEPUTY) + { + deputy_main_loop(); + p = current; /* fork? 
*/ + any = 1; + continue; + } + if(local_pre_usermode_actions()) + { + any = 1; + continue; + } + if(p->mosix.prequest) + { + process_requests(); + any = 1; + continue; + } + return(any); + } +} + +int +mosix_fork_init_fields(struct task_struct *p) +{ + struct mosix_task *m = &p->mosix; + struct task_struct *parent = current; + +#ifdef CONFIG_MOSIX_DFSA + tell_process(parent, DREQ_NOTUPTODATE); + tell_process(p, DREQ_NOTUPTODATE); + if(parent->mosix.dflags & (DREMOTEDAEMON|DREMOTE)) + dfsa_tinit(); + m->ttab = NULL; +#endif /* CONFIG_MOSIX_DFSA */ + if(!(parent->mosix.dflags & DDEPUTY)) + tell_process(p, DREQ_CHECKSTAY); + m->dflags = parent->mosix.dflags & ~(DREMOTEDAEMON|DSENTURGENT); + process_ack(p, DREQ_HOMEWAKE|DREQ_URGENT); + m->commpri = 0; + if(parent->mosix.dflags & DREMOTEDAEMON) + { + m->dflags |= DREMOTE|DHEAVYSLEEP; + m->hsleep_count = 1; + } + else if(parent->mosix.dflags & DREMOTE) + sprintf(p->comm, "remote(%d)", m->mypid = m->sonpid); + if(parent->mosix.dflags & DDEPUTY) + m->dflags &= ~(DDEPUTY|DSYNC); + if(!(parent->mosix.dpolicy & DADV_INHERIT)) + { + m->dpolicy = DADV_DEFAULT; +#if DADV_DEFAULT == DADV_OWNDECAY + m->decay = DECAY_QUOTIENT; + m->deccycle = DEFAULT_DECAY_INTERVAL; +#endif + } + if(p->pid == 1) + m->stay |= DSTAY_ITS_INIT; + else + m->stay &= ~DSTAY_ITS_INIT; + m->held_files = NULL; + m->held_allocated = 0; + rwlock_init(&m->state_lock); + m->contact = 0; + m->ucache = NULL; + m->prequest = NULL; + if(parent->mm == &init_mm || !parent->mm || + (parent->mosix.dflags & DDEPUTY) || + (parent->mosix.stay & DSTAY_ITS_DAEMON)) + m->ancesstor = NULL; + else + m->ancesstor = parent; + m->last_sigxcpu = 0; + m->deputytime = 0; + m->passedtime = 0; + m->uttime = 0; + m->dctime = 0; + m->ndemandpages = 0; + m->nsyscalls = 0; + m->ncopyouts = 0; + m->copyoutbytes = 0; + m->ncopyins = 0; + m->copyinbytes = 0; + m->iocounter = 0; + m->cutime = 0; + m->last_consider = 0; + m->last_mconsider = 0; + m->decsecs = 0; + memset((caddr_t)&m->asig, 0, 
sizeof(m->asig)); + m->forced_sigs = 0; + m->nforced_sigs = 0; + m->pages_i_bring = 0; + m->page_allocs = 0; + m->ran = 0; + m->runstart = 0; + m->load = 0; + m->inexec = NULL; + m->nmigs = 0; + m->pass_regs = 0; + init_waitqueue_head(mos_to_waitp(m)); + m->mosix_log = NULL; + init_mosix_log(p); + m->dirty_bits = 0; +#ifdef CONFIG_MOSIX_FS + m->mfs_stats = NULL; +#endif /* CONFIG_MOSIX_FS */ + return(fork_mosix_remote_files(p)); +} + +void +mosix_fork_free_fields(struct task_struct *p) +{ + if(p->mosix.dflags & DDEPUTY) + undeputy(p); + clear_mosix_log(p); +} + +void +mosix_exit(void) +{ + struct task_struct *p = current; + struct mosix_task *m = &p->mosix; + int i; + struct linux_binprm *bprm = m->inexec; + + lock_mosix(); + write_lock_irq(&tasklist_lock); + spin_lock(&runqueue_lock); + m->dflags |= DFINISHED; + m->bstate = TASK_SAME;/* we are not going back to original sleep/stop */ + spin_unlock(&runqueue_lock); + write_unlock_irq(&tasklist_lock); + unlock_mosix(); + m->pages_i_bring = 0; + unchoose_me(); + free_ucache(); + if(m->dflags & DDEPUTY) + mosix_deputy_rusage(1); /* (includes setting "m->exit_mem") */ + if(m->dflags & DREMOTE) + expel_progress = 1; + if(m->dflags & (DDEPUTY|DREMOTE)) + comm_close(NULL); /* for a DEPUTY, this will kill our REMOTE */ + if(m->dflags & DDEPUTY) + undeputy(p); + clear_mosix_log(p); + if(process_told(p, DREQ_CHECKCONF)) + done_checking_conf(); +#ifdef CONFIG_MOSIX_DFSA + dfsa_exit(); +#endif /* CONFIG_MOSIX_DFSA */ + if(bprm) + { + for(i = 0 ; i < MAX_ARG_PAGES ; i++) + if(bprm->page[i]) + __free_page(bprm->page[i]); + if(m->dflags & DREMOTE) + kfree(bprm); + } + if(m->prequest) + discard_requests(); +} + +void +mosix_very_exit(void) +{ + struct task_struct *p = current, *a; + struct mosix_task *m = &p->mosix; + struct task_struct *q; +#ifdef CONFIG_MOSIX_FS + struct mfs_stats *mfst; +#endif /* CONFIG_MOSIX_FS */ + +#ifdef CONFIG_MOSIX_FS + lock_mosix(); + mfst = m->mfs_stats; + m->mfs_stats = NULL; + unlock_mosix(); + 
if(mfst) + kfree(mfst); +#endif /* CONFIG_MOSIX_FS */ + read_lock(&tasklist_lock); + if((a = m->ancesstor)) + get_task_struct(a); + for_each_task(q) + if(q->mosix.ancesstor == p) + q->mosix.ancesstor = a; + read_unlock(&tasklist_lock); + if(a) + { + add_statistics_to_ancesstor(p, a); + free_task_struct(a); + } + if(m->forced_sigs) + { + kfree(m->forced_sigs); + m->nforced_sigs = 0; + } +} + +void +mosix_obtain_registers(unsigned long mask) +{ + register struct mosix_task *m = &current->mosix; + unsigned int bringme; + +// disabled if(mask & ~ALL_REGISTERS) +// disabled panic("mosix_obtain_registers, mask=0x%x", (int)mask); + + + if(m->dflags & DDEPUTY) + { + if((bringme = mask & ~m->deputy_regs)) + { + if(deputy_request(DEP_BRING_ME_REGS, &bringme, + sizeof(bringme), NULL, 0, 0, NULL, 0)) + deputy_communication_failed(); + else if((m->deputy_regs & mask) != mask) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s: Asked for regs(%x), but has only %x\n", + desc_mostask(NULL), + (int)mask, m->deputy_regs); +#endif /* CONFIG_MOSIX_DIAG */ + send_sig(SIGKILL, current, 1); + } + } + } + else if(m->dflags & DREMOTE) + { + if((bringme = mask & m->deputy_regs)) + { + if(remote_request(REM_BRING_ME_REGS, &bringme, + sizeof(bringme), NULL, 0, 0, NULL, 0)) + send_sig(SIGKILL, current, 1); + else if((~m->deputy_regs & mask) != mask) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s: Asked for regs(%x), but has only %x\n", + desc_mostask(NULL), (int)mask, + ALL_REGISTERS & ~m->deputy_regs); +#endif /* CONFIG_MOSIX_DIAG */ + send_sig(SIGKILL, current, 1); + mosix_panic("remote-regs"); + } + } + } +} + +unsigned int +which_regs_to_send() +{ + register struct mosix_task *m = &current->mosix; + + if(!m->pass_regs) + return(0); + return(m->pass_regs & + ((m->dflags & DDEPUTY) ? 
m->deputy_regs : ~m->deputy_regs)); +} + +void +regs_were_sent(void) +{ + register struct mosix_task *m = &current->mosix; + + if(m->dflags & DDEPUTY) + m->deputy_regs &= ~m->pass_regs; + else + m->deputy_regs |= m->pass_regs; + m->pass_regs = 0; +} + +void +mosix_bring_monkey_users_back(struct inode *ip) +{ + register struct task_struct *p, *found, *me = current; + DECLARE_WAITQUEUE(wait, me); + + while(1) + { + found = NULL; + read_lock(&tasklist_lock); + for_each_task(p) + if(p != me && !(p->mosix.dflags & DREMOTE) && + task_maps_ip(p, ip)) + { + task_lock(p); + p->mosix.stay |= DSTAY_FOR_MONKEY; + task_unlock(p); + if(p->mosix.dflags & DDEPUTY) + { + tell_process(p, DREQ_HOMEWAKE); + wake_up_mosix(p); + if(!found) + { + found = p; + get_task_struct(p); + } + } + } + if(!found) + { + read_unlock(&tasklist_lock); + /* VMODIFIED is needed against a nearly-impossible + * REMOTE race, when ip is modified, then unmonkied + * and a process migrates there even before the + * previous completed exiting: + */ + VMODIFIED(ip); + return; + } + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(mos_to_waitp(&found->mosix), &wait); + read_unlock(&tasklist_lock); + schedule(); + remove_wait_queue(mos_to_waitp(&found->mosix), &wait); + set_current_state(TASK_RUNNING); + free_task_struct(found); + } +} + +void +mosix_no_longer_monkey(struct inode *ip) +{ + register struct task_struct *p; + + read_lock(&tasklist_lock); + for_each_task(p) + if(!(p->mosix.dflags & (DREMOTE|DDEPUTY)) && task_maps_ip(p, ip)) + { + tell_process(p, DREQ_CHECKSTAY); + wake_up_mosix(p); + } + read_unlock(&tasklist_lock); +} + +/* + * caller is responsible to down_[read|write](&current->mm->mmap_sem) + * (unless deemed unnecessary - as in "exec" or after migrating back, + * when we are sure to have no clones). 
+ */ +void +mosix_check_for_freedom_to_move(void) +{ + struct task_struct *p = current; + register struct vm_area_struct *mpnt; + register struct inode *ip; + int stay; + struct mm_struct *mm; + + if(p->mosix.dflags & DDEPUTY) + { +#ifdef CONFIG_MOSIX_DIAG + if(p->mosix.stay & (DSTAY & ~(DSTAY_FOR_MONKEY|DSTAY_FOR_RT))) + panic("DEPUTY with STAY"); +#endif /* CONFIG_MOSIX_DIAG */ + return; + } + process_ack(p, DREQ_CHECKSTAY); + if(!(p->mosix.stay & (DSTAY_PER_MM|DSTAY_FOR_CLONE))) + return; + task_lock(p); + stay = p->mosix.stay & ~(DSTAY_PER_MM|DSTAY_FOR_CLONE); + if(!(mm = current->mm)) + stay |= DSTAY_FOR_CLONE; + else + { + if(atomic_read(&mm->mm_kiocount)) + stay |= DSTAY_FOR_KIOBUF; + if(atomic_read(&mm->mm_realusers) > 1) + stay |= DSTAY_FOR_CLONE; + if(mm->def_flags & VM_LOCKED) + stay |= DSTAY_FOR_MLOCK; + for(mpnt = mm->mmap ; mpnt ; mpnt = mpnt->vm_next) + { + if(mpnt->vm_file) + { + mode_t mode; + + ip = mpnt->vm_file->f_dentry->d_inode; + mode = ip->i_mode; + if(ip->i_mapping->i_mmap_shared) + stay |= DSTAY_FOR_MONKEY; + if(S_ISCHR(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode)) + stay |= DSTAY_FOR_DEV; + } + if(mpnt->vm_flags & VM_LOCKED) + stay |= DSTAY_FOR_MLOCK; + } + } + p->mosix.stay = stay; + task_unlock(p); +} + +/* + * caller must use 'get_task_struct(p)' + */ +int +mosix_send_back_home(struct task_struct *p) +{ + struct task_struct *me = current; + int timeout = 20 * HZ; + DECLARE_WAITQUEUE(wait, me); + + if(p == me) + return(mosix_go_home(1) ? 0 : -ENOMEM); + + /* there is a paranoidic possibility of a tricky deadlock: if two + * (or more) remote processes send each other home simultaneously: + * we want to sleep interruptibly, so we can migrate, but no signals + * should stop us - unless the other process failed to migrate for a + * very long time. e.g. we are to give up only with the combination + * of a signal + timeout. 
+ */ + add_wait_queue(mos_to_waitp(&p->mosix), &wait); + while(p->mosix.dflags & DDEPUTY) + { + tell_process(p, DREQ_HOMEWAKE); + mosix_add_to_whereto(p, MUSTGOHOME); + spin_lock_irq(&me->sigmask_lock); + me->mosix.ignoreoldsigs = 1; + me->sigpending = 0; + spin_unlock_irq(&me->sigmask_lock); + set_current_state(TASK_INTERRUPTIBLE); + task_lock(p); + if(!(p->mosix.dflags & DDEPUTY)) + set_current_state(TASK_RUNNING); + task_unlock(p); + timeout = schedule_timeout(timeout); + if(timeout <= 0) + { + spin_lock_irq(&me->sigmask_lock); + recalc_sigpending(me); + spin_unlock_irq(&me->sigmask_lock); + if(signal_pending(current)) + { + remove_wait_queue(mos_to_waitp(&p->mosix), + &wait); + free_task_struct(p); + return(-EINTR); + } + timeout = HZ; + } + } + set_current_state(TASK_RUNNING); + remove_wait_queue(mos_to_waitp(&p->mosix), &wait); + spin_lock_irq(&me->sigmask_lock); + recalc_sigpending(me); + spin_unlock_irq(&me->sigmask_lock); + return(0); +} + +void +mosix_pre_dropping_mm(struct task_struct *myself, struct mm_struct *mm) +{ + register struct task_struct *p; + + if(!mm) + return; + atomic_dec(&mm->mm_realusers); + if(atomic_read(&mm->mm_realusers) == 1) + { + read_lock(&tasklist_lock); + for_each_task(p) + if(p != myself && p->mm == mm) + { + tell_process(p, DREQ_CHECKSTAY); + break; + } + read_unlock(&tasklist_lock); + } +} + +void +mosix_exit_mm(struct task_struct *p) +{ + if((p->mosix.dflags & (DPASSING|DREMOTE)) == DPASSING) + return; + if(p->mosix.held_files) + mosix_clear_all_held_files(p); + if(!(p->mosix.dflags & DDEPUTY)) + mosix_pre_dropping_mm(p, p->mm); + task_lock(p); + p->mosix.stay = (p->mosix.stay | DSTAY_FOR_CLONE) & ~DSTAY_PER_MM; + task_unlock(p); + p->mosix.pages_i_bring = 0; +} + +void +mosix_exec_mmap(struct mm_struct *old_mm) +{ + struct task_struct *p = current; + + if(p->mosix.dflags & DDEPUTY) + return; + + mosix_pre_dropping_mm(p, old_mm); + task_lock(p); + p->mosix.stay &= ~(DSTAY_PER_MM|DSTAY_FOR_CLONE); + task_unlock(p); + 
p->mosix.pages_i_bring = 0; + if(p->mosix.held_files) + mosix_clear_all_held_files(p); +} + +int +mosix_pre_clone(void) +{ + struct task_struct *p = current; + + if(!mosix_go_home(0)) + return(-ENOMEM); + + /* prevent race with "mosix_pre_dropping_mm" */ + if(p->mm) + atomic_inc(&p->mm->mm_realusers); + read_lock(&tasklist_lock); + task_lock(p); + p->mosix.stay |= DSTAY_FOR_CLONE; + task_unlock(p); + read_unlock(&tasklist_lock); + return(0); +} + +void +mosix_post_clone(void) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + if(mm) + { + atomic_dec(&mm->mm_realusers); + read_lock(&tasklist_lock); + if(atomic_read(&mm->mm_realusers) == 1) + { + task_lock(p); + p->mosix.stay &= ~DSTAY_FOR_CLONE; + task_unlock(p); + } + read_unlock(&tasklist_lock); + } +} + +/* 'run_on' and 'run_off' must be called with the runqueue_lock! */ +void +run_on(struct task_struct *p) +{ + if(!(p->mosix.dflags & DPAGEIN)) + { + mosix_running++; + p->mosix.runstart = load_ticks + 1; + } +} + +void +run_off(struct task_struct *p) +{ + if(!(p->mosix.dflags & DPAGEIN)) + { + mosix_running--; + p->mosix.ran += load_ticks + 1 - p->mosix.runstart; + p->mosix.runstart = 0; + } +} + +void +down_half_interruptible(struct semaphore *sem) +{ + struct task_struct *p = current; + + do + { + spin_lock_irq(&p->sigmask_lock); + p->mosix.ignoreoldsigs = 1; + p->sigpending = 0; + spin_unlock_irq(&p->sigmask_lock); + } + while(down_interruptible(sem) == -EINTR); + recalc_sigpending(p); +} diff -urN linux-2.4.17/mos/load.c linux_umopenmosix/mos/load.c --- linux-2.4.17/mos/load.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/load.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh, Amnon Barak. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VV1 5 +#define VV2 8 +#define DECAY VV1/VV2 /* decay of load */ +#define NEWDATA (VV2-VV1)/VV2 /* weight of new load data */ + +unsigned load_adder; /* accumulator of `running' every clock tick */ +unsigned load_ticks; /* # of ticks included in load_adder */ +unsigned cpuse; /* # of ticks when CPU was not thrashing */ +unsigned acpuse = MF; /* long term non-trashing (start value = non-0) */ +unsigned coming_in; /* number of arriving processes */ +unsigned came_lately4; /* processes that arrived lately (*4) */ +unsigned load_left; /* load of processes that just left */ + +int export_load; /* load reported to other processors */ +int stable_export; /* machine dependent stabilizing factor */ + +int Tvis; +int Tload; + +#ifdef CONFIG_MOSIX_RESEARCH +unsigned int io_read_rate; +unsigned int io_write_rate; +unsigned int old_io_read; +unsigned int old_io_write; +#endif /* CONFIG_MOSIX_RESEARCH */ + + +void +mosix_calc_load(unsigned long unused) +{ + struct task_struct *p; + register struct mosix_task *m; + register int ladd, cpu, ticks; + unsigned long flags; + unsigned long newload; + int new_expload; + unsigned new_cpuse; + unsigned new_came; + static unsigned upper_load; /* over estimated load */ + static unsigned accload; /* accumulated load (*128) */ + static int display_counter = 0; +#ifdef CONFIG_MOSIX_RESEARCH + unsigned int new_io_read; + unsigned int new_io_write; + int major,disk; +#endif /* CONFIG_MOSIX_RESEARCH */ + + + ticks = load_ticks; + cpu = cpuse; + ladd = load_adder; + cpuse = load_adder = load_ticks = 0; + + ladd = ladd * ((long long)(MF * STD_SPD)) / + 
(ticks * cpuspeed * smp_num_cpus); + if(ladd * 128 > accload) /* slowly up */ + accload = accload * DECAY + ladd * 128 * NEWDATA; + else /* quickly down */ + accload = ladd * 128; + if(ladd >= upper_load) /* quickly up */ + upper_load = ladd; + else /* very slowly down */ + upper_load = (upper_load * 7 + ladd) / 8; + new_cpuse = (acpuse * 3 + cpu * MF / ticks + 3) / 4; + newload = (accload+64) / 128; + new_expload = (upper_load + stable_export + + came_lately4 * MF * STD_SPD / + (4 * cpuspeed * smp_num_cpus)) * + MF * smp_num_cpus / new_cpuse; + if(newload < load_left) + newload = 0; + else + newload -= load_left; + newload = newload * MF * smp_num_cpus / new_cpuse; + new_came = came_lately4 * DECAY + coming_in * 4 * NEWDATA; + spin_lock_irqsave(&runqueue_lock, flags); + for_each_task(p) + { + m = &p->mosix; + if(m->runstart) + { + m->ran += ticks + 1 - m->runstart; + m->runstart = 1; + } + m->load = m->load * DECAY + m->ran * MF * 4*NEWDATA/ticks; + m->ran = 0; + m->page_allocs >>= 1; /* decay in time */ + } + spin_unlock_irqrestore(&runqueue_lock, flags); + + if(Tvis) + printk("\0337\033[22;55HL=%d,E=%d,R=%d,U=%d \0338", + (int)newload, new_expload, mosix_running, + new_cpuse); + if(Tload) { + if (!(display_counter = (display_counter + 1) & 0xf)) + printk("\naccload upper_load\tload_adder\tload_ticks\n"); + printk("%7d\t%10d\t%10d\t%d\n", + accload, upper_load, ladd, ticks); + } + write_lock(&loadinfo_lock); + loadinfo[0].load = newload; + export_load = new_expload; + acpuse = new_cpuse; + came_lately4 = new_came; + load_left = 0; + +#ifdef CONFIG_MOSIX_RESEARCH + new_io_read = new_io_write = 0; + for (major = 0; major < DK_MAX_MAJOR; major++) { + for (disk = 0; disk < DK_MAX_DISK; disk++) { + new_io_read += kstat.dk_drive_rblk[major][disk]; new_io_write += kstat.dk_drive_wblk[major][disk]; + } + } + + /* the sectors are 512 bytes size */ + + new_io_read = new_io_read >> 1; + new_io_write = new_io_write >> 1; + io_read_rate = (new_io_read - old_io_read); + 
io_write_rate = (new_io_write - old_io_write); + old_io_read = new_io_read; + old_io_write = new_io_write; +#endif /* CONFIG_MOSIX_RESEARCH */ + + write_unlock(&loadinfo_lock); + + if((p = (struct task_struct *)info_proc)) + send_sig(SIGALRM, p, 1); +} + +void +mosix_load_init(void) +{ + acpuse = MF * smp_num_cpus; + load_ticks = MF; /* fake it to begin with (must be non-zero) */ + cpuse = MF * smp_num_cpus; + load_adder = mosix_running * MF; + mosix_calc_load(0); +#ifdef CONFIG_MOSIX_RESEARCH + io_read_rate = io_write_rate = old_io_read = old_io_write = 0; +#endif /* CONFIG_MOSIX_RESEARCH */ +} + +void +mosix_snap_load(int ticks) +{ + unsigned long flags; + extern struct list_head runqueue_head; + + load_adder += mosix_running; + if(mosix_running <= active_cpus) + cpuse += ticks * smp_num_cpus; + else + { + /* a possible race between the time of making a process runable + * and actual picking up by a processor: so count runable + * processes as if already occupying a processor: + */ + int n; + register struct task_struct *p; + struct list_head *tmp; + + spin_lock_irqsave(&runqueue_lock, flags); + if((n = active_cpus) < smp_num_cpus) + for(tmp = runqueue_head.next ; tmp != &runqueue_head ; + tmp = tmp->next) + { + p = list_entry(tmp, struct task_struct, run_list); + if(p->mosix.runstart && ++n == smp_num_cpus) + break; + } + spin_unlock_irqrestore(&runqueue_lock, flags); + cpuse += ticks * n; + } + load_ticks += ticks; + if(load_ticks >= MF) + mosix_calc_load(0); +} diff -urN linux-2.4.17/mos/log.c linux_umopenmosix/mos/log.c --- linux-2.4.17/mos/log.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/log.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. 
NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan. + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DEBUG +spinlock_t mosixlog_lock = SPIN_LOCK_UNLOCKED; + +static char *log_type_str[] = { + "nothing", + "SENDTYPE", + "RECVTYPE", + "RECEIVE", + "SENDHDR", + "RECVHDR", + "SENDDATA", + "RECVDATA", + "SENDURG", + "RECVURG", + "RETVAL" +}; + +int log_mask = 0x1ff; /* for now: only 32 bits. default: first 9 bits */ + + +void +init_mosix_log(struct task_struct *p) +{ + int *log; + + if(!p->mosix.mosix_log && (log = + (int *)kmalloc((MOSIX_LOG_LEN+1) * sizeof(int), GFP_KERNEL))) + { + memset(log, 0, sizeof(int) * (MOSIX_LOG_LEN + 1)); + MOSIX_LOG_POS(log) = 0; + spin_lock_irq(&mosixlog_lock); + p->mosix.mosix_log = (void *)log; + spin_unlock_irq(&mosixlog_lock); + } +} + + +void +clear_mosix_log(struct task_struct *p) +{ + void *log; + + if ((log = (void *)p->mosix.mosix_log)) + { + spin_lock_irq(&mosixlog_lock); + p->mosix.mosix_log = NULL; + spin_unlock_irq(&mosixlog_lock); + kfree(log); + } +} + + +void +add_mosix_log(struct task_struct *p, int type, int val) +{ + int *log; + unsigned long flags; + + spin_lock_irqsave(&mosixlog_lock, flags); + if(p && (log = (int *)p->mosix.mosix_log) && + ((1 << (type - 1)) & log_mask)) + { + log[MOSIX_LOG_POS(log)] = (val & 0x00ffffff) | (type << 24); + if (++MOSIX_LOG_POS(log) == MOSIX_LOG_LEN) + MOSIX_LOG_POS(log) = 0; + } + spin_unlock_irqrestore(&mosixlog_lock, flags); +} + +void +dump_mosix_log(struct task_struct *p) +{ + int i, n; + int *log; + int type, val; + + if (!p) + p = current; + else if(((unsigned int)p) < 65536) + p = find_any_task_by_pid((int)p); + if (!p) + return; + + log = p->mosix.mosix_log; + if(!log) + { + printk("No MOSIX log for %s\n", desc_mostask(&p->mosix)); + return; + } + n = MOSIX_LOG_POS(log); + printk("MOSIX event log for %s:\n", desc_mostask(&p->mosix)); + for (i = 
(n+1)%MOSIX_LOG_LEN; i != n; i = (i+1)%MOSIX_LOG_LEN) { + type = log[i] >> 24; + val = log[i] & 0x00ffffff; + if (type < sizeof(log_type_str) / sizeof(char *)) + printk("%s: ", log_type_str[type]); + else + printk("????(%d): ", type); + switch (type) { + case MOSIX_LOG_SENDTYPE: + case MOSIX_LOG_RECVTYPE: + printk("type of %s msg: 0x%x\n", + (type == MOSIX_LOG_SENDTYPE ? "SEND" : "RECV"), + val); + break; + case MOSIX_LOG_RECEIVE: + break; + case MOSIX_LOG_SENDHDR: + case MOSIX_LOG_RECVHDR: + printk("%s header size: hlen = %d olen = %d\n", + (type == MOSIX_LOG_SENDHDR ? "SEND" : "RECV"), + val & 0xffff, val >> 16); + break; + case MOSIX_LOG_SENDDATA: + case MOSIX_LOG_RECVDATA: + printk("%s data size: %d (0x%x)\n", + (type == MOSIX_LOG_SENDDATA ? "SEND" : "RECV"), + val, val); + break; + case MOSIX_LOG_SENDURG: + case MOSIX_LOG_RECVURG: + printk("urgent signal %s\n", + type == MOSIX_LOG_SENDURG ? "SENT" : "ARRIVED"); + break; + case MOSIX_LOG_RETURNVAL: + printk(!val ? "zero\n" : "value %d (0x%x)\n", val, val); + break; + default: + printk("with value: 0x%x (%d)\n", val, val); + break; + } + } + printk("\n"); +} +#endif /* CONFIG_MOSIX_DEBUG */ diff -urN linux-2.4.17/mos/mig.c linux_umopenmosix/mos/mig.c --- linux-2.4.17/mos/mig.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/mig.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1816 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Oren Laadan, Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +static int mig_remote_receive_proc(void *); +static int mig_local_passto_remote(int, int); +static int mig_remote_passto_remote(int, int); +static int mig_remote_passto_local(int, int); + +#define MIGDSTR "migdaemon" +static char REMOTESTR[] = "remote"; +static char REMOTESTR2[] = "remote(%d)"; + +int mig_daemon_active = 0; +struct task_struct *mig_proc = NULL; +#ifdef CONFIG_MOSIX_UDB +#define SHOW_MIGRATIONS do_weeeeeeeee +#ifdef CONFIG_MOSIX_WEEEEEEEEE +static int do_weeeeeeeee = 1; +#else +static int do_weeeeeeeee = 0; +#endif /* CONFIG_MOSIX_WEEEEEEEEE */ +#else +#ifdef CONFIG_MOSIX_WEEEEEEEEE +#define SHOW_MIGRATIONS 1 +#endif /* CONFIG_MOSIX_WEEEEEEEEE */ +#endif /* CONFIG_MOSIX_UDB */ + +int +mosix_mig_daemon(void *dummy) +{ + struct task_struct *p = current; + mosix_link *mlink; + mosix_addr saddr; + int error; + + common_daemon_setup(MIGDSTR, 0); + p->mosix.dflags |= DREMOTEDAEMON; + init_guest_user_struct(); + p->rlim[RLIMIT_STACK].rlim_cur = p->rlim[RLIMIT_STACK].rlim_max; + lock_mosix(); + mig_proc = p; + unlock_mosix(); + +restart: + wait_for_mosix_configuration(&mig_daemon_active); + error = 0; + if (!p->mosix.contact && + !(p->mosix.contact = comm_open(COMM_MIGD, 0, 0))) + { + if(PE) + printk(MIGDSTR ": failed opening migration service" + " -- retrying\n"); + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + goto restart; + } + + while (1) + { + if(!PE) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug) + printk(MIGDSTR ": MOSIX shutdown\n"); +#endif + comm_close(NULL); + goto restart; + } + + error 
= comm_accept(p->mosix.contact, &mlink, &saddr, 0UL); + if (error == -EINTR || error == -ERESTART || error == -EAGAIN + || error == -ERESTARTSYS || error == -EDIST) + { +#ifdef CONFIG_MOSIX_DEBUG + if(error != -EDIST && PE) + printk(MIGDSTR + ": error.. retrying (%d)\n", error); +#endif + spin_lock_irq(&p->sigmask_lock); + flush_signals(p); + spin_unlock_irq(&p->sigmask_lock); + continue; + } + else if (error) + { + printk("%s: failed to accept connection (err = %d)" + " - re-initializing service\n", MIGDSTR, error); + comm_close(NULL); + goto restart; + } + + if(!PE || mosadmin_mode_block) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_MIG | DSDEB_ERROR)) + printk(MIGDSTR ": not accepting right now\n"); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_close(mlink); + continue; + } + + current->mosix.deppe = comm_getpeer(mlink); + error = user_thread(mig_remote_receive_proc, (void *)mlink, 0); + if (error < 0) + { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_MIG) + printk(MIGDSTR ": fork failed %d\n", error); +#endif + comm_close(mlink); + continue; + } +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_MIG) + printk(MIGDSTR ": forked a child pid = %d\n", error); +#endif + } +} + +static int +mig_remote_receive_proc(void *ptr) +{ + struct mosix_link *contact = (struct mosix_link *)ptr; + struct mosix_link *remlink = NULL; + struct task_struct *p = current; + struct mosix_task *m = &p->mosix; + struct mig_request_h *mrp; + int type; + int error; + int reply = 0; + int load_came_in = 0; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_MIG) + printk("%s-mig_remote_receive_proc: BORN\n", desc_mostask(m)); +#endif + memcpy(p->comm, REMOTESTR, sizeof(REMOTESTR)); + p->mosix.stay = 0; + comm_use(p, contact); + + lighter_sleep(); + if((error = obtain_mm())) + goto failed; + + if (p->mosix.dflags & DREMOTE) + p->mosix.dflags |= DPSYNC; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 1\n", desc_mostask(NULL)); +#endif /* 
CONFIG_MOSIX_DEBUG */ + if ((error = mig_recv_request(&mrp))) + goto failed; + type = mrp->request_type; + if(type != DEPUTY_PROBE) + load_came_in = 1; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + if(mrp->wp_works_ok && !boot_cpu_data.wp_works_ok) + reply = -ENOEXEC; + + set_personality(mrp->personality); + if(current->personality != mrp->personality) + reply = -EDIST; + comm_free((void *) mrp); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 3\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (type == DEPUTY_PROBE && reply == 0) { + mosix_addr sa; + + /* + * prepare another mosix_link for the REMOTE and send its + * address through the DEPUTY. + */ + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 3.1\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + remlink = comm_open(COMM_ACCEPT, &sa, comm_remote_timo); + if (!remlink) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_MIG | DSDEB_ERROR)) + printk("%s-mig_remote_receive_proc:" + "error new link\n", desc_mostask(NULL)); +#endif + reply = -EDIST; + goto send_reply; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 3.2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = comm_send(MIG_REQUEST|REPLY, (void *)&reply, + sizeof(int), (void *)&sa, sizeof(sa), 0))) + goto failed; + comm_use(p, remlink); + reply = mig_recv_request(&mrp); + + /* 'remlink' was possibly switched by comm_recv(): */ + remlink = p->mosix.contact; + + if (reply) + goto failed; + load_came_in = 1; + comm_free((void *) mrp); + goto send_reply; + } else { + send_reply: +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 3.3 (ack = %d)\n", + desc_mostask(NULL), reply); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = comm_send(MIG_REQUEST|REPLY, (void *)&reply, + 
sizeof(int), NULL, 0, 0))) + goto failed; + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 4\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if (reply) + goto failed; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 5 (type %d)\n", desc_mostask(NULL), type); +#endif /* CONFIG_MOSIX_DEBUG */ + + error = mig_do_receive(); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 7\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (remlink) { + comm_use(p, contact); + comm_close(remlink); + } + if (error) + { + end_coming_in(error); + m->pages_i_bring = 0; + remote_disappear(); + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-migrating: 8 (kickstart !)\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + mosix_clear_statistics(); + end_coming_in(0); + m->pages_i_bring = 0; +#ifdef CONFIG_MOSIX_DFSA + tell_process(p, DREQ_NOTUPTODATE); + dfsa_tinit(); +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef SHOW_MIGRATIONS + if(SHOW_MIGRATIONS) + printk("Weeeeeeeee.....\n"); +#endif /* SHOW_MIGRATIONS */ + kickstart(); + panic("kickstart returned"); + /*NOTREACHED*/ + +failed: + if(load_came_in) + { + end_coming_in(-EDIST); /* any error would do */ + m->pages_i_bring = 0; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_MIG|DSDEB_MIGSTAGE)) + printk("%s-migrating: failed, error = %d, reply = %d\n", + desc_mostask(NULL), error, reply); +#endif /* CONFIG_MOSIX_DEBUG */ + if (remlink) { + comm_use(p, contact); + comm_close(remlink); + } +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_MIG | DSDEB_MIGSTAGE | DSDEB_ERROR)) + printk("%s-mig_remote_receive_proc: aborting\n", + desc_mostask(NULL)); +#endif + do_exit(SIGKILL); + /*NOTREACHED*/ +} + +void +follow_whereto() +{ + int t; + register struct task_struct *p = current; + + spin_lock_irq(&whereto_lock); + t = p->mosix.whereto; + p->mosix.whereto = 0; + 
spin_unlock_irq(&whereto_lock); + if(!t) + return; + if(t > 0 || t == GOBACKHOME || t == MUSTGOHOME) + passto(t, 0); + else if(p->mosix.dflags & DDEPUTY) + coordinate(t, 0); + else + consider(t, NULL); +} + +int +passto(int whereto, int reason) +{ + register struct task_struct *p = current; + int error; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_MIG|DSDEB_MIGSTAGE)) + printk("%s-passto: %d %d\n", + desc_mostask(NULL), whereto, reason); +#endif /* CONFIG_MOSIX_DEBUG */ + +#ifdef CONFIG_MOSIX_DIAG + if (p->mosix.dflags & DREMOTE) + panic("REMOTE passto"); +#endif /* CONFIG_MOSIX_DIAG */ + if (!PE) + return (whereto && whereto != GOBACKHOME && + whereto != MUSTGOHOME ? -EDIST : 0); + if (whereto == MUSTGOHOME) + { + whereto = GOBACKHOME; + p->mosix.dflags |= DMUSTBEBACK; + } +#ifdef CONFIG_MOSIX_CHEAT_MIGSELF + if (whereto == GOBACKHOME) + { + if(!p->mosix.whereami) + { + p->mosix.dflags &= ~DMUSTBEBACK; + return(0); + } + whereto = 0; + } +#else + if (whereto == GOBACKHOME || whereto == PE) + whereto = 0; + if (whereto == p->mosix.whereami) + { + p->mosix.dflags &= ~DMUSTBEBACK; + return(0); + } +#endif /* CONFIG_MOSIX_CHEAT_MIGSELF */ + if (whereto && ((p->mosix.stay & DSTAY) || !mos_to_net(whereto, 0))) + return (-EDIST); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_MIG|DSDEB_MIGSTAGE)) + printk("%s Passing to %d!\n", desc_mostask(NULL), whereto); +#endif /* CONFIG_MOSIX_DEBUG */ + + spin_lock_irq(&runqueue_lock); + p->mosix.dflags |= DPASSING; + spin_unlock_irq(&runqueue_lock); + store_common_ps_info(); + + if (!p->mosix.whereami) + error = mig_local_passto_remote(whereto, reason); + else if (!whereto) + error = mig_remote_passto_local(whereto, reason); + else + error = mig_remote_passto_remote(whereto, reason); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-passto: handler returned %d\n", + desc_mostask(NULL), error); +#endif /* CONFIG_MOSIX_DEBUG */ + + stop_storing_common_ps_info(); + spin_lock_irq(&runqueue_lock); /* 
for DPASSING (not for DMUSTBEBACK) */ + p->mosix.dflags &= ~(DPASSING|DMUSTBEBACK); + spin_unlock_irq(&runqueue_lock); + + if(error) + return (-EDIST); + + task_lock(p); + p->mosix.whereami = whereto; + p->mosix.nmigs++; + task_unlock(p); + if(p->mosix.sigmig) + { + struct siginfo info; + + info.si_signo = p->mosix.sigmig; + info.si_errno = 0; + info.si_code = SI_MIGRATED; + info.si_newplace = whereto; + info.si_reason = reason; + send_sig_info(p->mosix.sigmig, &info, p); + } + mosix_clear_statistics(); + return(0); +} + +void +coordinate(int reason, int already_in) +{ + struct mosix_task *m = &current->mosix; + void *head; + int hdln; + int where; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("%s-coordinate\n", desc_mostask(NULL)); +#endif + while(1) + { + if(already_in) + { + if(deputy_wait(DEP_CONSIDER|REPLY, &head, &hdln)) + goto die; + where = *((int *)head); + comm_free(head); + already_in = 0; + } + else if(deputy_request(DEP_CONSIDER, &reason, sizeof(reason), + NULL, 0, 0, (void *)&where, -sizeof(where))) + { + die: + deputy_communication_failed(); + return; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("coordinate: where=%d\n", where); +#endif + if(!where) + return; + if(reason && where != GOBACKHOME && + (m->whereto == GOBACKHOME || m->whereto == MUSTGOHOME)) + { + where = m->whereto; + reason = 0; + } + if(!passto((where == GOBACKHOME || where == MUSTGOHOME) ? 0 : + where, reason)) + return; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_CONSIDER) + printk("coordinate: did not pass to %d - next one please\n", where); +#endif + } +} + +int +mig_send_request(int reason, int type) +{ + struct mig_request_h mrp; + int *nack; + int mfident = (current->mosix.dflags & DDEPUTY) ? 
COMM_MFIDENT : 0; + int command = MIG_REQUEST | mfident; + int error; + int len; + + mrp.version = MOSIX_MIGRATION_VERSION; + mrp.topology = MAX_MOSIX_TOPOLOGY; + mrp.reason = reason; + mrp.personality = current->personality; + mrp.wp_works_ok = boot_cpu_data.wp_works_ok; + mrp.request_type = type; +#ifdef CONFIG_MOSIX_DFSA + mrp.has_dfsa = 1; +#else + mrp.has_dfsa = 0; +#endif /* CONFIG_MOSIX_DFSA */ + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-send_request 1\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if(type == DEPUTY_PROBE) + error = comm_send(command, &mrp, sizeof(mrp), NULL, 0, 0); + else + { + mrp.pages_sent = + current->mosix.migpages ? : count_migrating_pages(); + error = send_with_miginfo(command, &mrp, sizeof(mrp)); + } + if(error) + return (error); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-send_request 2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = comm_recv((void **)&nack, &len)) < 0) + return (error); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-send_request 3\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if (error != (MIG_REQUEST|REPLY)) { + error = -EDIST; + goto out; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-send_request 4\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ +#ifdef CONFIG_MOSIX_DIAG + if (len != sizeof(int)) + panic("mig_send_request: bad reply length"); +#endif + + /* at this point the other party *knows* us */ + if (mfident) + config_set_status(comm_getpeer(current->mosix.contact)); + + error = *nack; /* should be: 0 or -EDIST */ +out: + comm_free((void *) nack); + return (error); +} + +int +mig_recv_request(struct mig_request_h **mrp) +{ + int type, hlen; + int reply = -EDIST; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-recv_request 1\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((type = 
comm_recv((void **) mrp, &hlen)) < 0) + return (type); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_recv_request: type 0x%x head 0x%x len %d\n", + desc_mostask(NULL), type, (int)mrp, hlen); +#endif /* CONFIG_MOSIX_DEBUG */ + + if (type != MIG_REQUEST || hlen != sizeof(struct mig_request_h)) { +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR | DSDEB_MIG | DSDEB_MIGSTAGE)) + printk("%s-mig_recv_request: bad MIG_REQUEST type=%d " + " head=0x%x len=%d\n", + desc_mostask(NULL), type, (int)mrp, hlen); +#endif + goto failed; + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-recv_request 2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((*mrp)->version != MOSIX_MIGRATION_VERSION) + { + static int complained; + + if(++complained < 10 || complained % 10000 == 0) + printk("Migration request denied: " + "Incompatible versions of MOSIX.\n"); + goto failed; + } + if ((*mrp)->topology != MAX_MOSIX_TOPOLOGY) + { + static int complained; + + if(++complained < 10 || complained % 10000 == 0) + printk("Migration request denied: Incompatible topology" + " (here=%d, there=%d)\n", MAX_MOSIX_TOPOLOGY, + (*mrp)->topology); + goto failed; + } +#ifdef CONFIG_MOSIX_DFSA + if(!(*mrp)->has_dfsa) +#else + if((*mrp)->has_dfsa) +#endif /* CONFIG_MOSIX_DFSA */ + { + static int complained; + + if(++complained < 10 || complained % 10000 == 0) + printk("Migration request denied: " + "Cannot mix DFSA and NON-DFSA kernels\n"); + goto failed; + } + + if ((*mrp)->request_type != DEPUTY_PROBE) + { + if (!balance_commit_mig((*mrp)->reason, (*mrp)->pages_sent)) + goto failed; + current->mosix.pages_i_bring = (*mrp)->pages_sent; + } + + return (0); + +failed: +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & (DSDEB_ERROR|DSDEB_MIGSTAGE)) + printk("%s-recv_request 3 (failed)\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_flushdata(COMM_ALLDATA); + comm_free(*mrp); + *mrp = NULL; + +#ifdef CONFIG_MOSIX_DEBUG + if 
(ds_debug & (DSDEB_ERROR|DSDEB_MIG|DSDEB_MIGSTAGE)) + printk("%s-mig_recv_request: nacking with %d\n", + desc_mostask(NULL), reply); +#endif + + comm_send(MIG_REQUEST|REPLY, (void *) &reply, sizeof(int), NULL, 0, 0); + return (reply); +} + +/* + * mig_local_passto_remote: migrate local -> remote + */ + +static int +mig_local_passto_remote(int whereto, int reason) +{ + struct task_struct *p = current; + struct mosix_link *mlink; + int error; + int omigpages; + + if(!p->mosix.held_files && (error = mosix_rebuild_file_list())) + return(error); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-local_passto_remote: 1\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + lock_mosix(); + write_lock_irq(&tasklist_lock); + p->mosix.remote_caps = current->cap_effective; + task_lock(p); + spin_lock(&runqueue_lock); + if(p->mm->dumpable) + p->mosix.dflags |= DDUMPABLE; + else + p->mosix.dflags &= ~DDUMPABLE; + p->mosix.dflags |= (DDEPUTY | DSYNC); + spin_unlock(&runqueue_lock); + task_unlock(p); + write_unlock_irq(&tasklist_lock); + unlock_mosix(); + p->mosix.deputy_regs = ALL_REGISTERS; + p->mosix.pass_regs = 0; + + if (!(mlink = comm_open(whereto, 0, comm_connect_timo))) { + error = -EDIST; + goto failed; + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-local_passto_remote: 2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if (comm_use(p, mlink)) + panic("local_passto_remote: previous contact not null"); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-local_passto_remote: 3\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if(!(omigpages = p->mosix.migpages)) + p->mosix.migpages = count_migrating_pages(); + if ((error = mig_send_request(reason, FROM_DEPUTY))) + { + p->mosix.migpages = omigpages; + goto failed; + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-local_passto_remote: returning...\n", + desc_mostask(NULL)); +#endif /* 
CONFIG_MOSIX_DEBUG */ + + release_migrations(whereto); + if (mig_do_send()) { + error = -EDIST; + p->mosix.migpages = omigpages; + goto failed; + } + p->mosix.migpages = omigpages; + + deputy_startup(); + return (0); + +failed: + if(mlink) + comm_close(NULL); + undeputy(current); + return (error); +} + + +/* + * mig_remote_passto_local: migrate remote -> local + */ + +static int +mig_remote_passto_local(int whereto, int reason) +{ + struct task_struct *p = current; + struct mig_request_h *mrp; + int error = 0; + long orlim_as; + long orlim_rss; + long orlim_stack; + int load_came_in = 0; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 1\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + mosix_deputy_rusage(0); + if(obtain_mm()) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-remote_passto_local: mm allocation failed\n", + desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DIAG */ + return(-EDIST); + } +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 2\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + /* rlimits are no excuse to prevent coming back home: */ + /* save old values for the case of failure */ + orlim_as = p->rlim[RLIMIT_AS].rlim_cur; + orlim_rss = p->rlim[RLIMIT_RSS].rlim_cur; + orlim_stack = p->rlim[RLIMIT_STACK].rlim_cur; + p->rlim[RLIMIT_AS].rlim_cur = RLIM_INFINITY; + p->rlim[RLIMIT_RSS].rlim_cur = RLIM_INFINITY; + p->rlim[RLIMIT_STACK].rlim_cur = RLIM_INFINITY; + + if ((error = comm_send(DEP_COME_BACK, (void *) &reason, sizeof(reason), + NULL, 0, 0))) { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-remote_passto_local: error DEP_COME_BACK\n", + desc_mostask(NULL)); +#endif + end_coming_in(error); + current->mosix.pages_i_bring = 0; + deputy_die_on_communication(); + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 3\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = 
mig_recv_request(&mrp))) + goto fail; + load_came_in = 1; + comm_free((void *) mrp); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 4\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if ((error = comm_send(MIG_REQUEST|REPLY, (void *)&error, sizeof(int), + NULL, 0, 0))) { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-remote_passto_local: error %d (MIG_REQUEST)\n", + desc_mostask(NULL), error); +#endif + end_coming_in(error); + current->mosix.pages_i_bring = 0; + deputy_die_on_communication(); + /*NOTREACHED*/ + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 5\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + if (!(error = mig_do_receive())) { + comm_close(NULL); + undeputy(p); + mosix_clear_statistics(); +#ifdef SHOW_MIGRATIONS + if(SHOW_MIGRATIONS) + printk("Wooooooooo.....\n"); +#endif /*SHOW_MIGRATIONS*/ + end_coming_in(0); + current->mosix.pages_i_bring = 0; + if(p->mosix.dflags & DDELAYHELD) + { + p->mosix.dflags &= ~DDELAYHELD; + mosix_rebuild_file_list(); + } + return(0); + } + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 6\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ +fail: + p->rlim[RLIMIT_AS].rlim_cur = orlim_as; + p->rlim[RLIMIT_RSS].rlim_cur = orlim_rss; + p->rlim[RLIMIT_STACK].rlim_cur = orlim_stack; + exit_mm(p); +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_local: 7 (err = %d)\n", + desc_mostask(NULL), error); +#endif /* CONFIG_MOSIX_DEBUG */ + if(load_came_in) + { + end_coming_in(error); + current->mosix.pages_i_bring = 0; + } + + return (error); +} + + +/* + * mig_remote_passto_remote: migrate remote -> remote + * + * (1) open mosix_link to mig-daemon at new remote site (and use it) + * (2) send migration request, obtain remote's mosix_link address. 
+ * (3) send DEP_PLEASE_MIGRATE request to old remote site, with "reason" + * and the mosix_link address to which it should connect + * (4) if the reply is zero - close old mosix_link and use the new one (SUCCESS) + * FAILURE: if not fatal - close new connection and return error + * if fatal - close connection and call deputy_die_on_communication() + */ + +static int +mig_remote_passto_remote(int whereto, int reason) +{ + struct task_struct *p = current; + struct mosix_link *newmlink = 0, *oldmlink = 0; + struct please_migrate_h pm; + int status; + int error; + + /* connect to remote mig daemon */ + +#ifdef CONFIG_MOSIX_DIAG + if (!(p->mosix.dflags & DSYNC)) + panic("remote_passto_remote: deputy not synced"); +#endif + + p->mosix.pass_regs = 0; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_remote: 0\n", desc_mostask(NULL)); +#endif + + if (!(newmlink = comm_open(whereto, 0, comm_connect_timo))) + return (-EDIST); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_remote: 1\n", desc_mostask(NULL)); +#endif + + if (!(oldmlink = comm_use(p, newmlink))) + panic("remote_passto_remote: no previous contact"); + + if ((error = mig_send_request(reason, DEPUTY_PROBE))) + goto failed; + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_remote: 2 \n", desc_mostask(NULL)); +#endif + + if ((error = comm_copydata((void *)&pm.ma, sizeof(pm.ma), 0))) + goto failed; + + comm_use(p, oldmlink); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + printk("%s-mig_remote_to_remote: 3 \n", desc_mostask(NULL)); +#endif + + pm.reason = reason; + pm.to = whereto; + mosix_deputy_rusage(0); + if ((error = deputy_request(DEP_PLEASE_MIGRATE, &pm, sizeof(pm), + NULL, 0, 0, (void **)&status, -sizeof(status)))) + goto fatal; + if ((error = status)) + goto failed; + + comm_use(p, newmlink); + comm_close(oldmlink); + +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIGSTAGE) + 
printk("%s-mig_remote_to_remote: 4 (DONE) \n", + desc_mostask(NULL)); +#endif + + return (0); + +failed: +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR | DSDEB_MIG | DSDEB_MIGSTAGE)) + printk("%s-remote_passto_remote: failed %d\n", + desc_mostask(NULL), error); +#endif + comm_use(p, oldmlink); + comm_close(newmlink); + return (error); + +fatal: +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_ERROR | DSDEB_MIG | DSDEB_MIGSTAGE)) + printk("%s-remote_passto_remote: fatal %d\n", + desc_mostask(NULL), error); +#endif + comm_use(p, oldmlink); + comm_close(newmlink); +#ifdef CONFIG_MOSIX_DIAG + printk("%s-remote_passto_remote: fatal error %d\n", desc_mostask(NULL), + error); +#endif + deputy_die_on_communication(); + /* NOTREACHED */ +} + + +void +undeputy(struct task_struct *p) +{ + struct mosix_task *m = &p->mosix; + int myself = (p == current);/* else it is a new-son, not yet on lists */ + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & DSDEB_MIG) + { + printk("%s-undepty: ", desc_mostask(NULL)); + printk("undeputy'ing process %s\n", desc_mostask(m)); + } +#endif + + if(myself) + { + spin_lock_irq(&p->sigmask_lock); + m->dflags &= ~DFAKESIGNAL; + spin_unlock_irq(&p->sigmask_lock); + lock_mosix(); + write_lock_irq(&tasklist_lock); + task_lock(p); + spin_lock(&runqueue_lock); + } + if(p->mm) + p->mm->dumpable = (m->dflags & DDUMPABLE) != 0; + m->dflags &= ~(DDEPUTY|DSYNC|DDUMPABLE); + if(myself) + { + spin_unlock(&runqueue_lock); + task_unlock(p); + write_unlock_irq(&tasklist_lock); + if(process_told(p, DREQ_HOMEWAKE)) + { + process_ack(p, DREQ_HOMEWAKE); + wake_up(mos_to_waitp(m)); + } + unlock_mosix(); + evaluate_pending_signals_in_mosix_context(); + absorb_deptime(m->deputytime); + m->deputytime = 0; + stop_storing_common_ps_info(); + flush_read_cache(); + free_ucache(); + if(m->stay & DSTAY_FOR_MONKEY) + mosix_check_for_freedom_to_move(); + } + else + { + if(m->contact) + { + comm_close(m->contact); + m->contact = NULL; + } + } + m->commpri = 0; + 
m->pages_i_bring = 0; +} + +int +mig_send_mm_stats(void) +{ + struct mm_stats_h s; + + if(sizeof(struct mm_stats_h) != + offsetof(struct mm_struct, env_end) - + offsetof(struct mm_struct, start_code) + sizeof(long)) + /* something changed in the Linux header - need to fix! */ + panic("mig_send_mm_stats"); + memcpy((caddr_t)&s, (caddr_t)&current->mm->start_code, sizeof(s)); + expel_progress = 1; + return(comm_send(MIG_MM_STATS, &s, sizeof(s), NULL, 0, 0)); +} + +int +mig_send_mm_areas(void) +{ + struct task_struct *p = current; + register struct vm_area_struct *vma; + struct file *fp; + struct mmap_parameters_h m; + + m.origin = (p->mosix.dflags & DDEPUTY) ? PE : p->mosix.deppe; + m.fixed = 1; + for(vma = p->mm->mmap ; vma != NULL ; vma = vma->vm_next) + { + m.addr = vma->vm_start; + m.len = vma->vm_end - vma->vm_start; + m.flags = vma->vm_flags; + if((fp = vma->vm_file)) + { + struct inode *ip = fp->f_dentry->d_inode; + + m.pgoff = vma->vm_pgoff; + if(p->mosix.dflags & DREMOTE) + { + m.fp = home_file(fp); + m.dp = ip->u.remote_i.dp; + m.uniq = ip->u.remote_i.unique; + m.isize = ip->i_size; + m.nopage = ip->u.remote_i.nopage; + } + else + { + m.fp = vma->vm_file; + m.dp = m.fp->f_dentry; + m.uniq = ip->i_unique; + m.isize = ip->i_size; + m.nopage = vma->vm_ops->nopage; + } + } + else + { + m.fp = NULL; + m.pgoff = 0; + } + if(comm_send(MIG_MM_AREA, &m, sizeof(m), NULL, 0, 0)) + return(-1); + } + expel_progress = 1; + return(0); +} + +int +mig_send_page(unsigned long addr, int size) +{ +#ifdef CONFIG_MOSIX_DIAG + if(--current->mosix.mig_page_count == -1) + { + printk("Here I am sending too many pages, was sending %d " + "and current recount is %d\n", + current->mosix.migpages, count_migrating_pages()); +#ifdef CONFIG_MOSIX_UDB + mosix_debugger("pages increased"); +#endif /* CONFIG_MOSIX_UDB */ + } +#endif /* CONFIG_MOSIX_DIAG */ + expel_progress = 1; + /* + * Although the page is in user-space, we deliberately send it from + * kernel-space to prevent requesting it from the 
"REMOTE" + * (because we are already marked as DEPUTY...) + * This is safe since we just checked that the page is there. + */ + return(comm_send(MIG_PAGE, &addr, sizeof(addr), (void *)addr, size, 0)); +} + +int +mig_send_pages(void) +{ + int credit; + +#ifdef CONFIG_MOSIX_DIAG + current->mosix.mig_page_count = current->mosix.migpages; +#endif /* CONFIG_MOSIX_DIAG */ + credit = run_over_dirty_pages(mig_send_page, 1); +#ifdef CONFIG_MOSIX_DIAG + if(credit >= 0 && current->mosix.mig_page_count > 0) + printk("not all pages sent: was sending %d, left %d, recount=%d\n", + -current->mosix.pages_i_bring, current->mosix.mig_page_count, + count_migrating_pages()); +#endif /* CONFIG_MOSIX_DIAG */ + return(credit); +} + +int +mig_send_fp(void) +{ + unlazy_fpu(current); + expel_progress = 1; + return(comm_send(has_fxsr() ? MIG_XFP : MIG_FP, &current->thread.i387, + sizeof(current->thread.i387), NULL, 0, 0)); +} + +int +mig_send_ldt(void) +{ + expel_progress = 1; + return(comm_send(MIG_LDT, NULL, 0, + current->mm->context.segments, LDT_ENTRIES*LDT_ENTRY_SIZE, 0)); +} + +int +mig_send_misc(int credit) +{ + struct mig_misc_h m; + clock_t utime, stime; + extern unsigned long do_it_virt(struct task_struct *, unsigned long); + void *hd; + int hdln; + register struct task_struct *p = current; + siginfo_t *forced_sigs; + + m.ptrace = p->ptrace; + m.dflags = p->mosix.dflags & (DTRACESYS1|DTRACESYS2); + memcpy((caddr_t)m.debugreg, (caddr_t)p->thread.debugreg, + sizeof(m.debugreg)); + m.nice = p->nice; + m.caps = p->cap_effective; + p->mosix.remote_caps = m.caps; + m.it_prof_incr = p->it_prof_incr; + m.it_virt_incr = p->it_virt_incr; + if(((p->mosix.dflags & DDEPUTY) && p->mosix.deputy_regs) || + ((p->mosix.dflags & DREMOTE) && + p->mosix.deputy_regs != ALL_REGISTERS)) + memcpy((caddr_t)&m.regs, (caddr_t)p->mosix.altregs, sizeof(m.regs)); + /* else do not bother - DEPUTY will bring */ + m.rlim_cpu = p->rlim[RLIMIT_CPU]; + m.rlim_data = p->rlim[RLIMIT_DATA]; + m.rlim_stack = 
p->rlim[RLIMIT_STACK]; + m.rlim_rss = p->rlim[RLIMIT_RSS]; + m.rlim_as = p->rlim[RLIMIT_AS]; +#ifdef CONFIG_MOSIX_DFSA + m.rlim_nofile = p->rlim[RLIMIT_NOFILE]; + m.rlim_fsz = p->rlim[RLIMIT_FSIZE]; +#endif /* CONFIG_MOSIX_DFSA */ + m.stay = (p->mosix.stay & DNOMIGRATE) != 0; + if(p->mosix.dflags & DDEPUTY) + { + m.deppe = PE; + m.mypid = p->pid; + memcpy(m.features, boot_cpu_data.x86_capability, + sizeof(m.features)); + } + else + { + m.deppe = p->mosix.deppe; + m.mypid = p->mosix.mypid; + memcpy(m.features, p->mosix.features, sizeof(m.features)); + m.passedtime = p->mosix.passedtime; + } + m.deputy_regs = p->mosix.deputy_regs; + m.deccycle = p->mosix.deccycle; + m.decay = p->mosix.decay; + m.dpolicy = p->mosix.dpolicy; + memcpy(m.depcost, deputy_here, sizeof(deputy_here)); + m.depspeed = cpuspeed; + m.nmigs = p->mosix.nmigs + 1; + m.info.disclosure = p->mosix.disclosure; + m.info.uid = p->uid; + m.info.gid = p->gid; + m.info.pgrp = p->pgrp; + m.info.session = p->session; + memcpy(m.info.comm, p->comm, sizeof(m.info.comm)); + m.info.tgid = p->tgid; + cli(); + m.it_virt_value = p->it_virt_value; + m.it_prof_value = p->it_prof_value; + p->it_prof_value = 0; + p->it_virt_value = 0; + if(p->mosix.dflags & DDEPUTY) + m.passedtime = p->times.tms_utime + p->times.tms_stime; + utime = p->times.tms_utime; + stime = p->times.tms_stime; + m.asig.sigs = p->mosix.asig; + m.asig.nforced = p->mosix.nforced_sigs; + forced_sigs = p->mosix.forced_sigs; + m.pagecredit = credit; + m.lastxcpu = p->mosix.last_sigxcpu; + sti(); + if(comm_send(MIG_MISC, &m, sizeof(m), forced_sigs, + m.asig.nforced * sizeof(siginfo_t), 0)) + goto fail; + comm_migration_mode(0); + expel_progress = 1; + if(comm_recv(&hd, &hdln) == (MIG_MISC|REPLY)) + return(0); /* commit point */ + + fail: + cli(); + p->it_prof_value = m.it_prof_value; + p->it_virt_value = m.it_virt_value; + /* maintain accurate correlation between process-times and timers: */ + utime = p->times.tms_utime - utime; + stime = 
p->times.tms_stime - stime; + sti(); + if(utime > 0) + { + extern rwlock_t xtime_lock; + + write_lock_irq(&xtime_lock); + do_it_virt(p, utime); + write_unlock_irq(&xtime_lock); + } + if(!(p->mosix.dflags & DDEPUTY) && /* (DEPUTY gets it in "depticks") */ + utime + stime > 0) + absorb_deptime(utime + stime); + return(-1); +} + +void +neutralize_my_load(int neut) +{ + struct task_struct *p = current; + + spin_lock_irq(&runqueue_lock); + if(neut) + { + run_off(p); + p->mosix.dflags |= DPAGEIN; /* disabling run_on/off */ + } + else if(p->mosix.dflags & DPAGEIN) + { + p->mosix.dflags &= ~DPAGEIN; + run_on(p); + } + spin_unlock_irq(&runqueue_lock); +} + +int +mig_do_send(void) +{ + int credit; /* # of clean demand-pages */ + + comm_migration_mode(1); + neutralize_my_load(1); /* don't count me: I'm going to disappear */ + if(mig_send_mm_stats() || mig_send_mm_areas() || + (credit = mig_send_pages()) < 0 || + (current->used_math && mig_send_fp()) || + (current->mm->context.segments && mig_send_ldt()) || + mig_send_misc(credit)) + { + comm_send(MIG_NOT_COMING, NULL, 0, NULL, 0, 0); + comm_migration_mode(0); + neutralize_my_load(0); + changed_my_mind_and_staying(); + return(-1); + } + /* "comm_migration_mode(0);" was done by "mig_send_misc" */ + neutralize_my_load(0); + return(0); +} + +void +mig_do_receive_mm_stats(struct mm_stats_h *s) +{ + if(sizeof(struct mm_stats_h) != + offsetof(struct mm_struct, env_end) - + offsetof(struct mm_struct, start_code) + sizeof(long)) + /* something changed in the Linux header - need to fix! 
*/ + panic("mig_do_receive_mm_stats"); + memcpy((caddr_t)&current->mm->start_code, (caddr_t)s, sizeof(*s)); + current->mosix.rfreepages = 0; +} + +int +deputy_remmap(struct mmap_parameters_h *m, int dummy) +{ + unsigned long result; + unsigned long prot, flags; + extern asmlinkage long sys_madvise(unsigned long, size_t, int); + + /* unconvert prot+flags: */ + flags = MAP_FIXED|MAP_PRIVATE; + prot = 0; + if(m->flags & VM_GROWSDOWN) + flags |= MAP_GROWSDOWN; + if(m->flags & VM_DENYWRITE) + flags |= MAP_DENYWRITE; + if(m->flags & VM_EXECUTABLE) + flags |= MAP_EXECUTABLE; + if(m->flags & VM_READ) + prot |= VM_READ; + if(m->flags & VM_WRITE) + prot |= VM_WRITE; + if(m->flags & VM_EXEC) + prot |= VM_EXEC; + if(m->flags & VM_MAYSHARE) + current->mosix.dirty_bits |= MMAP_MAYSHARE; + deeper_sleep(); + result = do_mmap_pgoff(m->fp, m->addr, m->len, prot, flags, m->pgoff); + lighter_sleep(); + if(m->flags & VM_MAYSHARE) + current->mosix.dirty_bits &= ~MMAP_MAYSHARE; + if(m->flags & VM_READHINTMASK) + sys_madvise(m->addr, m->len, (m->flags & VM_SEQ_READ) ? 
+ MADV_RANDOM : MADV_SEQUENTIAL); + return(IS_ERR((const void *)result)); +} + +#define AVAILABLE_PAGES (nr_free_pages() + atomic_read(&page_cache_size) + \ + nr_swap_pages + swapper_space.nrpages) +#define VERY_LOW_ON_MEMORY (nr_swap_pages < p->mosix.pages_i_bring && \ + AVAILABLE_PAGES <= num_physpages / 30) +#define I_CAN_FIT (AVAILABLE_PAGES > p->mosix.pages_i_bring) + +int +mig_do_receive_page(unsigned long addr) +{ + pgd_t * pgd; + pmd_t * pmd; + pte_t * pte; + struct page *page = NULL; + struct vm_area_struct *vma; + static int stuck_here; + int tries = 0; + long pri, mypri; + struct task_struct *p = current, *pp; + struct mm_struct *mm = p->mm; + int must = (p->mosix.dflags & DMUSTBEBACK); + static DECLARE_WAIT_QUEUE_HEAD(wait_for_someone_to_give_up); + + if(!(vma = find_vma(mm, addr))) + { + comm_flushdata(COMM_ALLDATA); + return(-1); + } + while(VERY_LOW_ON_MEMORY) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s: Caution taken with page, previous attempts=%d/%d\n", desc_mostask(NULL), tries, stuck_here); +#endif /* CONFIG_MOSIX_DEBUG */ + stuck_here++; + tries++; + if(must ? (tries > 200 && stuck_here >= 300) : + (tries > 5 && stuck_here >= 100)) + { + give_up: + stuck_here = 0; + wake_up(&wait_for_someone_to_give_up); + printk("%s: Arrival rejected due to severe memory " + "shortage.\n", desc_mostask(NULL)); + comm_flushdata(COMM_ALLDATA); + return(-1); + } + mypri = ((p->mosix.dflags & DREMOTE) ? 0x20000000 : 0) + + (must ? 0 : 0x40000000) + + p->mosix.pages_i_bring + mm->rss; + pri = 0; + read_lock(&tasklist_lock); + for_each_task(pp) + if(pp != p && (pp->mosix.dflags & DINCOMING)) + { + pri = ((pp->mosix.dflags & DREMOTE) ? 0x20000000 : 0) + + ((pp->mosix.dflags & DMUSTBEBACK) ? 
0 : 0x40000000) + + pp->mosix.pages_i_bring + pp->mm->rss; + if(pri >= mypri) + break; + } + if(must && tries > 5) + for_each_task(pp) + if((pp->mosix.dflags & (DREMOTE | DREMOTEDAEMON)) == DREMOTE && + (tries < 50 || !pp->mosix.stay)) + { + mosix_add_to_whereto(pp, GOBACKHOME); + pri = mypri + 1; + } + read_unlock(&tasklist_lock); + if(pri >= mypri || (must ? (tries < 50 || stuck_here < 100) : + (stuck_here < 2 || + ((p->mosix.dflags & DDEPUTY) && tries < 3)))) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s: %s\n", desc_mostask(NULL), + pri >= mypri ? + "I am not the worst, so I wait" : + "waiting a bit longer"); +#endif /* CONFIG_MOSIX_DEBUG */ + sleep_on_timeout(&wait_for_someone_to_give_up, HZ/20); + continue; + } + if(must && I_CAN_FIT && (page = alloc_page(GFP_HIGHUSER))) + break; + goto give_up; + } + if(stuck_here) + { + wake_up(&wait_for_someone_to_give_up); + stuck_here = 0; +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s: got through to continue arriving%s\n", + desc_mostask(NULL), + page ? 
" (also got a page)" : ""); +#endif /* CONFIG_MOSIX_DEBUG */ + } + if(--p->mosix.pages_i_bring < 0) + mosix_panic("too many pages arrived"); + p->mosix.rfreepages--; + if(!page && !(page = alloc_page(GFP_HIGHUSER))) + { + comm_flushdata(COMM_ALLDATA); + return(-1); + } + if(comm_copydata(kmap(page), PAGE_SIZE, 0)) + { + kunmap(page); + __free_page(page); + return(-1); + } + kunmap(page); + pgd = pgd_offset(mm, addr); + spin_lock(&mm->page_table_lock); + if (!(pmd = pmd_alloc(mm, pgd, addr))) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-mig_do_receive_page: failed allocating pmd for addr 0x%x\n", + desc_mostask(NULL), (int)addr); +#endif /* CONFIG_MOSIX_DIAG */ + spin_unlock(&mm->page_table_lock); + __free_page(page); + return(-1); + } + if (!(pte = pte_alloc(mm, pmd, addr))) + { +#ifdef CONFIG_MOSIX_DIAG + printk("%s-mig_do_receive_page: failed allocating pte for addr 0x%x\n", + desc_mostask(NULL), (int)addr); +#endif /* CONFIG_MOSIX_DIAG */ + __free_page(page); + spin_unlock(&mm->page_table_lock); + return(-1); + } + if (!pte_none(*pte)) { + printk("%s-mig_do_receive_page: double page at addr 0x%x\n", + desc_mostask(NULL), (int)addr); + __free_page(page); + spin_unlock(&mm->page_table_lock); + return(-1); + } + set_pte(pte, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + mm->rss++; + spin_unlock(&mm->page_table_lock); + flush_page_to_ram(page); + return(0); +} + +void +mig_do_receive_fp(union i387_union *i) +{ + current->used_math = 1; + unlazy_fpu(current); + if(has_fxsr()) + fsave_to_fxsave(i, &current->thread.i387); + else + memcpy((caddr_t)&current->thread.i387, (caddr_t)i, sizeof(*i)); +} + +void +mig_do_receive_xfp(union i387_union *i) +{ + current->used_math = 1; + unlazy_fpu(current); + if(has_fxsr()) + memcpy((caddr_t)&current->thread.i387, (caddr_t)i, sizeof(*i)); + else + fxsave_to_fsave(i, &current->thread.i387); +} + +int +mig_do_receive_ldt(void) +{ + struct mm_struct *mm = current->mm; + +#ifdef CONFIG_MOSIX_DIAG + if(mm->context.segments) + panic("mig_do_receive_ldt:
double"); +#endif /* CONFIG_MOSIX_DIAG */ + if (!(mm->context.segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE))) + return(-1); + if(comm_copydata(current->mm->context.segments, + LDT_ENTRIES*LDT_ENTRY_SIZE, 0)) + { + vfree(current->mm->context.segments); + mm->context.segments = NULL; + return(-1); + } + mm->context.cpuvalid = 1UL << smp_processor_id(); + load_LDT(mm); + return(0); +} + +void +mig_do_receive_misc(struct mig_misc_h *m) +{ + int i, bit; + register struct task_struct *p = current; + + if(p->mosix.dflags & DREMOTE) + { + p->ptrace |= m->ptrace; + p->nice = m->nice; + p->cap_effective = (p->cap_effective & ~REMOTE_CAPS) | + (m->caps & REMOTE_CAPS); + memcpy(p->mosix.features, m->features, sizeof(m->features)); + task_lock(p); + if(m->stay) + p->mosix.stay |= DNOMIGRATE; + else + p->mosix.stay &= ~DNOMIGRATE; + task_unlock(p); + p->mosix.nmigs = m->nmigs; + remote_do_updinfo(&m->info); + } + else + { + p->ptrace &= ~PT_TRACESYS; + p->ptrace |= (m->ptrace & PT_TRACESYS); + } + p->mosix.dflags &= ~(DTRACESYS1|DTRACESYS2); + p->mosix.dflags |= m->dflags; + memcpy((caddr_t)p->thread.debugreg, (caddr_t)m->debugreg, + sizeof(m->debugreg)); + p->it_prof_incr = m->it_prof_incr; + p->it_virt_incr = m->it_virt_incr; + p->mosix.deputy_regs = m->deputy_regs; + for(bit = 1, i = 0 ; i < sizeof(struct pt_regs) / sizeof(int) ; + i++ , bit <<= 1) + if(!(m->deputy_regs & bit)) + ((int *)p->mosix.altregs)[i] = ((int *)&m->regs)[i]; + p->rlim[RLIMIT_CPU] = m->rlim_cpu; + p->rlim[RLIMIT_DATA] = m->rlim_data; + p->rlim[RLIMIT_STACK] = m->rlim_stack; + p->rlim[RLIMIT_RSS] = m->rlim_rss; + p->rlim[RLIMIT_AS] = m->rlim_as; +#ifdef CONFIG_MOSIX_DFSA + p->rlim[RLIMIT_NOFILE] = m->rlim_nofile; + p->rlim[RLIMIT_FSIZE] = m->rlim_fsz; +#endif /* CONFIG_MOSIX_DFSA */ + if((p->mosix.deppe = m->deppe) != PE) + this_machine_is_favourite(p->mosix.deppe); + p->mosix.deccycle = m->deccycle; + p->mosix.decay = m->decay; + p->mosix.dpolicy = m->dpolicy; + p->mosix.mypid = m->mypid; + 
memcpy(p->mosix.depcost, m->depcost, sizeof(m->depcost)); + p->mosix.depspeed = m->depspeed; + if(p->mosix.dflags & DREMOTE) + p->mosix.passedtime = m->passedtime; + else + p->mosix.passedtime = 0; + p->mosix.rpagecredit = m->pagecredit; + if(p->mosix.dflags & DREMOTE) + { + sprintf(p->comm, REMOTESTR2, m->mypid); + spin_lock_irq(&p->sigmask_lock); + p->mosix.asig |= m->asig.sigs; + spin_unlock_irq(&p->sigmask_lock); + while(m->asig.nforced--) + { + siginfo_t info; + + comm_copydata(&info, sizeof(info), 0); + force_sig_info(info.si_signo, &info, p); + } + } + cli(); + p->it_virt_value = m->it_virt_value; + p->it_prof_value = m->it_prof_value; + p->mosix.last_sigxcpu = m->lastxcpu; + sti(); + if(p->mosix.dflags & DDEPUTY) + deputy_analyse_remote_signals(&m->asig); +} + +int +mig_do_receive(void) +{ + struct mosix_task *m = &current->mosix; + int type; + void *head; + int hlen; + int (*mmap_func)(struct mmap_parameters_h *, int); + int got_not_coming = 0; + + spin_lock_irq(&runqueue_lock); + m->dflags |= DINCOMING; + spin_unlock_irq(&runqueue_lock); + current->used_math = 0; + while(1) + { + switch(type = comm_recv(&head, &hlen)) + { + case MIG_MM_STATS: + mig_do_receive_mm_stats((struct mm_stats_h *)head); + break; + case MIG_MM_AREA: + if(m->dflags & DREMOTE) + mmap_func = remote_mmap; + else + mmap_func = deputy_remmap; + if(mmap_func((struct mmap_parameters_h *)head, 1)) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s-mig_receive: map_area failed\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_free(head); + goto fail; + } + break; + case MIG_PAGE: + if(mig_do_receive_page(*((unsigned long *)head))) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s-mig_receive: page failed\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_free(head); + goto fail; + } + break; + case MIG_FP: + mig_do_receive_fp((union i387_union *)head); + break; + case MIG_XFP: + mig_do_receive_xfp((union i387_union *)head); +
break; + case MIG_LDT: + if(mig_do_receive_ldt()) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s-mig_receive: ldt failed\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_free(head); + goto fail; + } + break; + case MIG_MISC: + mig_do_receive_misc((struct mig_misc_h *)head); + comm_free(head); + spin_lock_irq(&runqueue_lock); + m->dflags &= ~DINCOMING; + spin_unlock_irq(&runqueue_lock); + flush_tlb(); /* for all the new pages */ + comm_send(MIG_MISC|REPLY, NULL, 0, NULL, 0, 0); + return(0); + case MIG_NOT_COMING: + got_not_coming = 1; + goto fail; + default: + if(m->dflags & DDEPUTY) + deputy_communication_failed(); +#ifdef CONFIG_MOSIX_DEBUG + printk("%s - Bad Message Type (0x%x) on mig_do_receive\n", + desc_mostask(NULL), type); +#endif /* CONFIG_MOSIX_DEBUG */ + comm_free(head); + goto fail; + } + comm_free(head); + if((m->dflags & DREMOTE) && (mosadmin_mode_block || !NPE)) + goto fail; + } + fail: +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_MIG) + printk("%s: Arrival Failed\n", desc_mostask(NULL)); +#endif /* CONFIG_MOSIX_DEBUG */ + + if(type >= 0) + comm_flushdata(COMM_ALLDATA); + spin_lock_irq(&runqueue_lock); + m->dflags &= ~DINCOMING; + spin_unlock_irq(&runqueue_lock); + if((m->dflags & DDEPUTY) && !got_not_coming) + { + /* receiving all the debris can take time and + someone may need the memory meanwhile! */ + current->mosix.pages_i_bring = 0; + do_munmap(current->mm, 0, PAGE_OFFSET); + while((type = comm_recv(&head, &hlen)) >= 0 && + type != MIG_NOT_COMING) + { + comm_free(head); + comm_flushdata(COMM_ALLDATA); + if(type == MIG_MISC) + /* send anything but MIG_MISC|REPLY: */ + /* they will then send MIG_NOT_COMING! 
*/ + comm_send(DEP_SYNC, NULL, 0, NULL, 0, 0); + } + } + return(-1); +} + +void +mig_set_lock(int lock) +{ + struct task_struct *p = current; + + task_lock(p); + if (lock) + p->mosix.stay |= DNOMIGRATE; + else + p->mosix.stay &= ~DNOMIGRATE; + task_unlock(p); + if(current->mosix.dflags & DDEPUTY) + deputy_request(DEP_UPDATE_LOCK, &lock, sizeof(int), NULL, 0, 0, + NULL, 0); +} + +int +remote_set_lock(int *lockp) +{ + mig_set_lock(*lockp); + comm_free(lockp); + return(comm_send(DEP_UPDATE_LOCK|REPLY, NULL, 0, NULL, 0, 0)); +} + +int +mig_migrate(int to) +{ + if (to > 0) + return(passto(to, 0) ? + (mos_to_net(to, NULL) ? -ENETUNREACH : -ENXIO) : 0); + + switch(to) + { + case DM_GOBACKHOME: + return(passto(GOBACKHOME, 0) ? -EDIST : 0); + case DM_BALANCE: + if(current->mosix.dflags & DDEPUTY) + coordinate(0, 0); + else + consider(0, NULL); + return(0); + } + return(-EINVAL); +} + +int +count_migrating_pages(void) +{ + return(run_over_dirty_pages(NULL, 0)); +} diff -urN linux-2.4.17/mos/mkdefcalls.c linux_umopenmosix/mos/mkdefcalls.c --- linux-2.4.17/mos/mkdefcalls.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/mkdefcalls.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,79 @@ +/* Changes since Feb 12, 2002 by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ *
+ * Author(s): Amnon Shiloh, Moshe Bar
+ */
+#include
+#include
+#include
+/* Stand-alone generator: FILE is aliased to an opaque struct and the stdio calls used are declared by hand. */
+struct file;
+#define FILE struct file
+
+extern int fprintf(FILE *, char *, ...);
+extern int fgets(char *, int, FILE *);
+extern int perror(char *);
+extern FILE *fopen(char *, char *);
+/* line: current input line.  sys_name[i]: symbol of syscall slot i that still needs a default stub ("" = none). */
+char line[2048];
+char sys_name[NR_syscalls][60];
+/* Open `file' in `mode'; on failure report it with perror() and terminate instead of returning NULL. */
+FILE *
+myopen(char *file, char *mode)
+{
+	FILE *ret = fopen(file, mode);
+	if(ret)
+		return(ret);
+	perror(file);	/* main() uses the streams unchecked: never hand it a NULL */
+	exit(1);	/* does not return */
+}
+/* Write SYSCALLS_OUT: a default remote_<name>() stub for every sys_call_table entry with no remote_<name>( line in SYSCALLS_IN. */
+int
+main(void)
+{
+	FILE *entry = myopen(ENTRY_IN, "r");
+	FILE *sysin = myopen(SYSCALLS_IN, "r");
+	FILE *sysout = myopen(SYSCALLS_OUT, "w");
+	register char *c;
+	register int i;
+	long t, time(long *);
+	char *ctime(long *);
+
+	time(&t);
+	while(fgets(line, 2048, entry) &&	/* skip ahead to the table label */
+		strcmp(line, "ENTRY(sys_call_table)\n"))
+		;
+	for(i = 0 ; i < NR_syscalls && fgets(line, 2048, entry) ; i++)
+		if(!strncmp(line, "\t.long SYMBOL_NAME(", 19))
+		{
+			for(c = &line[19] ; *c && *c != ')' ; c++);
+			*c = '\0';	/* c now marks the end of the symbol name */
+			if(strcmp(&line[19], "sys_ni_syscall") && c - &line[19] < (long)sizeof(sys_name[i]))
+				strcpy(sys_name[i], &line[19]);	/* fits: length checked above; over-long names are skipped */
+		}
+	while(fgets(line, 2048, sysin))
+		if(!strncmp(line, "remote_sys_", 11) ||
+			!strncmp(line, "remote_old_", 11))
+			for(i = 0 ; i < NR_syscalls ; i++)
+				if(!strncmp(&line[7], sys_name[i], strlen(sys_name[i])) &&	/* &line[7] skips "remote_" */
+					line[7+strlen(sys_name[i])] == '(')
+				{
+					sys_name[i][0] = '\0';	/* hand-written handler exists: no default stub */
+					break;
+				}
+	fprintf(sysout, "/* Please do not edit -- this file is created automatically */\n");
+	fprintf(sysout, "/* %.24s */\n\n", ctime(&t));
+	fprintf(sysout, "#include \n");
+	fprintf(sysout, "#include \n");
+	fprintf(sysout, "#include \n\n");
+	for(i = 0 ; i < NR_syscalls ; i++)
+		if(sys_name[i][0])
+		{
+			fprintf(sysout, "long remote_%s(int n, struct pt_regs regs){\n", sys_name[i]);
+			fprintf(sysout, "return(remote_standard_system_call(n, &regs));\n}\n");
+		}
+	exit(0);
+}
diff -urN linux-2.4.17/mos/mosadmin.c linux_umopenmosix/mos/mosadmin.c
--- linux-2.4.17/mos/mosadmin.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/mos/mosadmin.c Wed Jun 26 23:45:18 2002
@@ -0,0 +1,363
@@
+/*
+ * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il)
+ *
+ * Permission to use this software is hereby granted under the terms of the
+ * GNU General Public License, as published by the Free Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY
+ * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED.
+ */
+/*
+ * Author(s): Amnon Shiloh.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_MOSIX_FS
+#include
+#endif /* CONFIG_MOSIX_FS */
+/* Administrative mode flags (0/1); the five mosadmin_mode_* ones are changed through admin_set_mode(). */
+int mosadmin_mode_stay;
+int mosadmin_mode_lstay;
+int mosadmin_mode_block;
+int mosadmin_mode_quiet;
+int mosadmin_mode_nomfs;
+int mosadmin_gateways;
+/* Values of mosadmin_mode_block/lstay saved by expel()/bring() before they force the live flag to 1. */
+int backed_block;
+int backed_lstay;
+/* in_expel: a non-boot expel() is in progress; in_bring: a bring() is in progress. */
+char bootexpel, expel_progress;
+static int in_bring, in_expel;
+/* Switch mode `mode' (a DS_* value) on or off and log the change; panics on an unknown mode, returns 0. */
+int
+admin_set_mode(int mode, int on)
+{
+	int *m;
+
+	lock_mosix();
+	switch(mode)
+	{
+	case DS_STAY:
+		m = &mosadmin_mode_stay;
+		break;
+	case DS_LSTAY:	/* while bring() runs, update the saved copy, not the forced live flag */
+		m = in_bring ? &backed_lstay : &mosadmin_mode_lstay;
+		break;
+	case DS_BLOCK:	/* likewise while a non-boot expel() runs */
+		m = in_expel ? &backed_block : &mosadmin_mode_block;
+		break;
+	case DS_QUIET:
+		m = &mosadmin_mode_quiet;
+		break;
+	case DS_NOMFS:
+		m = &mosadmin_mode_nomfs;
+		break;
+	default:
+		panic("admin_set_unknown_mode");
+		/*NOTREACHED*/
+	}
+	on = (on != 0);	/* sanity */
+	if(on == *m)	/* already in the requested state: nothing to log or store */
+	{
+		unlock_mosix();
+		return(0);
+	}
+	if(on)
+		switch (mode)
+		{
+		case DS_STAY:
+			printk(KERN_NOTICE "MOSIX: Automatic process-migration from this node disabled.\n");
+			break;
+		case DS_LSTAY:
+			printk(KERN_NOTICE "MOSIX: Auto-migration of local-processes from this node disabled.\n");
+			break;
+		case DS_BLOCK:
+			printk(KERN_NOTICE "MOSIX: This node will not accept further guest-processes.\n");
+			break;
+		case DS_QUIET:
+			printk(KERN_NOTICE "MOSIX: Automatic load-dissemination stopped.\n");
+			break;
+		case DS_NOMFS:
+			printk(KERN_NOTICE "MOSIX: This node will stop providing MFS services.\n");
+			break;
+		}
+	else
+	{
+		switch(mode)
+		{
+		case DS_STAY:	/* the wording depends on whether lstay is still in force */
+			if(in_bring ? backed_lstay:mosadmin_mode_lstay)
+				printk(KERN_NOTICE "MOSIX: Auto-migration of guest-processes from this node re-enabled.\n");
+			else
+				printk(KERN_NOTICE "MOSIX: Auto-migration from this node re-enabled.\n");
+			break;
+		case DS_LSTAY:	/* the wording depends on whether stay is still in force */
+			if(mosadmin_mode_stay)
+				printk(KERN_NOTICE "MOSIX: Once auto-migration is enabled, local-processes may migrate as well.\n");
+			else
+				printk(KERN_NOTICE "MOSIX: Auto-migration of local-processes from this node re-enabled.\n");
+			break;
+		case DS_BLOCK:
+			printk(KERN_NOTICE "MOSIX: guest-processes now accepted again.\n");
+			break;
+		case DS_QUIET:
+			printk(KERN_NOTICE "MOSIX: Automatic load-dissemination resumed.\n");
+			break;
+		case DS_NOMFS:
+			printk(KERN_NOTICE "MOSIX: This node will may now provide MFS services.\n");
+			break;
+		}
+	}
+	*m = on;
+	unlock_mosix();
+#ifdef CONFIG_MOSIX_FS
+	if(mode == DS_NOMFS)
+		mfs_change_pe();	/* called after unlock_mosix() */
+#endif /* CONFIG_MOSIX_FS */
+	return (0);
+}
+/* Return the 0/1 setting of `mode', or -EINVAL if unknown; lstay/block report the saved copy while bring()/expel() runs. */
+int
+admin_get_mode(int mode)
+{
+	int ret;
+
+	switch(mode)
+	{
+	case DS_STAY:
+		return(mosadmin_mode_stay);
+	case DS_LSTAY:
+		lock_mosix();
+		ret = in_bring ? backed_lstay : mosadmin_mode_lstay;
+		unlock_mosix();
+		return(ret);
+	case DS_BLOCK:
+		lock_mosix();
+		ret = in_expel ? backed_block : mosadmin_mode_block;
+		unlock_mosix();
+		return(ret);
+	case DS_QUIET:
+		return(mosadmin_mode_quiet);
+	case DS_NOMFS:
+		return(mosadmin_mode_nomfs);
+	}
+	return (-EINVAL);
+}
+/* Build the DS_* status mask: DS_MOSIX_UP, DS_MOSIX_DEF if mosix_config_get_pe() is nonzero, plus one bit per active mode. */
+int
+my_mosix_status(void)
+{
+	int s = DS_MOSIX_UP;
+
+	if(mosix_config_get_pe())
+		s |= DS_MOSIX_DEF;
+	if(mosadmin_mode_stay)
+		s |= DS_STAY;
+	lock_mosix();	/* lstay/block are read together with in_bring/in_expel under the lock */
+	if(in_bring ? backed_lstay : mosadmin_mode_lstay)
+		s |= DS_LSTAY;
+	if(in_expel ? backed_block : mosadmin_mode_block)
+		s |= DS_BLOCK;
+	unlock_mosix();
+	if(mosadmin_mode_quiet)
+		s |= DS_QUIET;
+	if(mosadmin_mode_nomfs)
+		s |= DS_NOMFS;
+	return(s);
+}
+/* Force block mode on and order all DREMOTE tasks home, rescanning after each schedule_timeout(HZ); returns 0, -EBUSY or -EINTR. */
+int
+expel(int boot)
+{
+	int left = 0, oleft;
+	int round = 0;
+	struct task_struct *p;
+	int err = 0;
+/* boot != 0: skips the lock_mosix()/in_expel entry guard, resets tasklist_lock, flushes signals instead of returning -EINTR. */
+	if(boot)
+	{
+		if(current->mosix.dflags & DREMOTE)
+			printk("Foops: cannot expel myself.\n");
+/* assign the unlocked initializer to tasklist_lock */
+		tasklist_lock = RW_LOCK_UNLOCKED;
+	}
+	else
+	{
+		lock_mosix();
+		if(in_expel)
+		{
+			unlock_mosix();
+			return(-EBUSY);
+		}
+		in_expel = 1;	/* makes a concurrent non-boot expel() return -EBUSY */
+	}
+	backed_block = mosadmin_mode_block;
+	mosadmin_mode_block = 1;
+	if(!boot)
+		unlock_mosix();
+	while(1)
+	{
+		oleft = left;	/* count from the previous scan, used to detect progress */
+		left = 0;
+		read_lock(&tasklist_lock);
+		for_each_task(p)
+			if((p->mosix.dflags & DREMOTE) && p != current &&
+				!(p->mosix.dflags & DFINISHED))
+			{
+				if(boot)
+					mosix_do_add_to_whereto(p, MUSTGOHOME);
+				else
+					mosix_add_to_whereto(p, MUSTGOHOME);
+				if(p->mosix.deppe != PE) /* don't count cheaters */
+					left++;
+			}
+		read_unlock(&tasklist_lock);
+		if(!left)
+		{
+			if(boot && round)
+				printk("Done. ");
+			else if(!backed_block)
+				printk(KERN_NOTICE "MOSIX: "
+					"All guest processes evacuated and"
+					" no further guests allowed in\n");
+			break;
+		}
+		if(boot && round++ == 0)	/* first boot-time pass only; round is incremented only when boot is set */
+		{
+			int this_cpu = smp_processor_id();
+
+			printk("Expelling...");
+			bootexpel = 1;
+			cli();
+			if(local_irq_count(this_cpu))
+			{
+				local_irq_count(this_cpu)--;
+#ifdef CONFIG_SMP
+				release_irqlock(this_cpu);
+#endif /* CONFIG_SMP */
+			}
+			while(in_softirq())
+				local_bh_enable();
+			sti();
+		}
+		if(boot)
+			printk("%d ", left);
+#ifdef CONFIG_MOSIX_DEBUG
+		if(ds_debug & DSDEB_EXPEL)
+			printk("expel sleep for %d left\n", left);
+#endif /* CONFIG_MOSIX_DEBUG */
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+		if(signal_pending(current))
+		{
+			if(boot) /* do not worry about locking */
+				flush_signals(current);
+			else
+			{
+				lock_mosix();
+				mosadmin_mode_block = backed_block;	/* interrupted: restore the saved setting */
+				unlock_mosix();
+				in_expel = 0;	/* NOTE(review): cleared after unlock_mosix(), unlike the normal exit below */
+				return(-EINTR);
+			}
+		}
+		if((boot && expel_progress) || left < oleft)
+			round = 1;	/* progress was seen: restart the give-up count */
+		expel_progress = 0;
+		if(round == 100)	/* only reachable with boot set, since only then does round grow */
+		{
+			printk("Giving up\n");
+			lock_mosix();
+			mosadmin_mode_block = backed_block;
+			unlock_mosix();
+			err = -EINTR;
+			break;
+		}
+	}
+	if(!boot)
+	{
+		lock_mosix();
+		in_expel = 0;
+		unlock_mosix();
+	}
+	return(err);
+}
+/* Force lstay on and order home every DDEPUTY task and every non-DREMOTE task with a pending whereto; returns 0, -EBUSY or -EINTR. */
+int
+bring(void)
+{
+	register struct task_struct *p, *w = NULL;
+	int left;
+	int err = -EBUSY;
+/* w: first counted task of each scan; it is sent DREQ_HOMEWAKE and its wait queue is slept on for up to HZ. */
+	lock_mosix();
+	if(in_bring)
+	{
+		unlock_mosix();
+		return(-EBUSY);
+	}
+	in_bring = 1;
+	backed_lstay = mosadmin_mode_lstay;
+	mosadmin_mode_lstay = 1;
+	unlock_mosix();
+	while(1)
+	{
+		left = 0;
+		w = NULL;
+		read_lock(&tasklist_lock);
+		for_each_task(p)
+			if((p->mosix.dflags & DDEPUTY) ||
+				(p->mosix.whereto && !(p->mosix.dflags & DREMOTE)))
+			{
+				mosix_add_to_whereto(p, MUSTGOHOME);
+				if(p->mosix.deppe != PE) /* don't count cheating_for_test */
+				{
+					left++;
+					if(!w)
+					{
+						w = p;
+						get_task_struct(w);	/* reference held until free_task_struct(w) below */
+					}
+				}
+			}
+		read_unlock(&tasklist_lock);
+		if(!left)
+		{
+			if(!backed_lstay)
+				printk(KERN_NOTICE "MOSIX: "
+					"All local processes brought home"
+					" and will not auto-migrate away\n");
+			err = 0;	/* mosadmin_mode_lstay is left forced on in this case */
+			break;
+		}
+#ifdef CONFIG_MOSIX_DEBUG
+		if(ds_debug & DSDEB_EXPEL)
+			printk("bring sleep for %d left\n", left);
+#endif /* CONFIG_MOSIX_DEBUG */
+		tell_process(w, DREQ_HOMEWAKE);
+		interruptible_sleep_on_timeout(mos_to_waitp(&w->mosix), HZ);
+		free_task_struct(w);
+		if(signal_pending(current))
+		{
+			lock_mosix();
+			mosadmin_mode_lstay = backed_lstay;	/* interrupted: restore the saved setting */
+			unlock_mosix();
+			err = -EINTR;
+			break;
+		}
+	}
+	lock_mosix();
+	in_bring = 0;
+	unlock_mosix();
+	return(err);
+}
diff -urN linux-2.4.17/mos/mosproc.c linux_umopenmosix/mos/mosproc.c
--- linux-2.4.17/mos/mosproc.c Thu Jan 1 02:00:00 1970
+++ linux_umopenmosix/mos/mosproc.c Wed Jun 26 23:45:18 2002
@@ -0,0 +1,2242 @@
+/* Changes since February 12, 2002 copyright by Moshe Bar (moshe@moelabs.com)
+ * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il)
+ *
+ * Permission to use this software is hereby granted under the terms of the
+ * GNU General Public License, as published by the Free Software Foundation.
+ *
+ * Author(s): Oren Laadan, Amnon Shiloh, Moshe Bar
+ * Some stuff 'borrowed' from "fs/proc".
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS +#include +#endif /* CONFIG_MOSIX_FS */ + + +/* general interface */ + +static int zero = 0; +static int one = 1; +static int uintmax = 0x7fffffff; + +typedef struct ctl_mosix ctl_mosix; + +typedef ssize_t ctl_mosix_t (ctl_mosix *, int, struct file *, void *, size_t *); + +struct ctl_mosix { + int ctl_name; + const char *procname; + void *data; + int maxlen; + mode_t mode; + ctl_mosix_t *handler; + struct inode_operations *alt_iops; + struct file_operations *alt_fops; + void *extra[2]; +}; + +/* values of 'do_intarr' 'write' argument: */ +#define CTLF_SIMPLE 0x01 /* nothing special */ +#define CTLF_UNSIGNED 0x02 /* use unsigned semantics */ +#define CTLF_MINMAXP 0x04 /* use min/max (via pointer) */ +#define CTLF_MINMAXV 0x08 /* use min/max (via values) */ +#define CTLF_BOOLEAN 0x10 /* boolean values (0/1) */ +#define CTLF_POSITIVE 0x20 /* > 1 */ + +static void proc_mosix_register(struct proc_dir_entry *de, ctl_mosix *table); + +static ssize_t do_intarr(ctl_mosix *, int, struct file*, void *, size_t *, int); +static inline ssize_t get_intarr(struct file *, const char *, size_t, + struct ctl_mosix *); +static inline ssize_t put_intarr(struct file *, const char *, size_t, + struct ctl_mosix *); + +static inline ctl_mosix_t ctl_boolean; +static inline ctl_mosix_t ctl_uintarr; +static inline ctl_mosix_t ctl_positive; +static inline ctl_mosix_t ctl_minmaxv; + +/* dynamic directories: */ +/* the following are Or'ed with the pid/node << 16 */ +enum +{ + PROC_REMPID_INO = PROC_MOSIX_USE_START, + PROC_REMPID_FROM, + PROC_REMPID_GOTO, + PROC_REMPID_STATM, + PROC_REMPID_STATS, + PROC_REMPID_IDENT, + PROC_NODEID_INO, + PROC_NODEID_CPUS, + PROC_NODEID_INFO, + PROC_NODEID_LOAD, + PROC_NODEID_MEM, + PROC_NODEID_RMEM, + PROC_NODEID_SPEED, 
+ PROC_NODEID_TMEM, + PROC_NODEID_STATUS, + PROC_NODEID_UTIL, +#ifdef CONFIG_MOSIX_RESEARCH + PROC_NODEID_RIO, + PROC_NODEID_WIO, +#endif /* CONFIG_MOSIX_RESEARCH */ +}; + +struct subnames +{ + int id; + char *name; + int namelen; +}; + +struct subnames rempid_subs[] = +{ + {PROC_REMPID_FROM, "from", 4}, + {PROC_REMPID_GOTO, "goto", 4}, + {PROC_REMPID_STATM, "statm", 5}, + {PROC_REMPID_STATS, "stats", 5}, + {PROC_REMPID_IDENT, "identity", 8}, + {} +}, nodeid_subs[] = +{ + {PROC_NODEID_CPUS, "cpus", 4}, + {PROC_NODEID_LOAD, "load", 4}, + {PROC_NODEID_MEM, "mem", 3}, + {PROC_NODEID_RMEM, "rmem", 4}, + {PROC_NODEID_SPEED, "speed", 5}, + {PROC_NODEID_TMEM, "tmem", 4}, + {PROC_NODEID_STATUS, "status", 6}, + {PROC_NODEID_UTIL, "util", 4}, +#ifdef CONFIG_MOSIX_RESEARCH + {PROC_NODEID_RIO, "rio", 3}, + {PROC_NODEID_WIO, "wio", 3}, +#endif /* CONFIG_MOSIX_RESEARCH */ + {} +}; + +static ssize_t proc_mosix_sub_read(struct file *, char *, size_t, loff_t *); +static ssize_t proc_mosix_sub_write(struct file *, const char *, size_t, loff_t *); + +static struct file_operations proc_mosix_sub_file_operations = +{ + read: proc_mosix_sub_read, + write: proc_mosix_sub_write, +}; + +static struct inode_operations proc_mosix_sub_inode_operations = +{ +}; + +static int proc_mosix_sub_readdir(struct file *, void *, filldir_t); +static struct dentry *proc_mosix_sub_lookup(struct inode *, struct dentry *); + +static struct file_operations proc_mosix_subdir_file_operations = +{ + readdir: proc_mosix_sub_readdir, +}; + +static struct inode_operations proc_mosix_subdir_inode_operations = +{ + lookup: proc_mosix_sub_lookup, +}; + +static int proc_mosix_nodes_readdir(struct file *, void *, filldir_t); +static struct dentry *proc_mosix_nodes_lookup(struct inode *,struct dentry *); + +static struct file_operations proc_mosix_nodes_file_operations = +{ + readdir: proc_mosix_nodes_readdir, +}; + +static struct inode_operations proc_mosix_nodes_inode_operations = +{ + lookup: proc_mosix_nodes_lookup, 
+}; + +static int proc_mosix_remote_readdir(struct file *, void *, filldir_t); +static struct dentry *proc_mosix_remote_lookup(struct inode *,struct dentry *); + +static struct file_operations proc_mosix_remote_file_operations = +{ + readdir: proc_mosix_remote_readdir, +}; + +static struct inode_operations proc_mosix_remote_inode_operations = +{ + lookup: proc_mosix_remote_lookup, +}; + +static struct proc_dir_entry proc_mosix_nodes = { + 0, 5, "nodes", + S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, + 0, &proc_mosix_nodes_inode_operations, + &proc_mosix_nodes_file_operations +}; + +static struct proc_dir_entry proc_mosix_remote = { + 0, 6, "remote", + S_IFDIR | S_IRUGO | S_IXUGO, 2, 0, 0, + 0, &proc_mosix_remote_inode_operations, + &proc_mosix_remote_file_operations +}; + +static ssize_t proc_mosix_set_remote_goto(struct file*,int,const char*,size_t); +static int proc_mosix_get_remote_from(struct task_struct *, char *); +static int proc_mosix_get_remote_stats(struct task_struct *, char *); +static int proc_mosix_get_remote_identity(struct task_struct *, char *); + +static int +proc_mosix_delete_dentry(struct dentry *dentry) +{ + return(1); +} + +static struct dentry_operations proc_mosix_dynamic_dentry_operations = +{ + d_delete: proc_mosix_delete_dentry +}; + +/* static directories: */ + +static inline ssize_t proc_readmosix(struct file*,char*,size_t,loff_t*); +static inline ssize_t proc_writemosix(struct file*,const char*,size_t,loff_t*); + +static struct file_operations proc_mosix_file_operations = +{ + read: proc_readmosix, + write: proc_writemosix, +}; + +static loff_t proc_lseekmosixinfo(struct file *, long long, int); + +static struct file_operations proc_mosix_info_file_operations = +{ + llseek: proc_lseekmosixinfo, + read: proc_readmosix, + write: proc_writemosix, +}; + +/* /proc/mosix/admin: */ + +enum { + ADMIN_CONFIG = 1, ADMIN_STAY, ADMIN_LSTAY, ADMIN_BLOCK, ADMIN_QUIET, + ADMIN_NOMFS, ADMIN_EXPEL, ADMIN_BRING, ADMIN_SPEED, ADMIN_SSPEED, + ADMIN_OVERHEADS, 
ADMIN_SLOWDECAY, ADMIN_FASTDECAY, ADMIN_DECAYINTERVAL, + ADMIN_GATEWAYS, ADMIN_MOSPE, ADMIN_VERSION, +#ifdef CONFIG_MOSIX_DFSA + ADMIN_DFSALINKS, +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + ADMIN_MFSCOSTS, ADMIN_MFS_TUNEINFO, ADMIN_MFSKILL, +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_UDB + ADMIN_DEBUGGER, ADMIN_INSMOD, ADMIN_INSUDB, +#endif /* CONFIG_MOSIX_UDB */ +}; + +static ctl_mosix_t ctl_admin_gateways; +static ctl_mosix_t ctl_admin_mospe; +static ctl_mosix_t ctl_admin_config; +static ctl_mosix_t ctl_admin_expel; +static ctl_mosix_t ctl_admin_bring; +static ctl_mosix_t ctl_admin_speed; +static ctl_mosix_t ctl_admin_mode; +static ctl_mosix_t ctl_admin_slowdecay; +static ctl_mosix_t ctl_admin_fastdecay; +static ctl_mosix_t ctl_admin_decayinterval; +static ctl_mosix_t ctl_admin_overheads; +static ctl_mosix_t ctl_admin_version; +#ifdef CONFIG_MOSIX_UDB +static ctl_mosix_t ctl_admin_debugger; +static ctl_mosix_t ctl_admin_insmod; +#endif /* CONFIG_MOSIX_UDB */ +#ifdef CONFIG_MOSIX_DFSA +static ctl_mosix_t ctl_admin_dfsalinks; +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS +static ctl_mosix_t ctl_admin_mfskill; +static ctl_mosix_t ctl_admin_mfscosts; +#endif /* CONFIG_MOSIX_FS */ + +static struct ctl_mosix admin_ctl[] = { + { ADMIN_CONFIG, "config", NULL, 0, 0644, ctl_admin_config }, + { ADMIN_STAY, "stay", NULL, sizeof(int), 0644, ctl_admin_mode }, + { ADMIN_LSTAY, "lstay", NULL, sizeof(int), 0644, ctl_admin_mode }, + { ADMIN_BLOCK, "block", NULL, sizeof(int), 0644, ctl_admin_mode }, + { ADMIN_QUIET, "quiet", NULL, sizeof(int), 0644, ctl_admin_mode }, + { ADMIN_NOMFS, "nomfs", NULL, sizeof(int), 0644, ctl_admin_mode }, + { ADMIN_EXPEL, "expel", NULL, 0, 0200, ctl_admin_expel }, + { ADMIN_BRING, "bring", NULL, 0, 0200, ctl_admin_bring }, + { ADMIN_SPEED, "speed", NULL, sizeof(int), 0644, ctl_admin_speed }, + { ADMIN_SSPEED, "sspeed", &standard_speed, sizeof(int), 0644, + ctl_positive }, + { ADMIN_OVERHEADS, "overheads", mosix_cost, 
sizeof(mosix_cost), 0644, + ctl_admin_overheads }, + { ADMIN_SLOWDECAY, "slowdecay", NULL, sizeof(int), 0644, + ctl_admin_slowdecay }, + { ADMIN_FASTDECAY, "fastdecay", NULL, sizeof(int), 0644, + ctl_admin_fastdecay }, + { ADMIN_DECAYINTERVAL, "decayinterval", NULL, sizeof(int), 0644, + ctl_admin_decayinterval }, + { ADMIN_GATEWAYS, "gateways", NULL, 0, 0644, ctl_admin_gateways }, + { ADMIN_MOSPE, "mospe", NULL, 0, 0644, ctl_admin_mospe }, + { ADMIN_VERSION, "version", NULL, 0, 0444, ctl_admin_version }, +#ifdef CONFIG_MOSIX_DFSA + { ADMIN_DFSALINKS, "dfsalinks", NULL, 0, 0644, ctl_admin_dfsalinks }, +#endif /* CONFIG_MOSIX_DFSA */ +#ifdef CONFIG_MOSIX_FS + { ADMIN_MFSCOSTS, "mfscosts", mfs_cost, sizeof(mfs_cost), 0644, + ctl_admin_mfscosts }, + { ADMIN_MFSKILL, "mfskill", NULL, 0, 0200, ctl_admin_mfskill }, +#endif /* CONFIG_MOSIX_FS */ +#ifdef CONFIG_MOSIX_UDB + { ADMIN_DEBUGGER, "debugger", NULL, 0, 0200, ctl_admin_debugger }, + { ADMIN_INSMOD, "insmod", NULL, 0, 0200, ctl_admin_insmod }, +#endif /* CONFIG_MOSIX_UDB */ + { 0 } +}; + +enum { + DECAY_CPUJOB = 1, DECAY_IOJOB, DECAY_SLOW, DECAY_FAST, DECAY_OWN, + DECAY_INHERIT, DECAY_EXEC, DECAY_EXECONCE, DECAY_CLEAR +}; + +/* /proc/mosix/decay: */ + +static ctl_mosix_t ctl_decay; + +static struct ctl_mosix decay_ctl[] = { + { DECAY_CPUJOB, "cpujob", NULL, 0, 0666, ctl_decay }, + { DECAY_IOJOB, "iojob", NULL, 0, 0666, ctl_decay }, + { DECAY_SLOW, "slow", NULL, 0, 0666, ctl_decay }, + { DECAY_FAST, "fast", NULL, 0, 0666, ctl_decay }, + { DECAY_OWN, "own", NULL, 0, 0666, ctl_decay }, + { DECAY_INHERIT, "inherit", NULL, 0, 0666, ctl_decay }, + { DECAY_EXEC, "exec", NULL, 0, 0666, ctl_decay }, + { DECAY_EXECONCE, "execonce", NULL, 0, 0666, ctl_decay }, + { DECAY_CLEAR, "clear", NULL, 0, 0222, ctl_decay }, + { 0 } +}; + +/* /proc/mosix/info: */ + +enum +{ + INFO_CPUS = 1, INFO_INFOS, INFO_LOADS, INFO_MEMS, INFO_RMEMS, + INFO_SPEEDS, INFO_TMEMS, INFO_STATA, INFO_UTILS, +#ifdef CONFIG_MOSIX_RESEARCH + INFO_RIO, INFO_WIO, 
+#endif /* CONFIG_MOSIX_RESEARCH */ +}; + +static ctl_mosix_t ctl_info_fill; +static ctl_mosix_t ctl_info_infos; + +static struct ctl_mosix info_ctl[] = { + { INFO_LOADS, "loads", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_SPEEDS, "speeds", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_CPUS, "cpus", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_MEMS, "mems", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_RMEMS, "rmems", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_TMEMS, "tmems", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_UTILS, "utils", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_STATA, "stata", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_INFOS, "infos", NULL, 0, 0444, ctl_info_infos, + NULL, &proc_mosix_info_file_operations }, +#ifdef CONFIG_MOSIX_RESEARCH + { INFO_RIO, "rios", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, + { INFO_WIO, "wios", NULL, 0, 0444, ctl_info_fill, + NULL, &proc_mosix_info_file_operations }, +#endif /* CONFIG_MOSIX_RESEARCH */ + { 0 } +}; + +#ifdef CONFIG_MOSIX_DFSA +static ssize_t +ctl_admin_dfsalinks(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ssize_t error = 0; + int l; + char *filename = NULL, c, *e; + loff_t pos = filp->f_pos; + int done = 0; + int more = 1; + + while(write) + { + if(!filename && !(filename = __getname())) + return(done ? : -ENOMEM); + for(e = filename , l = 0 ; l < *len ; l++) + if(get_user(c, ((char *)buf)++)) + { + putname(filename); + return(done ? : -EFAULT); + } + else if(c == '\0') + { + more = 0; + *e++ = '\0'; + break; + } + else if(c == '\n') + { + *e++ = '\0'; + break; + } + else if(l == PAGE_SIZE-1) + { + putname(filename); + return(done ? 
: -ENAMETOOLONG);
+			}
+			else
+				*e++ = c;
+		if(l == *len)
+		{
+			/* the input ran out before a '\0' or '\n' terminator */
+			putname(filename);
+			return(done ? : -EINVAL);
+		}
+		/* bytes consumed for this name, terminator included */
+		l = e - filename;
+		/*
+		 * "-name" removes one link, a lone "-" clears all links,
+		 * anything else (with an optional leading '+') adds a link.
+		 */
+		if(filename[0] == '-')
+		{
+			if(filename[1])
+				error = dfsa_dellink(&filename[1]);
+			else
+				error = dfsa_clearlinks();
+		}
+		else
+			error = dfsa_addlink(&filename[filename[0] == '+']);
+		putname(filename);
+		/*
+		 * The buffer is released: forget it so that the next pass of
+		 * the loop allocates a fresh one instead of reusing freed
+		 * memory.
+		 */
+		filename = NULL;
+		if(error)
+			return(done ? : error);
+		pos += l;
+		filp->f_pos = pos;
+		done += l;
+		if(!more || !(*len -= l))
+			return(done);
+	}
+	/* READ */
+	if(!*len)
+		return(0);
+	if(!(filename = dfsa_showlinks()))
+		error = -ENOMEM;
+	else if(pos < (l = strlen(filename)))
+	{
+		/*
+		 * Copy from an offset rather than advancing "filename":
+		 * the pointer has to stay intact for the kfree() below.
+		 */
+		l -= pos;
+		if(l > *len)
+			l = *len;
+		if(copy_to_user(buf, filename + pos, l))
+			error = -EFAULT;
+		else
+			*len = l;
+	}
+	else
+		*len = 0;
+	if(filename)
+		kfree(filename);
+	if(!error)
+		filp->f_pos = pos + *len;
+	return (error ? : 0);
+}
+#endif /* CONFIG_MOSIX_DFSA */
+
+/*
+ * Register the two dynamic directories under proc_mosix and populate
+ * hpc/info, hpc/decay and hpc/admin from their control tables.
+ */
+void
+mosix_proc_init(void)
+{
+	struct proc_dir_entry *parent;
+
+	proc_register(proc_mosix, &proc_mosix_remote);
+
+	proc_register(proc_mosix, &proc_mosix_nodes);
+
+	parent = proc_mkdir("hpc/info", NULL);
+	proc_mosix_register(parent, info_ctl);
+
+	parent = proc_mkdir("hpc/decay", NULL);
+	proc_mosix_register(parent, decay_ctl);
+
+	parent = proc_mkdir("hpc/admin", NULL);
+	proc_mosix_register(parent, admin_ctl);
+}
+
+/*
+ * Create one regular proc file below "root" for every entry of "table";
+ * the table ends at the first entry whose procname is NULL.
+ */
+static void
+proc_mosix_register(struct proc_dir_entry *root, ctl_mosix *table)
+{
+	int mode;
+	struct proc_dir_entry *de;
+
+	for ( ; table->procname ; table++)
+	{
+		mode = table->mode | S_IFREG;
+		de = create_proc_entry(table->procname, mode, root);
+		if (!de) {
+			printk("proc_openmosix_register: create_proc_entry"
+			       " failed for %s!\n", table->procname);
+			mosix_panic("proc_openmosix_register: null de");
+			continue;
+		}
+		de->data = (void *) table;
+		de->proc_iops = table->alt_iops ? : NULL;
+		de->proc_fops = table->alt_fops ?
: &proc_mosix_file_operations; + } +} + +static ssize_t +do_rw_mosix(int write, struct file *file, char *buf, size_t count, loff_t *ppos) +{ + struct proc_dir_entry *de; + struct ctl_mosix *ctl; + ssize_t res; + ssize_t error; + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_rw_mosix: 1\n", current->pid, current->comm); +#endif + + de = (struct proc_dir_entry *) file->f_dentry->d_inode->u.generic_ip; + if (!de || !de->data) + return (-ENOTDIR); + ctl = (struct ctl_mosix *) de->data; + if (!ctl || !ctl->handler) + return (-ENOTDIR); + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_rw_mosix: 2\n", current->pid, current->comm); +#endif + + res = count; + if((error = (*ctl->handler) (ctl, write, file, buf, &res))) + res = error; +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_rw_mosix: error = %d\n", + current->pid, current->comm, error); +#endif + return (res); +} + +static inline ssize_t +proc_readmosix(struct file *file, char *buf, size_t count, loff_t *ppos) +{ + return (do_rw_mosix(0, file, buf, count, ppos)); +} + +static inline ssize_t +proc_writemosix(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + return(do_rw_mosix(1, file, (char *) buf, count, ppos)); +} + +#define TMPBUFLEN 20 + +static ssize_t +do_intarr(ctl_mosix *ctl, int write, struct file *filp, + void *buffer, size_t *lenp, int scale) +{ + int *datap, vleft, first=1, len, left, neg, val; + int *min = NULL, *max = NULL; + char buf[TMPBUFLEN], *p; + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_intarr: 1\n", current->pid, current->comm); +#endif + + if (!ctl->data || !ctl->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_intarr: 2\n", current->pid, current->comm); +#endif + + datap = (int *) ctl->data; + if (write & CTLF_BOOLEAN) { + min = &zero; + max = &one; + } else if(write & CTLF_POSITIVE) { + min = &one; + max = 
&uintmax; + } else if (write & CTLF_UNSIGNED) { + min = &zero; + max = &uintmax; + } else if (write & CTLF_MINMAXP) { + min = ((int *) ctl->extra[0]) - 1; + max = ((int *) ctl->extra[1]) - 1; + } else if (write & CTLF_MINMAXV) { + min = (int *) &(ctl->extra[0]); + max = (int *) &(ctl->extra[1]); + } + + vleft = ctl->maxlen / sizeof(int); + left = *lenp; + + for (; left && vleft--; datap++, first=0) { + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_intarr: left=%d, vleft=%d, wrt=%d\n", + current->pid,current->comm,left,vleft, write); +#endif + + if (write) { + + int nex = TMPBUFLEN; + while (left) { + char c; + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, buffer, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; + if (!isspace(c)) + break; + left--; + ((char *) buffer)++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0) * scale; + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + buffer += len; + left -= len; + + if (write & CTLF_MINMAXP) { + min++; + max++; + } + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) { + printk("%d(%s)-do_intarr: val = %d ", + current->pid, current->comm, val); + if (min) + printk("min = %d ", *min); + if (max) + printk("max = %d ", *max); + printk("\n"); + } +#endif + + if (min && val < *min) + continue; + if (max && val > *max) + continue; + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("%d(%s)-do_intarr: setting val\n", + current->pid, current->comm); +#endif + + *datap = val; + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%d", (*datap) / scale); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, 
len)) + return -EFAULT; + left -= len; + buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + + if (write) { + int nex = TMPBUFLEN; + + p = (char *) buffer; + while (left) { + char c; + + if(nex == TMPBUFLEN) + { + len = left; + if(len > TMPBUFLEN) + len = TMPBUFLEN; + if(copy_from_user(buf, p, len)) + return(-EFAULT); + nex = 0; + } + c = buf[nex++]; + p++; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +} + +static inline ssize_t +get_intarr(struct file *file,const char *buf,size_t count,struct ctl_mosix *ctl) +{ + ssize_t res; + ssize_t error; + + res = count; + error = (*ctl->handler) (ctl, 1, file, (char *)buf, &res); + + if (error) + return (error); + return (res); +} + +static inline ssize_t +put_intarr(struct file *file,const char *buf,size_t count,struct ctl_mosix *ctl) +{ + ssize_t res; + ssize_t error; + + res = count; + error = (*ctl->handler) (ctl, 0, file, (char *)buf, &res); + + if (error) + return (error); + return (res); +} + +static inline ssize_t +ctl_boolean(ctl_mosix *ctl,int write,struct file *filp,void *buf,size_t *len) +{ + return (do_intarr(ctl, write ? CTLF_BOOLEAN : 0, filp, buf, len, 1)); +} + +static inline ssize_t +ctl_uintarr(ctl_mosix *ctl,int write,struct file *filp,void *buf,size_t *len) +{ + return (do_intarr(ctl, write ? CTLF_UNSIGNED : 0, filp, buf, len, 1)); +} + +static inline ssize_t +ctl_positive(ctl_mosix *ctl,int write,struct file *filp,void *buf,size_t *len) +{ + return (do_intarr(ctl, write ? CTLF_POSITIVE : 0, filp, buf, len, 1)); +} + +static inline ssize_t +ctl_minmaxv(ctl_mosix *ctl,int write,struct file *filp,void *buf,size_t *len) +{ + return (do_intarr(ctl, write ? 
CTLF_MINMAXV : 0, filp, buf, len, 1)); +} + +static ssize_t +ctl_admin_gateways(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int val = mosadmin_gateways; + int error; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_minmaxv}; + + tmpctl.data = &val; + tmpctl.extra[0] = (void *)0; + tmpctl.extra[1] = (void *)2; + + if (write) + error = get_intarr(filp, buf, *len, &tmpctl); + else + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + if(write) + { + lock_mosix(); + mosadmin_gateways = val; + mosinfo_update_gateways(); + unlock_mosix(); + } + return (0); +} + +static ssize_t +ctl_admin_mospe(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int this, tent; + + if (!*len || (filp->f_pos && !write)) { + *len = 0; + return 0; + } + if (write) { + int pe, error; + struct ctl_mosix tmpctl = + { 0, "goto", NULL, sizeof(int), 0, ctl_minmaxv }; + + tmpctl.data = &pe; + tmpctl.extra[0] = (void *) 0; + tmpctl.extra[1] = (void *) MOSIX_MAX; + + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + if(pe < 0) + return(-ENOSYS); + return (mosix_config_set_pe(pe)); + } else { + int n; + char tmpbuf[20]; + + if (filp->f_pos) + return (0); + this = mosix_config_get_pe(); + tent = mosix_config_get_tentative_pe(); + if (this == tent) + sprintf(tmpbuf, "%d\n", this); + else + sprintf(tmpbuf, "%d (%d)\n", this, tent); + n = strlen(tmpbuf); + if(n > *len) + n = *len; + if (copy_to_user(buf, tmpbuf, n)) + return(-EFAULT); + *len = n; + filp->f_pos += n; + return (0); + } +} + +#ifdef CONFIG_MOSIX_FS +static ssize_t +ctl_admin_mfscosts(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ssize_t result = ctl_uintarr(ctl, write, filp, buf, len); + +#ifdef CONFIG_MOSIX_TOPOLOGY + if(write) + info_update_mfscosts(); +#endif /* CONFIG_MOSIX_TOPOLOGY */ + return(result); +} + +static ssize_t +ctl_admin_mfskill(ctl_mosix 
*ctl, int write, struct file *filp,
+		void *buf, size_t *len)
+{
+	ssize_t error = 0;
+	int l;
+	char *filename;
+
+	/* write-only file; only a write at offset 0 is acted upon */
+	if(!write)
+		return(-EPERM);
+	if(filp->f_pos)
+		return(0);
+	filename = getname(buf);
+	if(IS_ERR(filename))
+		return(PTR_ERR(filename));
+	l = strlen(filename);
+	if(l > *len)
+	{
+		/* ignore whatever follows the *len bytes that were written */
+		filename[*len] = '\0';
+		l = *len;
+	}
+	/* drop a single trailing newline */
+	if(l && filename[l-1] == '\n')
+		filename[--l] = '\0';
+	/*
+	 * Reject embedded newlines, and names that fill the whole write
+	 * (no newline or '\0' inside the *len bytes).
+	 */
+	if(strchr(filename, '\n') || l >= *len)
+		error = -EINVAL;
+	else
+		error = mfs_kill(filename);
+	putname(filename);
+	if(!error)
+		filp->f_pos += *len;
+	return (error ? : 0);
+}
+#endif /* CONFIG_MOSIX_FS */
+
+/*
+ * Read handler for the "version" file: formats the MOSIX version, the
+ * DFSA state and the topology setting, and copies at most *len bytes of
+ * it to the user.  Only a read at offset 0 returns data.
+ */
+static ssize_t
+ctl_admin_version(ctl_mosix *ctl, int write, struct file *filp,
+		void *buf, size_t *len)
+{
+	int n, left;
+	char tmpbuf[100];
+
+	if ((filp->f_pos) || !*len) {
+		*len = 0;
+		return (0);
+	}
+
+	left = *len;
+	*len = 0;
+
+	sprintf(tmpbuf, "Mosix Version %d.%d.%d\nDFSA is %sabled\n"
+#ifdef CONFIG_MOSIX_TOPOLOGY
+			"Maximum network topology = %d\n"
+#else
+			"Simple network topology\n"
+#endif /* CONFIG_MOSIX_TOPOLOGY */
+			, MOSIX_VERSION_PARTS(MOSIX_KERNEL_VERSION),
+#ifdef CONFIG_MOSIX_DFSA
+			"en"
+#else
+			"dis"
+#endif /* CONFIG_MOSIX_DFSA */
+#ifdef CONFIG_MOSIX_TOPOLOGY
+			, MAX_MOSIX_TOPOLOGY
+#endif /* CONFIG_MOSIX_TOPOLOGY */
+			);
+	n = strlen(tmpbuf);
+	if(n > left)
+		n = left;
+	if (copy_to_user(buf, tmpbuf, n))
+		return (-EFAULT);
+	*len += n;
+	filp->f_pos += *len;
+	return (0);
+}
+
+#ifdef CONFIG_MOSIX_UDB
+/*
+ * Write handler for the "debugger" file: writing 1 enters the MOSIX
+ * kernel debugger, any other value yields -EINVAL.  Reads are refused.
+ */
+static ssize_t
+ctl_admin_debugger(ctl_mosix *ctl, int write, struct file *filp,
+		void *buf, size_t *len)
+{
+	/*
+	 * val starts at 0: the parser stores nothing when the number
+	 * written is outside the boolean range, and the test below must
+	 * not read an uninitialised variable in that case.
+	 */
+	int error, val = 0;
+	struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_boolean};
+
+	tmpctl.data = &val;
+
+	if (!write)
+		return (-EPERM);
+	error = get_intarr(filp, buf, *len, &tmpctl);
+	if (error < 0)
+		return (error);
+	if (val != 1)
+		return (-EINVAL);
+	mosix_debugger("/proc/mosix/admin/debugger");
+	return(0);
+}
+
+static ssize_t
+ctl_admin_insmod(ctl_mosix *ctl, int write, struct file *filp,
+		void *buf,
size_t *len) +{ + int copy = *len; + int error = 0; + +#ifdef CONFIG_MODULES + extern int udb_proc_insmod(char *buf, int copy); +#else + +#define udb_proc_insmod(a,b) (-ENXIO) + +#endif /* CONFIG_MODULES */ + + if (!write) + return (-EPERM); + else if (filp->f_pos || !copy) { + *len = 0; + return (0); + } + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("ctl_admin_insmod: calling udb_proc_insmod()\n"); +#endif + + error = udb_proc_insmod(buf, copy); + + if (error < 0) + return (error); + *len = copy; + filp->f_pos += copy; + return (0); +} +#endif /* CONFIG_MOSIX_UDB */ + +static ssize_t +ctl_admin_config(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + struct mosixnet *mos; + int copy = *len; + int pos = copy / sizeof(struct mosixnet); + int fpos = filp->f_pos / sizeof(struct mosixnet); + int error = 0; + + /* position in this file is entry # in the config */ + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("ctl_admin_config: 1\n"); +#endif + + /* truncate read/write to multiples of sizeof(strcut mosixnet) */ + if (*len % sizeof(struct mosixnet)) + *len -= *len % sizeof(struct mosixnet); + + if ((write && filp->f_pos) || !*len) { + *len = 0; + return (0); + } + +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("ctl_admin_config: 2\n"); +#endif + + if (write) { +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("ctl_admin_config: 3\n"); +#endif + pos = copy / sizeof(struct mosixnet); + if(pos <= 0) + { + *len = 0; + return(0); + } + if (!(mos = (struct mosixnet *) kmalloc(copy, GFP_USER))) + return (-ENOMEM); + if (copy_from_user((char *) mos, buf, copy)) + { + kfree(mos); + return (-EFAULT); + } + error = mosix_config_set_table(mos, pos, + mosix_config_get_tentative_pe()); + if (error) + kfree((void *) mos); + } else { +#ifdef CONFIG_MOSIX_DEBUG + if (debug_proc) + printk("ctl_admin_config: 4\n"); +#endif + pos = mosix_config_get_table(&mos, fpos, pos); + if (pos < 0) + return (pos); + copy = pos * sizeof(struct 
mosixnet); + if (copy) + { + if(copy_to_user(buf, (char *) mos, copy)) + error = -EFAULT; + kfree((void *) mos); + } + } + + if (error < 0) + return (error); + *len = copy; + filp->f_pos += copy; + return (0); +} + +static ssize_t +ctl_admin_expel(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int val = 0; + int error; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_boolean}; + + if (!write) + return (-EPERM); + + tmpctl.data = &val; + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + return (val ? expel(0) : 0); +} + +static ssize_t +ctl_admin_bring(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int val = 0; + int error; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_boolean}; + + if (!write) + return (-EPERM); + + tmpctl.data = &val; + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + return (val ? 
bring() : 0); +} + +static ssize_t +ctl_admin_speed(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int val = cpuspeed; + int error; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_minmaxv}; + + tmpctl.data = &val; + tmpctl.extra[0] = (void *)1; + tmpctl.extra[1] = (void *)65535; + + if (write) + error = get_intarr(filp, buf, *len, &tmpctl); + else + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + if(write) + { + lock_mosix(); + cpuspeed = val; + set_my_cpuspeed(); + unlock_mosix(); + } + return (0); +} + +static ssize_t +ctl_admin_mode(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int val, mode; + int error; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_boolean}; + + tmpctl.data = &val; + + switch (ctl->ctl_name) { + case ADMIN_STAY: + mode = DS_STAY; + break; + case ADMIN_LSTAY: + mode = DS_LSTAY; + break; + case ADMIN_BLOCK: + mode = DS_BLOCK; + break; + case ADMIN_QUIET: + mode = DS_QUIET; + break; + case ADMIN_NOMFS: + mode = DS_NOMFS; + break; + default: + panic("ctl_admin_mode: invalid ctl_name"); + } + + if (write) { + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + return (admin_set_mode(mode, val)); + } else { + val = admin_get_mode(mode); + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + *len = error; + return (0); + } +} + +static ssize_t +ctl_admin_slowdecay(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ctl->data = (void *) &slow_alpha; + ctl->extra[0] = (void *) fast_alpha; + ctl->extra[1] = (void *) DECAY_QUOTIENT; + return (do_intarr(ctl, write ? 
CTLF_MINMAXV : 0, filp, buf, len, 1)); +} + +static ssize_t +ctl_admin_fastdecay(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ctl->data = (void *) &fast_alpha; + ctl->extra[0] = (void *) 0; + ctl->extra[1] = (void *) slow_alpha; + return (do_intarr(ctl, write ? CTLF_MINMAXV : 0, filp, buf, len, 1)); +} + +static ssize_t +ctl_admin_decayinterval(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ctl->data = (void *) &decay_interval; + ctl->extra[0] = (void *) 1; + ctl->extra[1] = (void *) 65535; + return (do_intarr(ctl, write ? CTLF_MINMAXV : 0, filp, buf, len, 1)); +} + +void +proc_update_costs(void) +{ + register int i; + register struct task_struct *p; + + lock_mosix(); + for(i = 0 ; i < MAX_MOSIX_TOPOLOGY ; i++) + { + deputy_here[i].page = mosix_cost[i].PAGE_D; + deputy_here[i].syscall = mosix_cost[i].SYSCALL_D; + deputy_here[i].out = mosix_cost[i].COPYOUT_BASE_D; + deputy_here[i].outkb = mosix_cost[i].COPYOUT_PER_KB_D; + deputy_here[i].in = mosix_cost[i].COPYIN_BASE_D; + deputy_here[i].inkb = mosix_cost[i].COPYIN_PER_KB_D; +#ifdef CONFIG_MOSIX_TOPOLOGY + deputy_here[i].first = mosix_cost[i].first; + deputy_here[i].last = mosix_cost[i].last; +#endif /* CONFIG_MOSIX_TOPOLOGY */ + } + info_update_costs(); + read_lock(&tasklist_lock); + for_each_task(p) + if(p->mosix.dflags & DDEPUTY) + { + tell_process(p, DREQ_UPDOVERHEADS); + wake_up_mosix(p); + } + read_unlock(&tasklist_lock); + unlock_mosix(); +} + +static ssize_t +ctl_admin_overheads(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + ssize_t result = ctl_uintarr(ctl, write, filp, buf, len); + + if(write) + proc_update_costs(); + return(result); +} + +static loff_t +proc_lseekmosixinfo(struct file *filp, loff_t offset, int orig) +{ + switch (orig) { + case 0: + break; + case 1: + offset += filp->f_pos; + break; + case 2: + offset += mosix_config_get_limit(); + break; + default: + return (-EINVAL); + } + if(offset < 0 || offset > 
MOSIX_MAX) + return(-EINVAL); + filp->f_pos = offset; + return (offset); +} + +static ssize_t +ctl_info_infos(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int end; + int n, err; + + end = (*len / sizeof(struct mosix_info)) + filp->f_pos; + if(end > (n = mosix_config_get_limit())) + end = n; + *len = 0; + if((n = end - filp->f_pos) > 0) + { + if((err = balance_get_infos(filp->f_pos + 1, n, + (struct mosix_info *)buf, 1))) + return(err); + filp->f_pos += n; + *len = n * sizeof(struct mosix_info); + } + + return (0); +} + +#define get_item(x, node, undef, notup) ( { \ + struct mosix_info info; \ + (void) balance_get_info(node, &info); \ + (info.status & DS_MOSIX_UP) ? info.x : \ + (info.status & DS_MOSIX_DEF) ? notup : undef; } ) + +static ssize_t +ctl_info_fill(ctl_mosix *ctl, int write, struct file *filp, + void *buf, size_t *len) +{ + int i, end; + int val, total = 0; + + end = (*len / sizeof(int)) + filp->f_pos; + if(end > (i = mosix_config_get_limit())) + end = i; + for (i = filp->f_pos + 1; i <= end; i++) { + switch (ctl->ctl_name) { + case INFO_LOADS: + val = get_item(load, i, -ENXIO, -ENETUNREACH); + break; + case INFO_SPEEDS: + val = get_item(speed, i, -ENXIO, -ENETUNREACH); + break; + case INFO_CPUS: + val = get_item(ncpus, i, -ENXIO, -ENETUNREACH); + break; + case INFO_MEMS: + val = get_item(mem, i, -ENXIO, -ENETUNREACH); + break; + case INFO_RMEMS: + val = get_item(rmem, i, -ENXIO, -ENETUNREACH); + break; + case INFO_TMEMS: + val = get_item(tmem, i, -ENXIO, -ENETUNREACH); + break; + case INFO_UTILS: + val = get_item(util, i, -ENXIO, -ENETUNREACH); + break; + case INFO_STATA: + val = get_item(status, i, 0, DS_MOSIX_DEF); + break; +#ifdef CONFIG_MOSIX_RESEARCH + case INFO_RIO: + val = get_item(rio, i, -ENXIO, -ENETUNREACH); + break; + case INFO_WIO: + val = get_item(wio, i, -ENXIO, -ENETUNREACH); + break; +#endif /* CONFIG_MOSIX_RESEARCH */ + default: + mosix_panic("ctl_info_fill - bad name"); + return (-EDIST); + } + if 
(copy_to_user(buf, (char *) &val, sizeof(int))) + return (-EFAULT); + filp->f_pos++; + buf += sizeof(int); + total += sizeof(int); + } + *len = total; + return (0); +} + +static ssize_t +ctl_decay(ctl_mosix *ctl, int write, struct file *filp, void *buf, size_t *len) +{ + int val, policy; + int error; + int (*func)(int); + int decay[2]; + struct ctl_mosix tmpctl = {0, NULL, NULL, sizeof(int), 0, ctl_boolean}; + + tmpctl.data = &val; + + switch (ctl->ctl_name) { + case DECAY_INHERIT: + func = decay_inherit; + goto r01w01; + case DECAY_EXEC: + func = decay_exec; + goto r01w01; + case DECAY_EXECONCE: + func = decay_execonce; + r01w01: + if (write) { + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + (*func)(val); + } else { + val = (*func)(-1); + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + } + break; + + case DECAY_CPUJOB: + policy = DADV_CPU; + goto r01w1; + case DECAY_IOJOB: + policy = DADV_NOCPU; + goto r01w1; + case DECAY_SLOW: + policy = DADV_SLOWDECAY; + goto r01w1; + case DECAY_FAST: + policy = DADV_FASTDECAY; + r01w1: + if (write) { + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + if (val != 1) + return (-EINVAL); + decay_set(policy, 0, 1); + } else { + val = decay_get(policy); + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + } + break; + + case DECAY_OWN: + tmpctl.data = (void *) decay; + tmpctl.maxlen = 2 * sizeof(int); + tmpctl.handler = ctl_uintarr; + + /* to force an error in case of partial data */ + decay[0] = decay[1] = 0; + + if (write) { + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + if (decay[0] < 0 || decay[0] > DECAY_QUOTIENT + || decay[1] < 1 || decay[1] > 65535) + return (-EINVAL); + decay_set(DADV_OWNDECAY, decay[0], decay[1]); + } else { + if (decay_get(DADV_OWNDECAY)) { + decay[0] = current->mosix.decay; + decay[1] = current->mosix.deccycle; + } else + 
tmpctl.maxlen = sizeof(int); + error = put_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + } + break; + + case DECAY_CLEAR: + if (!write) + return (-EPERM); + error = get_intarr(filp, buf, *len, &tmpctl); + if (error < 0) + return (error); + if (val != 1) + return (-EINVAL); + decay_clear(); + break; + default: + return (-EBADF); + } + *len = error; + return (0); +} + + +#define for_each_remote_task(p) \ + for (p = &init_task ; (p = p->next_task) != &init_task ; ) \ + if(p->mosix.dflags & DREMOTE) + + +#define NUMBUF 10 + +static int +proc_mosix_remote_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct task_struct *p; + char buf[NUMBUF]; + unsigned int nr = filp->f_pos; + pid_t *pids; + int k, n = 0; + + if(filp->f_pos == 0) + { + if(filldir(dirent, ".", 1, 0, filp->f_dentry->d_inode->i_ino, + DT_DIR) < 0) + return(0); + filp->f_pos = 1; + } + if(filp->f_pos == 1) + { + if(filldir(dirent, "..", 2, 1, + filp->f_dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) + return(0); + filp->f_pos = 2; + } + + n = 1; + while(1) + { + k = count_guests(); /* usually correct, but not 100% reliable */ + if(k > n) + n = k; + if(!(pids = (pid_t *)kmalloc(n * sizeof(pid_t), GFP_KERNEL))) + return(-ENOMEM); + read_lock(&tasklist_lock); + /* new guest(s) could come in while we waited for memory! 
*/
+		k = 0;
+		for_each_remote_task(p)
+		{
+			/* keep counting past the end to learn the size needed */
+			if(k < n)
+				pids[k++] = p->pid;
+			else
+				k++;
+		}
+		read_unlock(&tasklist_lock);
+		if(k <= n)
+		{
+			n = k;
+			break;
+		}
+		/*
+		 * More guests than slots: release this array before
+		 * retrying, the next pass allocates a larger one.
+		 */
+		kfree(pids);
+		n = k + 1;
+	}
+	nr = 2;
+	for(k = 0 ; k < n ; k++) {
+		unsigned int pid;
+		unsigned long i, j;
+
+		/* skip the entries handed out by earlier calls */
+		if(nr++ < filp->f_pos)
+			continue;
+		pid = pids[k];
+
+		/* render the pid in decimal, filling buf from its end */
+		for (j = NUMBUF, i = pid; i; i /= 10) {
+			j--;
+			buf[j] = '0' + (i % 10);
+		}
+		if (filldir(dirent, buf+j, NUMBUF-j, filp->f_pos,
+			    (pid << 16) + PROC_REMPID_INO, DT_DIR) < 0)
+			break;
+		filp->f_pos++;
+	}
+	kfree(pids);
+	return(0);
+}
+
+/*
+ * readdir for a per-pid or per-node subdirectory: emits ".", ".." and
+ * then the fixed entries of rempid_subs or nodeid_subs, selected by the
+ * low 16 bits of the directory's inode number.  The upper bits of that
+ * inode number are carried over into the inode number of every entry.
+ */
+static int
+proc_mosix_sub_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dp = filp->f_dentry;
+	int id = dp->d_inode->i_ino & ~0xffff;
+	struct subnames *s;
+	int i;
+
+	switch(dp->d_inode->i_ino & 0xffff)
+	{
+	case PROC_REMPID_INO:
+		s = rempid_subs;
+		break;
+	case PROC_NODEID_INO:
+		s = nodeid_subs;
+		break;
+	default:
+		printk("ino=0x%lx\n", dp->d_inode->i_ino);
+		panic("proc_mosix_sub_readdir");
+	}
+	if(filp->f_pos == 0)
+	{
+		if(filldir(dirent, ".", 1, 0, dp->d_inode->i_ino, DT_DIR) < 0)
+			return(0);
+		filp->f_pos = 1;
+	}
+	if(filp->f_pos == 1)
+	{
+		if(filldir(dirent, "..", 2, 1, dp->d_parent->d_inode->i_ino,
+				DT_DIR) < 0)
+			return(0);
+		filp->f_pos = 2;
+	}
+	/* advance to the table entry that corresponds to f_pos */
+	for(i = 2 ; i < filp->f_pos ; i++)
+		if(s->name)
+			s++;
+		else
+			return(0);
+	while(s->name && filldir(dirent, s->name, strlen(s->name), filp->f_pos,
+			id | s->id, DT_REG) >= 0)
+	{
+		filp->f_pos++;
+		s++;
+	}
+	return(0);
+}
+
+static struct dentry *
+proc_mosix_sub_lookup(struct inode *dir, struct dentry *dentry)
+{
+	unsigned long dino = dir->i_ino;
+	struct subnames *s;
+	const char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	struct inode *inode = NULL;
+
+	switch(dino & 0xffff)
+	{
+	case PROC_REMPID_INO:
+		s = rempid_subs;
+		break;
+	case PROC_NODEID_INO:
+		s = nodeid_subs;
+		break;
+	default:
+		printk("ino=0x%lx\n", dino);
+		panic("proc_mosix_sub_lookup");
+	}
+	while(s->name)
+
if(s->namelen == len && !strncmp(s->name, name, len)) + { + inode = proc_get_inode(dir->i_sb, (dino & ~0xffff)|s->id, NULL); + if(!inode) + return(ERR_PTR(-EINVAL)); + switch(s->id) + { + case PROC_REMPID_GOTO: + inode->i_mode = S_IFREG | 0222; + break; + default: + inode->i_mode = S_IFREG | 0444; + break; + } + inode->i_op = &proc_mosix_sub_inode_operations; + inode->i_fop = &proc_mosix_sub_file_operations; + break; + } + else + s++; + dentry->d_op = &proc_mosix_dynamic_dentry_operations; + d_add(dentry, inode); + return(NULL); +} + +int +atoi16(const char *num, int len) +{ + int n = 0; + char c; + + while(len-- > 0) + { + c = *num++; + if(c < '0' || c > '9') + return(0); + n = n * 10 + c - '0'; + if(n & 0xffff0000) + return(0); + } + return(n); +} + +static struct dentry * +proc_mosix_remote_lookup(struct inode *dir, struct dentry *dentry) +{ + unsigned int pid, n; + struct task_struct *p; + struct inode *inode = NULL; + + n = proc_mosix_remote.nlink; + read_lock(&tasklist_lock); + for_each_remote_task(p) + n++; + read_unlock(&tasklist_lock); + dir->i_nlink = n; + + pid = atoi16(dentry->d_name.name, dentry->d_name.len); + if(!pid) + goto out; + + read_lock(&tasklist_lock); + p = find_any_task_by_pid(pid); + if (p && !(p->mosix.dflags & DREMOTE)) + p = NULL; + read_unlock(&tasklist_lock); + + inode = NULL; + if (pid && p) { + inode = proc_get_inode(dir->i_sb, + (pid << 16) | PROC_REMPID_INO, NULL); + if (!inode) + return ERR_PTR(-EINVAL); + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &proc_mosix_subdir_inode_operations; + inode->i_fop = &proc_mosix_subdir_file_operations; + } + + out: + dentry->d_op = &proc_mosix_dynamic_dentry_operations; + d_add(dentry, inode); + return(NULL); +} + +static int +proc_mosix_nodes_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct mosixnet *mos; + char buf[NUMBUF]; + unsigned int nr = filp->f_pos; + int nmos, k, l, node; + + if(filp->f_pos == 0) + { + if(filldir(dirent, ".", 1, 0, 
filp->f_dentry->d_inode->i_ino, + DT_DIR) < 0) + return(0); + filp->f_pos = 1; + } + if(filp->f_pos == 1) + { + if(filldir(dirent, "..", 2, 1, + filp->f_dentry->d_parent->d_inode->i_ino, + DT_DIR) < 0) + return(0); + filp->f_pos = 2; + } + + if((nmos = mosix_config_get_table(&mos, 0, 0)) <= 0) + return (nmos); + + nr = 2; + for(k = 0 ; k < nmos ; k++) + for (l = 0; l < mos[k].cnt; l++) { + unsigned long i, j; + + if(nr++ < filp->f_pos) + continue; + + node = mos[k].base + l; + for (j = NUMBUF, i = node; i; i /= 10) { + j--; + buf[j] = '0' + (i % 10); + } + if (filldir(dirent, buf+j, NUMBUF-j, filp->f_pos, + (node << 16)+PROC_NODEID_INO, DT_REG) < 0) + goto out; + filp->f_pos++; + } +out: + if(nmos) + kfree(mos); + return (0); +} + +static struct dentry * +proc_mosix_nodes_lookup(struct inode *dir, struct dentry *dentry) +{ + unsigned int node; + struct inode *inode = NULL; + + dir->i_nlink = proc_mosix_nodes.nlink + count_mosix_nodes(); + + node = atoi16(dentry->d_name.name, dentry->d_name.len); + + if(!mos_to_net(node, NULL)) + node = 0; + if (node) + { + inode = proc_get_inode(dir->i_sb, + (node << 16) | PROC_NODEID_INO, NULL); + if (!inode) + return ERR_PTR(-EINVAL); + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &proc_mosix_subdir_inode_operations; + inode->i_fop = &proc_mosix_subdir_file_operations; + } + + dentry->d_op = &proc_mosix_dynamic_dentry_operations; + d_add(dentry, inode); + return(NULL); +} + +ssize_t +proc_mosix_sub_read(struct file *filp, char *buf, size_t count, loff_t *ppos) +{ + struct task_struct *r; + char *page; + unsigned long rino = filp->f_dentry->d_inode->i_ino; + unsigned long ino = rino & 0xffff; + int pid_or_node = rino >> 16; + int result; + int (*func)(struct task_struct *, char *); + + if(!(page = (char *)__get_free_page(GFP_KERNEL))) + return(-ENOMEM); + switch(ino) + { + case PROC_REMPID_FROM: + func = proc_mosix_get_remote_from; + break; + case PROC_REMPID_STATM: + func = proc_pid_statm; + break; + case PROC_REMPID_STATS: + 
func = proc_mosix_get_remote_stats; + break; + case PROC_REMPID_IDENT: + func = proc_mosix_get_remote_identity; + break; + case PROC_NODEID_CPUS: + result = sprintf(page, "%d\n", (int)get_item(ncpus, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_LOAD: + result = sprintf(page, "%d\n", (int)get_item(load, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_MEM: + result = sprintf(page, "%d\n", (int)get_item(mem, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_RMEM: + result = sprintf(page, "%d\n", (int)get_item(rmem, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_SPEED: + result = sprintf(page, "%d\n", (int)get_item(speed, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_TMEM: + result = sprintf(page, "%d\n", (int)get_item(tmem, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_STATUS: + result = sprintf(page, "%d\n", (int)get_item(status, + pid_or_node, 0, DS_MOSIX_DEF)); + goto copy; + case PROC_NODEID_UTIL: + result = sprintf(page, "%d\n", (int)get_item(util, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; +#ifdef CONFIG_MOSIX_RESEARCH + case PROC_NODEID_RIO: + result = sprintf(page, "%d\n", (int)get_item(rio, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; + case PROC_NODEID_WIO: + result = sprintf(page, "%d\n", (int)get_item(wio, + pid_or_node, -ENXIO, -ENETUNREACH)); + goto copy; +#endif /* CONFIG_MOSIX_RESEARCH */ + default: + result = -EPERM; /* write-only */ + goto out; + } + result = -EBADF; + read_lock(&tasklist_lock); + r = find_any_task_by_pid(pid_or_node); + if(r && !(r->mosix.dflags & DREMOTE)) + r = NULL; + if(r) + get_task_struct(r); + read_unlock(&tasklist_lock); + if(!r) + goto out; + result = (*func)(r, page); + free_task_struct(r); + copy: + if(result > 0) + { + if(*ppos < 0 || *ppos >= result) + count = 0; /* offset outside the generated text: nothing left to read (count is unsigned, so it must never go negative) */ + else if(count > result - *ppos) + count = result - *ppos; /* clamp to the bytes remaining after *ppos */ + if(count && copy_to_user(buf, page + *ppos, count)) + result = -EFAULT; /* no early return: fall through to out: so the page is freed */ + else { +
*ppos += count; + result = count; + } + } + out: + free_page((unsigned long)page); + return(result); +} + +ssize_t +proc_mosix_sub_write(struct file *filp, const char *buf, size_t count, loff_t *ppos) +{ + unsigned long ino = filp->f_dentry->d_inode->i_ino; /* keep all bits: entry id is in the low 16, pid/node above them (same split as proc_mosix_sub_read) */ + int pid_or_node = ino >> 16; + + switch(ino & 0xffff) + { + case PROC_REMPID_GOTO: + return(proc_mosix_set_remote_goto(filp, pid_or_node, + buf, count)); + default: + return(-EPERM); /* read-only */ + } +} + +ssize_t +proc_mosix_pid_set_migrate(struct file *filp, struct task_struct *task, + const char *buf, size_t cnt) +{ + int mig, error, len; + struct ctl_mosix tmpctl = + { 0, "migrate", NULL, sizeof(int), 0, ctl_minmaxv }; + + if(task != current) + return(-EPERM); + + tmpctl.data = &mig; + tmpctl.extra[0] = (void *) DM_BALANCE; + tmpctl.extra[1] = (void *) MOSIX_MAX; + error = get_intarr(filp, buf, cnt, &tmpctl); + if (error < 0) + return (error); + len = error; + + if ((error = mig_migrate(mig))) + return (error); + else + return (len); +} + +ssize_t +proc_mosix_pid_set_goto(struct file *filp, struct task_struct *task, + const char *buf, size_t cnt) +{ + int whereto, error; + struct ctl_mosix tmpctl = + { 0, "goto", NULL, sizeof(int), 0, ctl_minmaxv }; + + /* the rule is: if you can send it a signal, you may also migrate it: */ + /* so keep looking for changes in "kernel/signal.c" */ + + if ((current->euid ^ task->suid) && (current->euid ^ task->uid) && + (current->uid ^ task->suid) && (current->uid ^ task->uid) && + !capable(CAP_KILL)) + return(-EPERM); + + tmpctl.data = &whereto; + tmpctl.extra[0] = (void *) DM_BALANCE; + tmpctl.extra[1] = (void *) MOSIX_MAX; + error = get_intarr(filp, buf, cnt, &tmpctl); + if (error < 0) + return (error); + if(whereto > 0 && !mos_to_net(whereto, NULL)) + return(-ENODEV); + + if (whereto <= 0) { + switch (whereto) { + case DM_GOBACKHOME: + whereto = GOBACKHOME; + break; + case DM_BALANCE: + whereto = BALANCE; + break; + + default: +
return(-EINVAL); + } + } + if (task->mosix.stay & DSTAY) + return (-EROFS); + mosix_add_to_whereto(task, whereto); + return (error); +} + +ssize_t +proc_mosix_pid_set_lock(struct file *filp, struct task_struct *task, + const char *buf, size_t cnt) +{ + int lock, len; + struct ctl_mosix ctl = + { 0, "lock", NULL, sizeof(int), 0, ctl_boolean }; + + ctl.data = (void *)&lock; + + if(task == current) + { + len = get_intarr(filp, buf, cnt, &ctl); + if (len < 0) + return (len); + mig_set_lock(lock); + return (len); + } + if(!suser()) + return(-EPERM); + /* The SU has the special privillege of unlocking local processes */ + len = get_intarr(filp, buf, cnt, &ctl); + if(len < 0) + return(len); + if(lock) + return(-EINVAL); + task_lock(task); + if (task->mosix.dflags & (DDEPUTY|DFINISHED)) + len = -EACCES; + else + task->mosix.stay &= ~DNOMIGRATE; + task_unlock(task); + return(len); +} + +ssize_t +proc_mosix_pid_set_disclosure(struct file *filp, struct task_struct *task, + const char *buf, size_t cnt) +{ + int disclosure, len; + struct ctl_mosix ctl = + { 0, "disclosure", NULL, sizeof(int), 0, ctl_minmaxv }; + + ctl.data = (void *)&disclosure; + ctl.extra[0] = (void *) 0; + ctl.extra[1] = (void *) 3; + + if(task != current) + return(-EPERM); + len = get_intarr(filp, buf, cnt, &ctl); + if (len < 0) + return (len); + current->mosix.disclosure = disclosure; + tell_process(current, DREQ_INFOCNG); + return(len); +} + +ssize_t +proc_mosix_pid_set_sigmig(struct file *filp, struct task_struct *task, + const char *buf, size_t cnt) +{ + int sig = current->mosix.sigmig, len; + struct ctl_mosix tmpctl = + { 0, "sigmig", NULL, sizeof(int), 0, ctl_minmaxv }; + + if(task != current) + return(-EPERM); + + tmpctl.data = &sig; + tmpctl.extra[0] = (void *) 0; + tmpctl.extra[1] = (void *) _NSIG; + len = get_intarr(filp, buf, cnt, &tmpctl); + if(len >= 0) + current->mosix.sigmig = sig; + return (len); +} + +#ifdef CONFIG_MOSIX_FS +ssize_t +proc_mosix_pid_set_selected(struct file *filp, struct 
task_struct *task, + const char *buf, size_t cnt) +{ + int sel, len; + struct ctl_mosix ctl = + { 0, "lock", NULL, sizeof(int), 0, ctl_minmaxv }; + + if(task != current) + return(-EPERM); + + ctl.data = (void *) &sel; + ctl.extra[0] = (void *) 0; + ctl.extra[1] = (void *) MOSIX_MAX; + sel = current->mosix.selected; + len = get_intarr(filp, buf, cnt, &ctl); + if (len >= 0 && sel != current->mosix.selected) + { + current->mosix.selected = sel; +#ifdef CONFIG_MOSIX_DFSA + current->mosix.dupdates |= DFSA_UPDSEL; +#endif /* CONFIG_MOSIX_DFSA */ + } + return (len); +} +#endif /* CONFIG_MOSIX_FS */ + +static ssize_t +proc_mosix_set_remote_goto(struct file *filp, int pid, + const char *buf, size_t cnt) +{ + struct task_struct *task; + int error, whereto, len; /* len: byte count reported by get_intarr, returned on success */ + struct ctl_mosix tmpctl = + { 0, "goto", NULL, sizeof(int), 0, ctl_minmaxv }; + + if(!suser()) + return(-EACCES); + read_lock(&tasklist_lock); + task = find_any_task_by_pid(pid); + if(task && !(task->mosix.dflags & DREMOTE)) + task = NULL; + if(task) + get_task_struct(task); + read_unlock(&tasklist_lock); + if (!task) + return(-EBADF); + + tmpctl.data = &whereto; + tmpctl.extra[0] = (void *) DM_BALANCE; + tmpctl.extra[1] = (void *) MOSIX_MAX; + error = len = get_intarr(filp, buf, cnt, &tmpctl); + if (error < 0) + goto free; + if(whereto > 0) + { + error = -ENODEV; + if(!mos_to_net(whereto, NULL)) + goto free; + } + else switch(whereto) + { + case DM_GOBACKHOME: + whereto = GOBACKHOME; + break; + case DM_BALANCE: + whereto = BALANCE; + break; + default: + mosix_panic("remote_goto: invalid negative"); + } + + lock_mosix(); + if(!(task->mosix.dflags & DFINISHED)) + mosix_add_to_whereto(task, whereto); + unlock_mosix(); + error = len; /* a write handler reports the bytes it consumed, as proc_mosix_pid_set_goto does; 0 would read as "nothing written" */ + free: + free_task_struct(task); + return (error); +} + +static int +proc_mosix_get_remote_from(struct task_struct *task, char *buf) +{ + int node; + int len; + + node = task->mosix.deppe; +#ifdef CONFIG_MOSIX_DIAG + if (!node) { + mosix_panic("get_remote_from: deppe is zero"); + node =
mosix_config_get_pe(); + } +#endif + sprintf(buf, "%-6d", node); + if (mos_to_ascii(node, buf + 6, 1000) < 0) { +#ifdef CONFIG_MOSIX_DIAG + printk("%d(%s)-proc_mosix_get_remote_from: mos_to_ascii failed" + " for %d\n", current->pid, current->comm, node); +#endif + return (-EDIST); + } + len = strlen(buf); + buf[len++] = '\n'; + return (len); +} + +static int +proc_mosix_get_remote_stats(struct task_struct *tsk, char *buf) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + unsigned long vsize; + unsigned long s = LOGICAL_STATE(tsk); + char state; + + if(s == TASK_RUNNING) + state = 'R'; + else if(s & TASK_INTERRUPTIBLE) + state = 'S'; + else if(s & TASK_UNINTERRUPTIBLE) + state = 'D'; + else + { + printk("Oops, remote-status = 0x%lx\n", s); + state = '?'; + } + vsize = 0; + mm = tsk->mm; + if (mm && mm != &init_mm) + { + down_read(&mm->mmap_sem); + for(vma = mm->mmap ; vma ; vma = vma->vm_next) + vsize += vma->vm_end - vma->vm_start; + up_read(&mm->mmap_sem); + } + + return(sprintf(buf, "utime=%lu\ncutime=%lu\n" + "nice=%ld\nstate=%c\nvsize=%lu\nrss=%lu\n" + "nswap=%lu\ncnswap=%lu\n", + (unsigned long)(tsk->times.tms_utime + tsk->mosix.uttime), + (unsigned long)(tsk->times.tms_cutime + tsk->mosix.cutime), + tsk->nice, + state, + vsize, + tsk->mm ? 
tsk->mm->rss : 0, + tsk->nswap, + tsk->cnswap) + 1); +} + +static int +proc_mosix_get_remote_identity(struct task_struct *tsk, char *buf) +{ + char *obuf = buf; + + buf[0] = '\0'; + if(tsk->mosix.disclosure) + { + buf += sprintf(buf, "pid=%d\n", tsk->mosix.mypid); + if(tsk->mosix.depinfo.tgid != tsk->mosix.mypid) + buf += sprintf(buf, "tgid=%d\n", + tsk->mosix.depinfo.tgid); + } + if(tsk->mosix.disclosure > 1) + buf += sprintf(buf, "uid=%d\ngid=%d\n", tsk->uid, tsk->gid); + if(tsk->mosix.disclosure > 2) + buf += sprintf(buf, "pgrp=%d\nsession=%d\nnmigs=%d\ncommand=%.16s\n", + tsk->mosix.depinfo.pgrp, tsk->mosix.depinfo.session, + tsk->mosix.nmigs, tsk->mosix.depinfo.comm); + return(buf - obuf); +} diff -urN linux-2.4.17/mos/prequest.c linux_umopenmosix/mos/prequest.c --- linux-2.4.17/mos/prequest.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/prequest.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,813 @@ +/* Changes since Feb 12, 2002 by Moshe Bar + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. 
+ * + * Author(s): Amnon Shiloh, Moshe Bar + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int request_on_remote(struct prequest *); + +static inline void +issue_request(struct prequest *pr, struct task_struct *to) +{ + sigset_t blocked; + register struct task_struct *p = current; + unsigned long flags; + DECLARE_WAITQUEUE(wait, p); + + if(to->mosix.dflags & DFINISHED) + { + pr->rflags = PR_ERROR; + goto out; + } + pr->rflags = PR_WAITING; + init_waitqueue_head(&pr->waitq); + pr->rnext = to->mosix.prequest; + to->mosix.prequest = pr; + wake_up_mosix(to); + if(!p->sig) /* can happen when calling from EXIT */ + { + pr->rflags &= ~PR_WAITING; + goto out; + } + spin_lock_irqsave(&p->sigmask_lock, flags); + blocked = p->blocked; + siginitsetinv(&p->blocked, sigmask(SIGKILL)); + if(p->sig->action[SIGTERM-1].sa.sa_handler == SIG_DFL) + p->blocked.sig[0] &= ~sigmask(SIGTERM); + if(p->sig->action[SIGINT-1].sa.sa_handler == SIG_DFL) + p->blocked.sig[0] &= ~sigmask(SIGINT); + if(p->sig->action[SIGQUIT-1].sa.sa_handler == SIG_DFL) + p->blocked.sig[0] &= ~sigmask(SIGQUIT); + if(p->sig->action[SIGHUP-1].sa.sa_handler == SIG_DFL) + p->blocked.sig[0] &= ~sigmask(SIGHUP); + recalc_sigpending(p); + spin_unlock_irqrestore(&p->sigmask_lock, flags); + add_wait_queue(&pr->waitq, &wait); + set_current_state(TASK_INTERRUPTIBLE); + while((pr->rflags == PR_WAITING) && !signal_pending(p)) + { + unlock_mosix(); + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + lock_mosix(); + } + remove_wait_queue(&pr->waitq, &wait); + set_current_state(TASK_RUNNING); + spin_lock_irqsave(&p->sigmask_lock, flags); + p->blocked = blocked; + recalc_sigpending(p); + spin_unlock_irqrestore(&p->sigmask_lock, flags); + pr->rflags &= ~PR_WAITING; + out: + free_task_struct(to); +} + +char * +request_process_to_buf_arg(struct task_struct *p, char *buffer, int command, int arg) +{ + struct prequest *pr; + char *reply = buffer; + + 
get_task_struct(p); + if(!(pr = kmalloc(sizeof(struct prequest), GFP_KERNEL))) + { + free_task_struct(p); + return(reply); + } + pr->command = command; + pr->param = arg; + pr->ereply = buffer; + pr->len = 0; + pr->reply = 0; + if(command == PR_PROCFS_TASK_MEM && pick_ps(p, pr)) + { + reply = (char *)pr->reply; + kfree(pr); + free_task_struct(p); + return(reply); + } + lock_mosix(); + issue_request(pr, p); + if(pr->rflags == PR_DONE) + reply = (char *)pr->reply; + if(pr->rflags & (PR_DONE|PR_ERROR)) + kfree(pr); + unlock_mosix(); + return(reply); +} + +int +request_process_arg2(struct task_struct *p, void *indata, int command, int arg, int arg2) +{ + struct prequest *pr; + int reply = -ENOMEM; + + get_task_struct(p); + if(!(pr = kmalloc(sizeof(struct prequest), GFP_KERNEL))) + { + free_task_struct(p); + return(reply); + } + reply = -EIO; + pr->command = command; + pr->param = arg; + pr->param2 = arg2; + pr->ereply = indata; + pr->len = 0; + pr->reply = 0; + switch(command) + { + case PR_PROCFS_GET_ARG: + case PR_PROCFS_GET_ENV: + case PR_PROCFS_GET_STAT_PARTS: + case PR_PROCFS_GET_STATM: + if(pick_ps(p, pr)) + { + reply = pr->reply; + kfree(pr); + free_task_struct(p); + return(reply); + } + break; +#ifdef CONFIG_MOSIX_DFSA + case PR_DFSA_SYNCHRONIZE: + if(p->mosix.ps) + { + kfree(pr); + free_task_struct(p); + return(0); + } +#endif /* CONFIG_MOSIX_DFSA */ + } + lock_mosix(); + issue_request(pr, p); + if(pr->rflags == PR_DONE) + reply = pr->reply; + if(pr->rflags & (PR_DONE|PR_ERROR)) + kfree(pr); + else + reply = -EINTR; + unlock_mosix(); + return(reply); +} + +int +request_on_local(struct prequest *pr) +{ + switch(pr->command) + { + case PR_PROCFS_TASK_MEM: + pr->reply = (long)(current->mm ? 
+ task_mem(current->mm, pr->ereply) : pr->ereply); + break; + case PR_PROCFS_GET_ENV: + pr->reply = proc_pid_environ(current, pr->ereply); + break; + case PR_PROCFS_GET_ARG: + pr->reply = proc_pid_cmdline(current, pr->ereply); + break; + case PR_PROCFS_GET_STAT_PARTS: + proc_get_stat_parts(current, current->mm, + (struct proc_remote_stat_parts *)pr->ereply); + pr->reply = 0; + break; + case PR_PROCFS_GET_STATM: + pr->reply = proc_pid_statm(current, pr->ereply); + break; + case PR_PROCFS_GET_EXE: + pr->reply = (int)first_executable(); + break; + case PR_PROCFS_MAP_INFO: + pr->reply = proc_list_maps((struct vmamaps *)pr->ereply, + pr->param); + ref_mapped_files((struct vmamaps *)pr->ereply, + pr->reply); + break; + case PR_PROCFS_MEM_READ: + pr->reply = access_process_vm(current, + (unsigned long)pr->param, pr->ereply, + pr->param2, 0); + break; + case PR_PROCFS_MEM_WRITE: + pr->reply = access_process_vm(current, + (unsigned long)pr->param, pr->ereply, + pr->param2, 1); + break; +#ifdef CONFIG_MOSIX_DFSA + case PR_DFSA_SYNCHRONIZE: +#endif /* CONFIG_MOSIX_DFSA */ + case PR_PTRACE_NOT_TRACED: + case PR_PROCFS_UPDATE_TIMES: + pr->reply = 0; + break; + case PR_PTRACE_GET_STACK_LONG: + pr->reply = get_stack_long(current, pr->param); + break; + case PR_PTRACE_PUT_STACK_LONG: + pr->reply = put_stack_long(current, pr->param, + (unsigned long)pr->param2); + break; + case PR_PTRACE_GETREGS: + ptrace_getregs((unsigned long *)pr->ereply); + break; + case PR_PTRACE_SETREGS: + ptrace_putregs((unsigned long *)pr->ereply); + pr->reply = 0; + break; + case PR_PTRACE_PEEKUSER: + *((unsigned long *)pr->ereply) = + ptrace_peekuser(pr->param); + pr->reply = 0; + break; + case PR_PTRACE_POKEUSER: + ptrace_pokeuser(pr->param, pr->param2); + pr->reply = 0; + break; + case PR_PTRACE_CONT: + ptrace_cont(pr->param); + pr->reply = 0; + break; + case PR_PTRACE_SINGLE_STEP: + ptrace_single_step(); + pr->reply = 0; + break; + case PR_PTRACE_GETFPREGS: + ptrace_getfpregs((struct user_i387_struct 
*)pr->ereply); + pr->reply = 0; + break; + case PR_PTRACE_SETFPREGS: + ptrace_setfpregs((struct user_i387_struct *)pr->ereply); + pr->reply = 0; + break; + case PR_PTRACE_GETFPXREGS: + ptrace_getfpxregs( + (struct user_fxsr_struct *)pr->ereply); + pr->reply = 0; + break; + case PR_PTRACE_SETFPXREGS: + ptrace_setfpxregs( + (struct user_fxsr_struct *)pr->ereply); + pr->reply = 0; + break; + } + return(PR_DONE); +} + +void +free_unwanted_request(struct prequest *pr) +{ + struct file *filp; + + if((pr->rflags & (PR_WAITING|PR_DONE|PR_ERROR)) == PR_DONE) + switch(pr->command) + { + case PR_PROCFS_GET_EXE: + filp = (struct file *)pr->reply; + if(filp) + fput(filp); + break; + } + kfree(pr); +} + +void +process_requests(void) +{ + register struct prequest *pr; + int res; + + lock_mosix(); + while((pr = current->mosix.prequest)) + { + current->mosix.prequest = pr->rnext; + if(!(pr->rflags & PR_WAITING)) + { + kfree(pr); + continue; + } + unlock_mosix(); + pr->reply = 0; + if(current->mosix.dflags & DDEPUTY) + res = request_on_remote(pr); + else + res = request_on_local(pr); + lock_mosix(); + pr->rflags |= res; + if(pr->rflags & PR_WAITING) + wake_up(&pr->waitq); + else + { + unlock_mosix(); + free_unwanted_request(pr); + lock_mosix(); + } + } + unlock_mosix(); +} + +void +discard_requests(void) +{ + register struct prequest *pr; + + lock_mosix(); + while((pr = current->mosix.prequest)) + { + current->mosix.prequest = pr->rnext; + if(pr->rflags & PR_WAITING) + { + pr->rflags |= PR_ERROR; + wake_up(&pr->waitq); + } + else + { + unlock_mosix(); + free_unwanted_request(pr); + lock_mosix(); + } + } + unlock_mosix(); +} + +int +request_on_remote(struct prequest *pr) +{ + struct prequest_h p; + struct prequest_reply_h r; + void *snd; + int sndlen; + int err = 0; + + p.command = pr->command; + p.param = pr->param; + p.param2 = pr->param2; + p.len = pr->len; + snd = NULL; + sndlen = 0; + switch(pr->command) + { + case PR_PROCFS_MEM_READ: + flush_read_cache(); + break; + case 
PR_PROCFS_MEM_WRITE: + flush_ucache(); + snd = pr->ereply; + sndlen = pr->param2; + break; +#ifdef CONFIG_MOSIX_DFSA + case PR_DFSA_SYNCHRONIZE: + /* once we are here - we have already achieved + * the purpose of synchronization! + */ + pr->reply = 0; + goto done; +#endif /* CONFIG_MOSIX_DFSA */ + case PR_PTRACE_SETREGS: + snd = pr->ereply; + sndlen = 17 * sizeof(long); + break; + case PR_PTRACE_SETFPREGS: + snd = pr->ereply; + sndlen = sizeof(struct user_i387_struct); + break; + case PR_PTRACE_SETFPXREGS: + snd = pr->ereply; + sndlen = sizeof(struct user_fxsr_struct); + break; + /* the following may be done locally */ + case PR_PROCFS_GET_STAT_PARTS: + case PR_PROCFS_GET_STATM: + case PR_PROCFS_TASK_MEM: + if(pick_ps(current, pr)) + goto done; + break; + /* the following read from user: */ + case PR_PROCFS_GET_ARG: + case PR_PROCFS_GET_ENV: + if(pick_ps(current, pr)) + goto done; + flush_read_cache(); + break; + case PR_PROCFS_UPDATE_TIMES: + mosix_deputy_rusage(0); + pr->reply = 0; + goto done; + } + if(deputy_request(DEP_PREQUEST, &p, sizeof(p), snd, sndlen, 0, + (void **)&r, -sizeof(r)) || r.error) + { + err = PR_ERROR; + goto done; + } + pr->reply = r.reply; + if(r.datalen > 0 && comm_copydata(pr->ereply, r.datalen, 0)) + err = PR_ERROR; + else + switch(pr->command) + { + case PR_PROCFS_TASK_MEM: + pr->reply = (int)(pr->ereply + r.reply); + break; + case PR_PROCFS_GET_STAT_PARTS: + mosix_deputy_rusage(0); + break; + case PR_PROCFS_GET_ENV: + case PR_PROCFS_GET_ARG: + case PR_PROCFS_GET_STATM: + case PR_PTRACE_GET_STACK_LONG: + case PR_PTRACE_PUT_STACK_LONG: + case PR_PTRACE_GETREGS: + case PR_PTRACE_SETREGS: + case PR_PTRACE_PEEKUSER: + case PR_PTRACE_POKEUSER: + case PR_PTRACE_CONT: + case PR_PTRACE_SINGLE_STEP: + case PR_PTRACE_GETFPREGS: + case PR_PTRACE_SETFPREGS: + case PR_PTRACE_GETFPXREGS: + case PR_PTRACE_SETFPXREGS: + case PR_PROCFS_MEM_READ: + case PR_PROCFS_MEM_WRITE: + break; + case PR_PROCFS_MAP_INFO: + ref_mapped_files((struct vmamaps 
*)pr->ereply, + pr->reply); + break; + case PR_PROCFS_GET_EXE: + { + struct file *filp = (struct file *)pr->reply; + if(filp) + get_file(filp); + } + break; + default: + printk("request on remote - unknown code\n"); + } + done: + return(PR_DONE|err); +} + +int +remote_prequest(struct prequest_h *p) +{ + struct prequest_reply_h r; + char *buffer; + int err; + unsigned long data[17]; + int allocated = 0; + + r.error = r.datalen = 0; + switch(p->command) + { + case PR_PROCFS_TASK_MEM: + case PR_PROCFS_GET_ENV: + case PR_PROCFS_GET_ARG: + case PR_PROCFS_GET_STAT_PARTS: + case PR_PROCFS_GET_STATM: + case PR_PROCFS_MAP_INFO: + case PR_PROCFS_MEM_READ: + case PR_PTRACE_GETFPREGS: + case PR_PTRACE_GETFPXREGS: + if((buffer = (char *)__get_free_page(GFP_KERNEL))) + allocated = 1; + else + { + r.error = -ENOMEM; + buffer = NULL; + } + break; + case PR_PTRACE_GETREGS: + case PR_PTRACE_PEEKUSER: + buffer = (char *)data; + break; + default: + buffer = NULL; + break; + } + if(!r.error) + switch(p->command) + { + case PR_PROCFS_TASK_MEM: + r.datalen = r.reply = current->mm ? 
+ task_mem(current->mm, buffer) - buffer : 0; + break; + case PR_PROCFS_GET_ENV: + r.datalen = r.reply = proc_pid_environ(current, buffer); + break; + case PR_PROCFS_GET_ARG: + r.datalen = r.reply = proc_pid_cmdline(current, buffer); + break; + case PR_PROCFS_GET_STAT_PARTS: + proc_get_stat_parts(current, current->mm, + (struct proc_remote_stat_parts *)buffer); + r.reply = 0; + r.datalen = sizeof(struct proc_remote_stat_parts); + break; + case PR_PROCFS_GET_STATM: + r.datalen = r.reply = proc_pid_statm(current, buffer); + break; + case PR_PROCFS_GET_EXE: + r.reply = (int)first_executable(); + break; + case PR_PROCFS_MAP_INFO: + r.datalen = r.reply = proc_list_maps( + (struct vmamaps *)buffer, p->param); + break; + case PR_PROCFS_MEM_READ: + r.datalen = r.reply = access_process_vm(current, + p->param, (void *)buffer, p->param2, 0); + break; + case PR_PROCFS_MEM_WRITE: + if(comm_recvdata((void **)&buffer)) + r.reply = 0; + else + { + r.reply = access_process_vm(current, p->param, + (void *)buffer, p->param2, 1); + comm_free(buffer); + } + break; + case PR_PTRACE_GET_STACK_LONG: + r.reply = get_stack_long(current, p->param); + break; + case PR_PTRACE_PUT_STACK_LONG: + r.reply = put_stack_long(current, p->param, + (unsigned long)p->param2); + break; + case PR_PTRACE_GETREGS: + ptrace_getregs((unsigned long *)buffer); + r.datalen = 17 * sizeof(unsigned long); + r.reply = 0; + break; + case PR_PTRACE_SETREGS: + r.reply = 0; + if(!comm_recvdata((void **)&buffer)) + { + ptrace_putregs((unsigned long *)buffer); + comm_free(buffer); + } + break; + case PR_PTRACE_PEEKUSER: + data[0] = ptrace_peekuser(p->param); + r.datalen = sizeof(unsigned long); + r.reply = 0; + break; + case PR_PTRACE_POKEUSER: + ptrace_pokeuser(p->param, p->param2); + r.datalen = r.reply = 0; + break; + case PR_PTRACE_CONT: + ptrace_cont(p->param); + r.datalen = r.reply = 0; + break; + case PR_PTRACE_NOT_TRACED: + current->ptrace = 0; + r.datalen = r.reply = 0; + break; + case PR_PTRACE_SINGLE_STEP: + 
ptrace_single_step(); + r.datalen = r.reply = 0; + break; + case PR_PTRACE_GETFPREGS: + ptrace_getfpregs((struct user_i387_struct *)buffer); + r.datalen = sizeof(struct user_i387_struct); + r.reply = 0; + break; + case PR_PTRACE_SETFPREGS: + r.reply = 0; + if(!comm_recvdata((void **)&buffer)) + { + ptrace_setfpregs( + (struct user_i387_struct *)buffer); + comm_free(buffer); + } + break; + case PR_PTRACE_GETFPXREGS: + ptrace_getfpxregs((struct user_fxsr_struct *)buffer); + r.datalen = sizeof(struct user_fxsr_struct); + r.reply = 0; + break; + case PR_PTRACE_SETFPXREGS: + r.reply = 0; + if(!comm_recvdata((void **)&buffer)) + { + ptrace_setfpxregs( + (struct user_fxsr_struct *)buffer); + comm_free(buffer); + } + break; + } + err = comm_send(DEP_PREQUEST|REPLY, &r, sizeof(r), + r.datalen ? buffer : NULL, r.datalen, 0); + if(allocated) + free_page((unsigned long)buffer); + comm_free(p); + return(err); +} + +void +process_only_easy_requests(void) +{ + register struct prequest *pr, *prev, *next; + + if(!current->mosix.ps) + return; + lock_mosix(); + for(prev = NULL, pr = current->mosix.prequest ; pr ; ) + switch(pr->command) + { +#ifdef CONFIG_MOSIX_DFSA + case PR_DFSA_SYNCHRONIZE: + pr->reply = 0; + pr->rflags |= PR_DONE; + goto next_one; +#endif /* CONFIG_MOSIX_DFSA */ + case PR_PROCFS_GET_ARG: + case PR_PROCFS_GET_ENV: + case PR_PROCFS_TASK_MEM: + case PR_PROCFS_GET_STAT_PARTS: + case PR_PROCFS_GET_STATM: + pick_ps(current, pr); +#ifdef CONFIG_MOSIX_DFSA + next_one: +#endif /* CONFIG_MOSIX_DFSA */ + next = pr->rnext; + if(prev) + prev->rnext = next; + else + current->mosix.prequest = next; + if(pr->rflags & PR_WAITING) + wake_up(&pr->waitq); + else + kfree(pr); + pr = next; + break; + default: + prev = pr; + pr = pr->rnext; + } + unlock_mosix(); +} + +/* Saved record structure: + * 1: args (null terminated) + * 2: env (null terminated) + * 3: task_mem (null terminated) + * 4: struct proc_remote_stat_parts (32 bytes at the moment) + * 5: statm output (null terminated) + 
* Below is an overkill estimate of 3+4+5: + */ +#define MAX_REQ_FOR_STATS 350 + +char * +fill_common_ps_info(int *n) +{ + char *buffer = kmalloc(2*PAGE_SIZE+MAX_REQ_FOR_STATS, GFP_KERNEL); + char *e, *p; + struct task_struct *t = current; + int l1, l2, l3; + + if(n) + *n = 0; + if(!buffer) + return(NULL); + e = buffer + (l1 = proc_pid_cmdline(t, buffer)); + e += (l2 = proc_pid_environ(t, e)); + e += (l3 = task_mem(t->mm, e) - e); + proc_get_stat_parts(t, t->mm, (struct proc_remote_stat_parts *)e); + e += sizeof(struct proc_remote_stat_parts); + e += proc_pid_statm(t, e); + *e++ = '\0'; + if((p = kmalloc(e - buffer + 3*sizeof(short), GFP_KERNEL))) + { + ((short *)p)[0] = l1; + ((short *)p)[1] = l2; + ((short *)p)[2] = l3; + memcpy(p + 3 * sizeof(short), buffer, l1 = e - buffer); + } + kfree(buffer); + if(n) + *n = p ? l1 + 3*sizeof(short) : 0; + return(p); +} +void +store_common_ps_info(void) +{ + int n; + char *p; + +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.ps) + { + mosix_panic("already has ps"); + return; + } +#endif /* CONFIG_MOSIX_DIAG */ + if(current->mosix.dflags & DDEPUTY) + { + flush_read_cache(); + if(deputy_request(DEP_PSINFO, NULL, 0, NULL, 0, 0, + (void **)&n, -sizeof(n)) || n <= 0) + return; + if(!(p = kmalloc(n, GFP_KERNEL))) + { + comm_flushdata(COMM_ALLDATA); + return; + } + if(comm_copydata(p, n, 0)) + kfree(p); + else + { + lock_mosix(); + current->mosix.ps = p; + unlock_mosix(); + } + } + else + { + p = fill_common_ps_info(NULL); + lock_mosix(); + current->mosix.ps = p; + unlock_mosix(); + } + if(current->mosix.prequest) + process_only_easy_requests(); +} + +void +stop_storing_common_ps_info(void) +{ + char *ps; + + lock_mosix(); + ps = current->mosix.ps; + current->mosix.ps = NULL; + unlock_mosix(); + if(ps) + kfree(ps); +} + +int +pick_ps(struct task_struct *task, struct prequest *pr) +{ + register char *p, *t; + int l1, l2, l3; + + lock_mosix(); + if(!(p = task->mosix.ps)) + { + unlock_mosix(); + return(0); + } + l1 = *((short *)p)++; + l2 
= *((short *)p)++; + l3 = *((short *)p)++; + switch(pr->command) + { + case PR_PROCFS_GET_STATM: + p += l1+l2+l3 + sizeof(struct proc_remote_stat_parts); + t = pr->ereply; + while(*p) + *t++ = *p++; + pr->reply = t - pr->ereply; + break; + case PR_PROCFS_GET_STAT_PARTS: + p += l1 + l2 + l3; + memcpy(pr->ereply, p, + sizeof(struct proc_remote_stat_parts)); + pr->reply = sizeof(struct proc_remote_stat_parts); + break; + case PR_PROCFS_TASK_MEM: + memcpy(pr->ereply, p + l1 + l2, l3); + pr->reply = (int)(pr->ereply + l3); + break; + case PR_PROCFS_GET_ENV: + memcpy(pr->ereply, p + l1, l2); + pr->reply = l2; + break; + case PR_PROCFS_GET_ARG: + memcpy(pr->ereply, p, l1); + pr->reply = l1; + break; + default: + panic("pick_ps: command"); + } + pr->rflags |= PR_DONE; + unlock_mosix(); + return(1); +} diff -urN linux-2.4.17/mos/remote.c linux_umopenmosix/mos/remote.c --- linux-2.4.17/mos/remote.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/remote.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1667 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + + + + + +int +remote_wait(int expect, void **head, int *hlen) +{ + int type, error = 0; + register struct task_struct *p = current; + struct syscall_ret_h *rp; + char has_ret_value = 0; + int ret_value = 0; /* value set only to pacify compiler */ + +#ifdef CONFIG_MOSIX_DIAG + if(!(p->mosix.dflags & DPSYNC)) + mosix_panic("remote_wait: not synced"); +#endif /* CONFIG_MOSIX_DIAG */ + while(1) + { + if(URGENT_REMOTE_CONDITIONS(p) && + !(p->mosix.dflags & DSENTURGENT)) + inform_deputy_of_urgent(); + if((type = comm_recv(head, hlen)) < 0) /* an error */ + return(type); + if(expect == DEP_USERMODE) + /* after migration from REMOTE to REMOTE, certain "replies" + * can arrive as a result from the original call. 
+ * Fortunately, there are not too many of them: + */ + switch(type & ~USERMODE) + { + case REM_SYSCALL|REPLY: + rp = *head; + absorb_deptime(rp->deputytime); + remote_unpack_read_cache_data(rp); + has_ret_value = 1; + ret_value = rp->ret; + /* fall through */ + case REM_SYSCALL_TRACE|REPLY: + if(!(type & USERMODE)) + { + comm_free(*head); + *head = NULL; + continue; + } + } + if(type & USERMODE) + { + p->mosix.dflags &= ~DPSYNC; + if(expect == DEP_USERMODE) + { + if(has_ret_value) + mos_to_regs(&p->mosix)->eax = ret_value; + return(0); + } + if(type != (expect | USERMODE)) + { + printk("REMOTE-%d: Unexected USERMODE while waiting for 0x%x\n", + p->pid, expect); + mosix_panic("Unexpected USERMODE"); + return(-EINVAL); + } + } + if((type & ~USERMODE) == expect) + return(0); + switch(type & ~USERMODE) + { + case DEP_SYNC: + comm_free(*head); + break; + case DEP_NOTHING: + comm_free(*head); + error = comm_send(DEP_NOTHING|REPLY, NULL, 0, + NULL, 0, 0); + break; + case DEP_MMAP: + error = remote_mmap((struct mmap_parameters_h *)*head, 0); + break; + case DEP_BRK: + error = remote_brk((struct brk_parameters_h *)*head); + break; + case DEP_MUNMAP: + error = remote_munmap((struct munmap_parameters_h *)*head); + break; + case DEP_MPROTECT: + error = remote_mprotect((struct mprotect_parameters_h *)*head); + break; + case DEP_LISTHOLD: + comm_free(*head); + error = remote_report_files(); + break; + case DEP_SETUPFRAME: + error = remote_setup_frame((struct setupframe_parameters_h *)*head); + break; + case DEP_NICE: + error = remote_nice((long *)*head); + break; + case DEP_INFO: + error = remote_updinfo((struct disclosure_h *)*head); + break; + case DEP_CAPS: + error = remote_caps((kernel_cap_t *)*head); + break; + case DEP_OPCOSTS: + error = remote_depcosts(*head); + break; + case DEP_RESTORESIGCONTEXT: + error = remote_restore_sigcontext((struct sigcontext **)*head); + break; + case DEP_PREQUEST: + error = remote_prequest((struct prequest_h *)*head); + break; + case 
DEP_COPY_FROM_USER: + error = remote_copy_from_user((struct user_copy_h *)*head); + break; + case DEP_COPY_TO_USER: + error = remote_copy_to_user((struct user_copy_h *)*head); + break; + case DEP_DATA_TO_USER: + error = remote_data_to_user((struct user_copy_h *)*head); + break; + case DEP_CLEAR_USER: + error = remote_clear_user((struct user_copy_h *)*head); + break; + case DEP_STRNCPY_FROM_USER: + error = remote_strncpy_from_user((struct user_copy_h *)*head); + break; + case DEP_STRNLEN_USER: + error = remote_strnlen_user((struct strnlen_user_h *)*head); + break; + case DEP_VERIFY_WRITE: + error = remote_verify_write((struct user_copy_h *)*head); + break; + case DEP_CSUM_COPY_FROM_USER: + error = remote_csum_copy_from_user((struct user_csum_copy_h *)*head); + break; + case DEP_CACHE_READ_DATA: + error = remote_unpack_read_cache_data(NULL); + /* NO REPLY! */ + break; + case DEP_RLIMIT: + error = remote_rlimit((struct rlimit_h *)*head); + break; + case DEP_TAKEURGENT: + comm_free(*head); + error = remote_urgent(); + break; + case DEP_RUSAGE: + error = remote_rusage((int *)*head); + break; + case DEP_PERSONALITY: + error = remote_personality((unsigned long *)*head); + break; + case DEP_EXECVE_COUNTS: + error = remote_execve_counts((struct execve_counts_h *)*head); + break; + case DEP_BRING_STRINGS: + error = remote_bring_strings((struct execve_bring_strings_h *)*head); + break; + case DEP_SETUP_ARGS: + error = remote_setup_args((struct execve_setup_args_h *)*head); + break; + case DEP_EXEC_MMAP: + error = remote_exec_mmap(); + break; + case DEP_INIT_AOUT_MM: + error = remote_init_aout_mm((struct exec *)*head); + break; + case DEP_ELF_SETUP: + error = remote_elf_setup((struct execve_elf_setup_h *)*head); + break; + case DEP_FIX_ELF_AOUT: + error = remote_fix_elf_aout((struct execve_fix_elf_aout_h *)*head); + break; + case DEP_DUMP_THREAD: + /* comm_free(*head); is not needed (NULL) */ + error = remote_dump_thread(); + break; + case DEP_LIST_VMAS: + /* comm_free(*head); 
is not needed (NULL) */
+			error = remote_list_vmas();
+			break;
+		case DEP_PLEASE_FORK:
+			error = remote_fork((struct fork_h *)*head);
+			break;
+		case DEP_BRING_ME_REGS:
+			error = remote_bring_me_regs((unsigned long *)*head);
+			break;
+		case DEP_DUMP_FPU:
+			/* comm_free(*head); is not needed (NULL) */
+			error = remote_dump_fpu();
+			break;
+		case DEP_COME_BACK:
+			if (!remote_come_back(*head) || bootexpel)
+				remote_disappear();
+			break;
+		case DEP_PLEASE_MIGRATE:
+			error = remote_goto_remote(*head);
+			break;
+		case DEP_CONSIDER:
+			error = remote_consider((int *)*head);
+			break;
+		case DEP_UPDATE_DECAY:
+			error = remote_setdecay((struct decay_h *)*head);
+			break;
+		case DEP_UPDATE_LOCK:
+			error = remote_set_lock((int *)*head);
+			break;
+		case DEP_PSINFO:
+			/* comm_free(*head); is not needed (NULL) */
+			error = remote_psinfo();
+			break;
+#ifdef CONFIG_MOSIX_DFSA
+		case DEP_DFSA_CLEAR:
+			remote_clear_dfsa();
+			error = comm_send(DEP_DFSA_CLEAR|REPLY, NULL, 0,
+								NULL, 0, 0);
+			break;
+		case DEP_DFSA_CHANGES:
+			error = remote_receive_dfsachanges((int *)*head);
+			break;
+		case DEP_READ_YOURSELF:
+			error = remote_read_yourself(
+					(struct read_yourself_h *)*head);
+			break;
+#endif /* CONFIG_MOSIX_DFSA */
+		default:
+			printk("REMOTE: Unexpected request type %x\n", type);
+			if(type != MIG_REQUEST)
+				mosix_panic("deputy_wait");
+			return(-EINVAL);
+		}
+		if(error)
+		{
+#ifdef CONFIG_MOSIX_DEBUG
+			printk("%d-remote_wait: Error %d\n", p->pid, error);
+#endif /* CONFIG_MOSIX_DEBUG */
+			return(error);
+		}
+	}
+}
+
+/*
+ * Send a request with comm_send() and wait (in remote_wait()) for the
+ * matching "type|REPLY" message.
+ *
+ * reslen > 0:  the reply header may be up to reslen bytes long; it is not
+ *		copied - "*result" is pointed at it and the caller has to
+ *		comm_free() it (it is freed here when "result" is NULL).
+ * reslen <= 0: the reply header may be up to -reslen bytes long; it is
+ *		copied into the buffer "result" (when reslen != 0) and freed.
+ *
+ * Returns 0, the error of comm_send()/remote_wait(), or -EDIST when the
+ * reply header is longer than allowed (the header is freed in that case).
+ */
+int
+remote_request(int type, void *header, int hlen, void *data, int dlen,
+				int uspace, void **result, int reslen)
+{
+	void *head;
+	int rhlen;
+	int error;
+
+	if((error = comm_send(type, header, hlen, data, dlen, uspace)))
+		return(error);
+	if((error = remote_wait(type|REPLY, &head, &rhlen)))
+		return(error);
+	if(reslen > 0)
+	{
+		if(rhlen > reslen)
+			goto lenerr;
+		if(result)
+			*result = head;
+		else	/* nobody can take ownership of the header */
+			comm_free(head);
+	}
+	else
+	{
+		if(rhlen > -reslen)
+			goto lenerr;
+		if(reslen)
+			memcpy(result, head, rhlen);
+		comm_free(head);
+	}
+	return(error);
+
+ lenerr:
+	/* both branches arrive here still owning "head": release it */
+	comm_free(head);
+	printk("remote_request type %x: %d instead of %d\n",
+						type, rhlen, reslen);
+	return(-EDIST);
+}
+
+/*
+ * Non-zero when DPSYNC is set in p's dflags or comm_peek() reports
+ * something pending.
+ */
+int
+remote_deputy_has_something_for_us(struct task_struct *p)
+{
+	return((p->mosix.dflags & DPSYNC) || comm_peek());
+}
+
+/*
+ * Sleep uninterruptibly until mosix.contact is set; then, unless DPSYNC is
+ * clear and nothing is pending, wait in remote_wait() for DEP_USERMODE.
+ * Any communication failure ends the process through remote_disappear().
+ */
+void
+wait_for_permission_to_continue()
+{
+	struct task_struct *p = current;
+	void *head;
+	int hlen;
+
+	if(!p->mosix.contact)
+	{
+		DECLARE_WAITQUEUE(wait, current);
+
+		add_wait_queue(mos_to_waitp(&p->mosix), &wait);
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		while(!p->mosix.contact)
+		{
+			/* re-arm before re-testing so a wake-up is not lost */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if(p->mosix.contact)
+				break;
+			schedule();
+		}
+		remove_wait_queue(mos_to_waitp(&p->mosix), &wait);
+		set_current_state(TASK_RUNNING);
+	}
+	if(!(p->mosix.dflags & DPSYNC))
+	{
+		if(!comm_peek())
+			return;
+		if(URGENT_REMOTE_CONDITIONS(p) &&
+					!(p->mosix.dflags & DSENTURGENT))
+			inform_deputy_of_urgent();
+		if(comm_send(REM_NULLMSG, NULL, 0, NULL, 0, 0))
+			remote_disappear();
+	}
+	if(remote_wait(DEP_USERMODE, &head, &hlen))
+		remote_disappear();
+	comm_free(head);
+	if(p->mosix.deputy_regs)
+	{
+		printk("%s: Missing registers on User-Mode",desc_mostask(NULL));
+		mosix_panic("missing regs");
+		remote_disappear();
+	}
+}
+
+/*
+ * Set DSENTURGENT (under runqueue_lock) and call comm_send_urgent(),
+ * unless the task has DPASSING or DINCOMING set.
+ */
+void
+inform_deputy_of_urgent(void)
+{
+	struct mosix_task *m = &current->mosix;
+
+	if(m->dflags & (DPASSING|DINCOMING))
+		return;
+#ifdef CONFIG_MOSIX_DIAG
+	if (m->dflags & DSENTURGENT)
+		mosix_panic("urgent already sent");
+#endif /* CONFIG_MOSIX_DIAG */
+
+	spin_lock_irq(&runqueue_lock);
+	m->dflags |= DSENTURGENT;
+	spin_unlock_irq(&runqueue_lock);
+	comm_send_urgent();
+	/* (any errors will be detected by a later communication failure) */
+}
+
+/*
+ * Send a REM_ASIG message: header holds "sigs" and the count "ninfo",
+ * data holds the ninfo siginfo_t records.  The result of comm_send() is
+ * not checked here.
+ */
+void
+transfer_signals_to_deputy(unsigned int sigs, siginfo_t *info, int ninfo)
+{
+	struct asig_h a;
+
+	a.sigs = sigs;
+	a.nforced = ninfo;
+	comm_send(REM_ASIG, &a, sizeof(a), info, ninfo * sizeof(siginfo_t), 0);
+}
+
+NORET_TYPE void
+remote_disappear(void)
+{ +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug) /* any */ + printk("%d-disappear\n", current->pid); +#endif /* CONFIG_MOSIX_DEBUG */ + do_exit(SIGKILL); + /*NOTREACHED*/ +} + +long +remote_standard_system_call(int n, struct pt_regs *regs) +{ + struct syscall_h s; + struct syscall_ret_h r; + char *p, *tofree; + int l; + int data_from_user; + void *head; + int rhlen; + int error; + + s.n = n; + memcpy((caddr_t)s.args, (caddr_t)regs, sizeof(s.args)); + switch(n) + { + /* the following system-calls are supposed to ask for the + * following registers - but why wait for them to ask? + */ + case __NR_fork: + case __NR_vfork: + case __NR_clone: + case __NR_sigaltstack: + current->mosix.pass_regs |= BIT_OF_REGISTER(esp); + break; + case __NR_execve: + current->mosix.pass_regs |= BIT_OF_REGISTER(edx); + break; + case __NR_sigsuspend: + case __NR_rt_sigsuspend: + current->mosix.pass_regs |= BIT_OF_REGISTER(orig_eax) | + BIT_OF_REGISTER(eax) | BIT_OF_REGISTER(eip); + break; + case __NR_sigreturn: + case __NR_rt_sigreturn: + current->mosix.pass_regs = ALL_REGISTERS; + break; + } + p = construct_ucache_envelope(&l, &data_from_user, &s, &tofree); + bump_syscalls(); + if(p) + bump_copyin(l); + /* Cannot use "remote_request" here, because we MUST free "tofree" */ + error = comm_send(REM_SYSCALL, &s, sizeof(s), p, l, data_from_user); + if(tofree) + kfree(tofree); + if(error || remote_wait(REM_SYSCALL|REPLY, &head, &rhlen)) + remote_disappear(); + memcpy(&r, head, sizeof(r)); + comm_free(head); + absorb_deptime(r.deputytime); + remote_unpack_read_cache_data(&r); + return(r.ret); +} + +void +mosix_remote_syscall_trace(void) +{ + struct rusage r; + +#ifdef CONFIG_MOSIX_DIAG + if(current->mosix.dflags & DPSYNC) + panic("mosix_remote_syscall_trace: synced"); +#endif /* CONFIG_MOSIX_DIAG */ + remote_fill_rusage(&r, 0); + if(remote_request(REM_SYSCALL_TRACE, &r, sizeof(r), NULL, 0, 0, + NULL, 0)) + remote_disappear(); + bump_syscalls(); /* no accurate numbers, but it costs! 
*/
+	wait_for_permission_to_continue();
+}
+
+/*
+ * Fill the (locked) page "page" of "fp" with data obtained through a
+ * REM_PAGE request.  A reply of -EFBIG is turned into a zero-filled page
+ * and success.  The page is marked up-to-date or in error accordingly and
+ * is unlocked before returning.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int
+remote_readpage(struct file *fp, struct page *page)
+{
+	int error;
+	char *buffer = kmap(page);
+	struct bring_page_h b;
+	struct page_ret_h r;
+	/* DPAGEIN if that flag is currently clear, otherwise 0 */
+	int dpagein = (current->mosix.dflags & DPAGEIN) ^ DPAGEIN;
+	register struct task_struct *p = current;
+
+#ifdef CONFIG_MOSIX_DIAG
+	if(p->mosix.dflags & DINCOMING)
+		panic("remote_readpage while migrating\n");
+#endif /* CONFIG_MOSIX_DIAG */
+	if(!PageLocked(page))
+		PAGE_BUG(page);
+	b.fp = home_file(fp);
+	b.offset = page->index << PAGE_CACHE_SHIFT;
+	b.nopage = fp->f_dentry->d_inode->u.remote_i.nopage;
+	if(p->mosix.rfreepages > 0)
+		p->mosix.rfreepages--;
+	else
+		bump_demandpages();
+	if(p->mosix.rpagecredit > 0)
+		p->mosix.rpagecredit--;
+	else
+	{
+#ifdef CONFIG_MOSIX_DEBUG
+		if(ds_debug & DSDEB_CONSIDER)
+			printk("%d: credit is over\n", p->pid);
+#endif /* CONFIG_MOSIX_DEBUG */
+		spin_lock_irq(&runqueue_lock);
+		p->mosix.dflags |= dpagein;
+		spin_unlock_irq(&runqueue_lock);
+	}
+	/*
+	 * "r" is filled in only by a successful REM_PAGE request, yet its
+	 * deputytime is consumed unconditionally below: give it a defined
+	 * value for the paths where no reply arrives.
+	 */
+	r.deputytime = 0;
+	if(!b.nopage) /* home rebooted, we should be dead anyway */
+		error = -EIO;
+	else if(!(error = remote_request(REM_PAGE, &b, sizeof(b), NULL, 0, 0,
+				(void **)&r, -sizeof(r))))
+		error = r.ret;
+	if(!error)
+		error = comm_copydata(buffer, PAGE_SIZE, 0);
+	else if(error == -EFBIG)
+	{
+		memset(buffer, 0, PAGE_SIZE);
+		error = 0;
+	}
+	if(error)
+	{
+		ClearPageUptodate(page);
+		SetPageError(page);
+	}
+	else
+		SetPageUptodate(page);
+	UnlockPage(page);
+	kunmap(page);
+	/* clear DPAGEIN only if this call was the one that could have set it */
+	spin_lock_irq(&runqueue_lock);
+	p->mosix.dflags &= ~dpagein;
+	spin_unlock_irq(&runqueue_lock);
+	absorb_deptime(r.deputytime);
+	return(error);
+}
+
+/*
+ * Obtain a 64-bit value with a REM_GETTSC request.  When the request
+ * fails, SIGSEGV is forced on the current task and 0 is returned (the
+ * value used to be indeterminate in that case).
+ */
+u64
+mosix_remote_tsc(void)
+{
+	u64 r = 0;
+
+	if(remote_request(REM_GETTSC, NULL, 0, NULL, 0, 0,
+				(void **)&r, -sizeof(r)))
+		force_sig(SIGSEGV, current);
+	bump_syscalls();
+	return(r);
+}
+
+int
+remote_setup_frame(struct setupframe_parameters_h *s)
+{
+	struct k_sigaction ka;
+	extern void setup_frame(int, struct k_sigaction *,
+				sigset_t *, struct pt_regs *);
+	extern
void setup_rt_frame(int, struct k_sigaction *, siginfo_t *,
+				sigset_t *, struct pt_regs *);
+
+	ka.sa.sa_flags = s->flags;
+	ka.sa.sa_handler = s->handler;
+	ka.sa.sa_restorer = s->restorer;
+	current->sas_ss_sp = s->ss_sp;
+	current->sas_ss_size = s->ss_size;
+	if(s->flags & SA_SIGINFO)
+
+		setup_rt_frame(s->sig, &ka, &s->info, &s->set, mos_to_regs(&current->mosix));
+
+	else
+
+		setup_frame(s->sig, &ka, &s->set, mos_to_regs(&current->mosix));
+
+	comm_free(s);
+	return(comm_send(DEP_SETUPFRAME|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+/*
+ * Set current->nice from the received value, free the header and
+ * acknowledge with DEP_NICE|REPLY.  Returns the result of comm_send().
+ */
+int
+remote_nice(long *n)
+{
+	current->nice = *n;
+	comm_free(n);
+	return(comm_send(DEP_NICE|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+/*
+ * Copy the disclosed identity (disclosure, uid, gid, pgrp, session, tgid)
+ * into the current task and copy the command name into depinfo.comm,
+ * writing each newline as the two characters '\' 'n'.
+ *
+ * At most 16 characters are stored.  When a newline is met with only one
+ * of those 16 slots left, the name is terminated there: the two-character
+ * escape does not fit (the old code stored the terminator and immediately
+ * overwrote it, then wrote one byte past the 16th slot).
+ *
+ * NOTE(review): when exactly 16 characters get stored no terminator is
+ * written by this function - presumably depinfo.comm has a further,
+ * already zero byte; confirm against its declaration.
+ */
+void
+remote_do_updinfo(struct disclosure_h *info)
+{
+	int i, j;
+	struct task_struct *p = current;
+
+	p->mosix.disclosure = info->disclosure;
+	p->uid = info->uid;
+	p->gid = info->gid;
+	p->mosix.depinfo.pgrp = info->pgrp;
+	p->mosix.depinfo.session = info->session;
+	p->mosix.depinfo.tgid = info->tgid;
+	for(i = j = 0 ; j < 16 ; i++)
+	{
+		switch(info->comm[i])
+		{
+		case '\0':
+			p->mosix.depinfo.comm[j] = '\0';
+			return;
+		case '\n':
+			if(j == 15)
+			{
+				/* no room for both characters of the escape */
+				p->mosix.depinfo.comm[j] = '\0';
+				return;
+			}
+			p->mosix.depinfo.comm[j++] = '\\';
+			p->mosix.depinfo.comm[j++] = 'n';
+			break;
+		default:
+			p->mosix.depinfo.comm[j++] = info->comm[i];
+		}
+	}
+}
+
+/*
+ * Message wrapper around remote_do_updinfo(): apply the update, free the
+ * header and acknowledge with DEP_INFO|REPLY.
+ */
+int
+remote_updinfo(struct disclosure_h *info)
+{
+	remote_do_updinfo(info);
+	comm_free(info);
+	return(comm_send(DEP_INFO|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+/*
+ * Replace the REMOTE_CAPS bits of current->cap_effective with the ones
+ * received (bits outside REMOTE_CAPS are left alone), then acknowledge
+ * with DEP_CAPS|REPLY.
+ */
+int
+remote_caps(kernel_cap_t *caps)
+{
+	kernel_cap_t effective = *caps & REMOTE_CAPS;
+
+	comm_free(caps);
+	current->cap_effective &= ~REMOTE_CAPS;
+	current->cap_effective |= effective;
+	return(comm_send(DEP_CAPS|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+/*
+ * Overwrite current->mosix.depcost with the received table (exactly
+ * sizeof(depcost) bytes are copied from "info"), then acknowledge with
+ * DEP_OPCOSTS|REPLY.
+ */
+int
+remote_depcosts(void *info)
+{
+	memcpy((caddr_t)current->mosix.depcost, info,
+					sizeof(current->mosix.depcost));
+	comm_free(info);
+	return(comm_send(DEP_OPCOSTS|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+int
+remote_restore_sigcontext(struct sigcontext **frame)
+{
+	extern int
restore_sigcontext(struct pt_regs *, struct sigcontext *,
+							int *);
+	struct restore_sigcontext_ret_h r;
+
+	/*
+	 * NOTE(review): with the restore_sigcontext() call below disabled,
+	 * "r" is sent back without being assigned anywhere in this function.
+	 */
+// disabled ??	r.result = restore_sigcontext(mos_to_regs(&current->mosix), *frame,
+// disabled ??								&r.eax);
+
+	comm_free(frame);
+	current->mosix.pass_regs |= BIT_OF_REGISTER(esp);
+	return(comm_send(DEP_RESTORESIGCONTEXT|REPLY, &r, sizeof(r),
+							NULL, 0, 0));
+}
+
+/*
+ * Recreate a mapping described by "m" in the current address space.
+ *
+ * The vm_flags carried in m->flags are converted back to mmap()
+ * prot/flags, the mapping is made with do_mmap_pgoff() (on a file from
+ * get_remote_file() when m->fp is set) and any read-ahead hint is
+ * re-applied with sys_madvise().
+ *
+ * the following routine, if called within process-migration,
+ * returns the fact of an error, rather than sends back a reply:
+ * with "inmig" set it returns non-zero on failure and leaves "m" alone;
+ * otherwise it frees "m", sends the mapping result in a DEP_MMAP|REPLY
+ * and returns the result of comm_send().
+ */
+int
+remote_mmap(struct mmap_parameters_h *m, int inmig)
+{
+	unsigned long result;
+	struct file *rf;
+	unsigned long prot, flags;
+	extern asmlinkage long sys_madvise(unsigned long, size_t, int);
+
+	if(m->fp)
+	{
+		if(!(rf = get_remote_file(m->origin, m->fp, m->dp, m->uniq,
+					m->isize, m->nopage)))
+		{
+			result = -EDOITATHOME;
+			goto err;
+		}
+	}
+	else
+		rf = NULL;
+	/* unconvert prot+flags: */
+	prot = 0;
+	flags = MAP_PRIVATE;
+	if(m->fixed)
+		flags |= MAP_FIXED;
+	if(m->flags & VM_GROWSDOWN)
+		flags |= MAP_GROWSDOWN;
+	if(m->flags & VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if(m->flags & VM_EXECUTABLE)
+		flags |= MAP_EXECUTABLE;
+	if(m->flags & VM_READ)
+		prot |= VM_READ;
+	if(m->flags & VM_WRITE)
+		prot |= VM_WRITE;
+	if(m->flags & VM_EXEC)
+		prot |= VM_EXEC;
+	/* MMAP_MAYSHARE is raised only for the duration of the mmap call */
+	if(m->flags & VM_MAYSHARE)
+		current->mosix.dirty_bits |= MMAP_MAYSHARE;
+	result = do_mmap_pgoff(rf, m->addr, m->len, prot, flags, m->pgoff);
+	if(m->flags & VM_MAYSHARE)
+		current->mosix.dirty_bits &= ~MMAP_MAYSHARE;
+	if(rf && !IS_ERR((const void *)result))
+	{
+		int pages = (m->len + PAGE_SIZE - 1) / PAGE_SIZE;
+
+		current->mosix.rpagecredit += pages;
+		if(current->mosix.dflags & DINCOMING)
+			current->mosix.rfreepages += pages;
+	}
+	/*
+	 * VM_SEQ_READ is the sequential-access hint, so it maps back to
+	 * MADV_SEQUENTIAL; the other hint bit maps to MADV_RANDOM (the two
+	 * used to be swapped here).
+	 */
+	if(m->flags & VM_READHINTMASK)
+		sys_madvise(m->addr, m->len, (m->flags & VM_SEQ_READ) ?
+					MADV_SEQUENTIAL : MADV_RANDOM);
+ err:
+	if(rf)
+		fput(rf);
+	if(inmig)
+		return(IS_ERR((const void *)result));
+	comm_free(m);
+	return(comm_send(DEP_MMAP|REPLY, &result, sizeof(result), NULL, 0, 0));
+}
+
+/*
+ * Run do_brk(b->addr, b->len), free the header and send the result in a
+ * DEP_BRK|REPLY.  Returns the result of comm_send().
+ */
+int
+remote_brk(struct brk_parameters_h *b)
+{
+	long result = do_brk(b->addr, b->len);
+
+	comm_free(b);
+	return(comm_send(DEP_BRK|REPLY, &result, sizeof(result), NULL, 0, 0));
+}
+
+/*
+ * Run do_munmap() on the current mm for [m->addr, m->addr + m->len), free
+ * the header and send the result in a DEP_MUNMAP|REPLY.
+ */
+int
+remote_munmap(struct munmap_parameters_h *m)
+{
+	int result = do_munmap(current->mm, m->addr, m->len);
+
+	comm_free(m);
+	return(comm_send(DEP_MUNMAP|REPLY, &result, sizeof(result), NULL, 0, 0));
+}
+
+/*
+ * Run sys_mprotect(m->addr, m->len, m->prot), free the header and send
+ * the result in a DEP_MPROTECT|REPLY.
+ */
+int
+remote_mprotect(struct mprotect_parameters_h *m)
+{
+	extern asmlinkage int sys_mprotect(unsigned long,size_t,unsigned long);
+	int result = sys_mprotect(m->addr, m->len, m->prot);
+
+	comm_free(m);
+	return(comm_send(DEP_MPROTECT|REPLY, &result, sizeof(result), NULL, 0, 0));
+}
+
+int
+remote_copy_from_user(struct user_copy_h *u)
+{
+	int result;
+	void *to;
+	int direct = ucache_ok((unsigned long)u->addr, (unsigned long)u->size,
+								VM_READ);
+
+	if(direct)
+	{
+		result = 0;
+		to = u->addr;
+	}
+	else
+	{
+		if(!(to = kmalloc(u->size, GFP_KERNEL)))
+			result = u->size;
+		else if(u->verify)
+			result = copy_from_user(to, u->addr, u->size);
+		else
+			result = __copy_from_user(to, u->addr, u->size);
+	}
+	result = comm_send(DEP_COPY_FROM_USER|REPLY, &result, sizeof(result),
+		(result == u->size) ?
NULL : to, u->size - result, direct); + bump_copyin(u->size); + comm_free(u); + if(!direct && to) + kfree(to); + return(result); +} + +int +remote_copy_to_user(struct user_copy_h *u) +{ + int result; + + if(u->verify && !access_ok(VERIFY_WRITE, u->addr, u->size)) + result = u->size; + else + { + result = comm_copydata(u->addr, u->size, 1); + if(result < 0) + result = u->size; + } + if(result) + comm_flushdata(COMM_ALLDATA); + bump_copyout(u->size); + comm_free(u); + return(comm_send(DEP_COPY_TO_USER|REPLY, &result, sizeof(result), + NULL, 0, 0)); +} + +int +remote_data_to_user(struct user_copy_h *u) +{ + int error = (comm_copydata(u->addr, u->size, 1) != 0); + + bump_copyout(u->size); + comm_free(u); + if(error) + comm_flushdata(COMM_ALLDATA); + /* No Reply! */ + return(error); +} + +int +remote_clear_user(struct user_copy_h *u) +{ + int result = u->verify ? clear_user(u->addr, u->size) : + __clear_user(u->addr, u->size); + + comm_free(u); + return(comm_send(DEP_CLEAR_USER|REPLY, &result, sizeof(result), + NULL, 0, 0)); +} + + + + + + + + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + + + +long +__strncpy_from_user(char *dst, const char *src, long count) +{ + long res; + +#ifdef CONFIG_MOSIX + if(USER_IS_REMOTE) + return(deputy_strncpy_from_user(dst, (char *)src, count, 0)); +#endif /* CONFIG_MOSIX */ + __do_strncpy_from_user(dst, src, count, res); + return res; +} + + + + + + + + + + + + + 
+
+
+
+
+
+
+
+
+
+
+
+/*
+ * Copy a string of at most u->size bytes from user address u->addr
+ * (with strncpy_from_user() when u->verify is set, otherwise with
+ * __strncpy_from_user()) and send it back in a
+ * DEP_STRNCPY_FROM_USER|REPLY: the header carries the copy result, the
+ * data carries the bytes when that result is positive.
+ *
+ * Returns the result of comm_send().  The copy result is kept apart from
+ * the send status so that the copied bytes are really accounted with
+ * bump_copyin() (the status used to overwrite it, making that call
+ * unreachable).
+ */
+int
+remote_strncpy_from_user(struct user_copy_h *u)
+{
+	int result;
+	int error;
+	void *to = kmalloc(u->size, GFP_KERNEL);
+
+	if(!to)
+		result = -ENOMEM;
+	else if(u->verify)
+		result = strncpy_from_user(to, u->addr, u->size);
+	else
+		result = __strncpy_from_user(to, u->addr, u->size);
+	comm_free(u);
+	error = comm_send(DEP_STRNCPY_FROM_USER|REPLY, &result, sizeof(result),
+			result > 0 ? to : NULL, result > 0 ? result : 0, 0);
+	if(to)
+		kfree(to);
+	if(result > 0)
+		bump_copyin(result);
+	return(error);
+}
+
+/*
+ * Send the result of strnlen_user(u->addr, u->len) in a
+ * DEP_STRNLEN_USER|REPLY.  For u->len == 0 the result is instead 1 when a
+ * single byte can be read from u->addr and 0 when it cannot.
+ */
+int
+remote_strnlen_user(struct strnlen_user_h *u)
+{
+	long result;
+	char c;
+
+	if(u->len)
+		result = strnlen_user(u->addr, u->len);
+	else /* "strnlen_user" does not handle "0 bytes remaining" correctly */
+		result = !copy_from_user(&c, u->addr, 1);
+	comm_free(u);
+	return(comm_send(DEP_STRNLEN_USER|REPLY, &result, sizeof(result),
+								NULL, 0, 0));
+}
+
+
+
+
+
+
+
+
+
+int __verify_write(const void * addr, unsigned long size)
+ {
+	struct vm_area_struct * vma;
+	unsigned long start = (unsigned long) addr;
+
+	if (!size)
+		return 1;
+
+#ifdef CONFIG_MOSIX
+	if(USER_IS_REMOTE)
+		return(deputy_verify_write((void *)addr, size));
+#endif /* CONFIG_MOSIX */
+	vma = find_vma(current->mm, start);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start > start)
+		goto check_stack;
+
+good_area:
+	if (!(vma->vm_flags & VM_WRITE))
+		goto bad_area;
+	size--;
+	size += start & ~PAGE_MASK;
+	size >>= PAGE_SHIFT;
+	start &= PAGE_MASK;
+
+	for (;;) {
+	survive:
+		{
+			int fault = handle_mm_fault(current->mm, vma, start, 1);
+			if (!fault)
+				goto bad_area;
+			if (fault < 0)
+				goto out_of_memory;
+		}
+		if (!size)
+			break;
+		size--;
+		start += PAGE_SIZE;
+		if (start < vma->vm_end)
+			continue;
+		vma = vma->vm_next;
+		if (!vma || vma->vm_start != start)
+			goto bad_area;
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	}
+	return 1;
+
+check_stack:
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, start) == 0)
+		goto good_area;
+
+bad_area: + return 0; + +out_of_memory: + if (current->pid == 1) { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; +} + + + + + + + + + + + + + + +int +remote_verify_write(struct user_copy_h *u) +{ + int result = __verify_write(u->addr, u->size); + + comm_free(u); + return(comm_send(DEP_VERIFY_WRITE|REPLY, &result, sizeof(result), + NULL, 0, 0)); +} + +int +remote_csum_copy_from_user(struct user_csum_copy_h *u) +{ + struct user_csum_copy_ret_h r; + void *to = kmalloc(u->len, GFP_KERNEL); + int result; + + r.error = 0; + if(!to) + r.error = -ENOMEM; + else + r.newsum = csum_partial_copy_from_user(u->addr, to, u->len, + u->sum, &r.error); + result = comm_send(DEP_CSUM_COPY_FROM_USER|REPLY, &r, sizeof(r), + r.error ? NULL : to, r.error ? 0 : u->len, 0); + bump_copyin(u->len); + comm_free(u); + if(to) + kfree(to); + return(result); +} + +int +remote_rlimit(struct rlimit_h *l) +{ + current->rlim[l->resource] = l->limit; + comm_free(l); + return(comm_send(DEP_RLIMIT|REPLY, NULL, 0, NULL, 0, 0)); +} + +int +remote_urgent(void) +{ + register struct task_struct *p = current; + int w; + + spin_lock_irq(&runqueue_lock); + p->mosix.dflags &= ~DSENTURGENT; + spin_unlock_irq(&runqueue_lock); + spin_lock_irq(&whereto_lock); + w = p->mosix.whereto; + p->mosix.whereto = 0; + spin_unlock_irq(&whereto_lock); +#ifdef CONFIG_MOSIX_DFSA + if(process_told(p, DREQ_DFSASYNC)) + { + remote_clear_dfsa(); + clear_dfsasync(); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(comm_send(DEP_TAKEURGENT|REPLY, &w, sizeof(w), NULL, 0, 0)); +} + +int +remote_rusage(int *finalp) +{ + struct rusage r; + + /* "*finalp" just means that calculation of "exit_mem" is required */ + remote_fill_rusage(&r, *finalp); + comm_free(finalp); + return(comm_send(DEP_RUSAGE|REPLY, &r, sizeof(r), NULL, 0, 0)); +} + +void +remote_fill_rusage(struct rusage *r, int final) +{ + register struct task_struct *p = current; + struct vm_area_struct *vma; + clock_t ut, st; + + cli(); + ut = 
p->times.tms_utime; + st = p->times.tms_stime; + p->times.tms_utime = 0; + p->times.tms_stime = 0; + sti(); + r->ru_utime.tv_sec = CT_TO_SECS(ut); + r->ru_utime.tv_usec = CT_TO_USECS(ut); + p->mosix.passedtime += ut; + p->mosix.uttime += ut; + r->ru_stime.tv_sec = CT_TO_SECS(st); + r->ru_stime.tv_usec = CT_TO_USECS(st); + p->mosix.passedtime += st; + r->ru_minflt = p->min_flt; + p->min_flt = 0; + r->ru_majflt = p->maj_flt; + p->maj_flt = 0; + r->ru_nswap = p->nswap; + p->nswap = 0; + if(final) + { + r->ru_maxrss = 0; + if (current->mm) + for(vma = current->mm->mmap ; vma ; vma = vma->vm_next) + r->ru_maxrss += vma->vm_end - vma->vm_start; + } +} + +int +remote_execve_counts(struct execve_counts_h *e) +{ + struct execve_counts_ret_h r; + + execve_remote_counts(e->argp, e->envp, &r.argc, &r.envc); + comm_free(e); + return(comm_send(DEP_EXECVE_COUNTS|REPLY, &r, sizeof(r), NULL, 0, 0)); +} + +int +remote_bring_strings(struct execve_bring_strings_h *b) +{ + struct linux_binprm bprm; + struct execve_bring_strings_ret_h r; + struct execve_more_strings_h m; + int i; + int err = 0; + char *kaddr; + + bprm.p = b->p; + bprm.filename = b->filename; + bprm.envc = b->envc; + bprm.argc = b->argc; + memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); + current->mosix.inexec = &bprm; + r.result = execve_remote_bring_strings(&bprm, b->envp, b->argp); + r.p = bprm.p; + r.exec = bprm.exec; + comm_free(b); + if(r.result == 0) + { + for(i = r.p / PAGE_SIZE ; i < MAX_ARG_PAGES - 1 ; i++) + if(bprm.page[i]) + { + m.pgno = i + 1; + m.len = (i + 1) * PAGE_SIZE - r.p; + if(m.len > PAGE_SIZE) + m.len = PAGE_SIZE; + if(!err) + { + kaddr = kmap(bprm.page[i]); + err = comm_send(REM_MORESTRINGS, &m, sizeof(m), + kaddr + (PAGE_SIZE - m.len), m.len, 0); + kunmap(bprm.page[i]); + } + __free_page(bprm.page[i]); + bprm.page[i] = 0; + } + } + if(!err) + { + if(r.result == 0 && bprm.page[MAX_ARG_PAGES-1]) + { + r.pgno = MAX_ARG_PAGES; + r.len = MAX_ARG_PAGES * PAGE_SIZE - r.p; + if(r.len > 
PAGE_SIZE) + r.len = PAGE_SIZE; + kaddr = kmap(bprm.page[MAX_ARG_PAGES-1]); + } + else + { + r.pgno = 0; + kaddr = NULL; /* only to pacify the compiler */ + } + err = comm_send(DEP_BRING_STRINGS|REPLY, &r, sizeof(r), r.pgno ? + (kaddr + (PAGE_SIZE-r.len)) : 0, r.pgno ? r.len : 0, 0); + if(r.pgno) + kunmap(bprm.page[MAX_ARG_PAGES-1]); + } + for(i = 0 ; i < MAX_ARG_PAGES ; i++) + if(bprm.page[i]) + __free_page(bprm.page[i]); + current->mosix.inexec = NULL; + return(err); +} + +int +remote_setup_args(struct execve_setup_args_h *e) +{ + int i; + struct task_struct *p = current; + register struct linux_binprm *bprm = p->mosix.inexec; + struct execve_setup_args_ret_h r; + int err = 0; + struct page *page; + char *kaddr; + + if(e->create) + { +#ifdef CONFIG_MOSIX_DIAG + if(p->mosix.inexec) + { + printk("%d-Dup create in remote_setup_args!\n", p->pid); + err = -EDIST; + goto out; + } +#endif /* CONFIG_MOSIX_DIAG */ + if((bprm = p->mosix.inexec = kmalloc(sizeof(*bprm),GFP_KERNEL))) + memset(bprm->page, 0, + MAX_ARG_PAGES * sizeof(bprm->page[0])); + } + if(bprm && e->pgno) + { + if(!(page = alloc_page(GFP_HIGHUSER))) + { + comm_flushdata(e->len); + goto fail; + } + bprm->page[e->pgno-1] = page; + kaddr = kmap(page); + i = comm_copydata(kaddr + (PAGE_SIZE - e->len), e->len, 0); + if(e->len < PAGE_SIZE) + memset(kaddr, 0, PAGE_SIZE - e->len); + kunmap(page); + if(i) + { + fail: + for(i = 0 ; i < MAX_ARG_PAGES ; i++) + if(bprm->page[i]) + __free_page(bprm->page[i]); + kfree(bprm); + bprm = p->mosix.inexec = NULL; + } + } + else if(e->pgno) + comm_flushdata(e->len); + if(e->how == SETUP_ARGS_NOTYET) + goto out; + if(!p->mosix.inexec) /* one of the allocations failed */ + r.reply = -ENOMEM; + else + { + if(e->how == SETUP_ARGS_AS_ELF) + { + p->mm->start_data = 0; + p->mm->end_data = 0; + p->mm->end_code = 0; + p->mm->rss = 0; + p->mm->mmap = NULL; + } + bprm->p = e->p; + bprm->loader = e->loader; + bprm->exec = e->exec; + bprm->argc = e->argc; + bprm->envc = e->envc; + 
p->personality = e->personality;
+		r.reply = setup_arg_pages(bprm);
+		if(e->how == SETUP_ARGS_AS_AOUT)
+		{
+			if(r.reply == 0)
+				p->mm->start_stack = (unsigned long)
+					create_aout_tables((char *)bprm->p, bprm);
+		}
+		else
+			p->mm->start_stack = bprm->p;
+		r.p = bprm->p;
+		r.loader = bprm->loader;
+		r.exec = bprm->exec;
+		if(r.reply == 0)
+			p->ptrace &= ~PT_DTRACE;
+		r.start_stack = p->mm->start_stack;
+	}
+	p->mosix.pass_regs |= BIT_OF_REGISTER(xds)|BIT_OF_REGISTER(xes)|
+		BIT_OF_REGISTER(xss)|BIT_OF_REGISTER(xcs)|BIT_OF_REGISTER(eip)|
+		BIT_OF_REGISTER(esp);
+	err = comm_send(DEP_SETUP_ARGS|REPLY, &r, sizeof(r), NULL, 0, 0);
+	mosix_decay_exec();
+ out:
+	if(e->how != SETUP_ARGS_NOTYET && bprm)
+	{
+		kfree(bprm);
+		p->mosix.inexec = NULL;
+	}
+	comm_free(e);
+	return(err);
+}
+
+/*
+ * Run exec_mmap(); when it succeeds also flush the thread state, clear
+ * the MOSIX statistics, and zero the page credits and the alternate
+ * signal stack of the current task.  The exec_mmap() result is sent in a
+ * DEP_EXEC_MMAP|REPLY; the result of comm_send() is returned.
+ */
+int
+remote_exec_mmap(void)
+{
+	int r;
+
+	if(!(r = exec_mmap()))
+	{
+		flush_thread();
+		mosix_clear_statistics();
+		current->mosix.rpagecredit = 0;
+		current->mosix.rfreepages = 0;
+		current->sas_ss_sp = current->sas_ss_size = 0;
+	}
+	return(comm_send(DEP_EXEC_MMAP|REPLY, &r, sizeof(r), NULL, 0, 0));
+}
+
+/*
+ * Fill a "struct user" with dump_thread() from the registers of the
+ * current task and send it in a DEP_DUMP_THREAD|REPLY.
+ */
+int
+remote_dump_thread(void)
+{
+	extern void dump_thread(struct pt_regs *, struct user *);
+	struct user u;
+	/* (size == 286 bytes -- should be OK while allocation can fail) */
+
+	dump_thread(mos_to_regs(&current->mosix), &u);
+	return(comm_send(DEP_DUMP_THREAD|REPLY, &u, sizeof(u), NULL, 0, 0));
+}
+
+/*
+ * Pass the received a.out header to aout_remote_init_mm(), free it and
+ * acknowledge with DEP_INIT_AOUT_MM|REPLY.
+ */
+int
+remote_init_aout_mm(struct exec *ex)
+{
+	aout_remote_init_mm(ex);
+	comm_free(ex);
+	return(comm_send(DEP_INIT_AOUT_MM|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+int
+remote_elf_setup(struct execve_elf_setup_h *e)
+{
+	unsigned long r;
+	struct elfhdr h;
+
+	h.e_phoff = e->exec_e_phoff;
+	h.e_phnum = e->exec_e_phnum;
+	h.e_entry = e->exec_e_entry;
+	r = elf_remote_setup(e->p, e->argc, e->envc, e->hasexec ?
&h : NULL,
+			e->addr, e->load_bias, e->interp_load_addr, e->ibcs,
+			e->add_arg_start, e->elf_brk, e->end_code,
+			e->start_code, e->start_data, e->end_data, e->elf_bss,
+			e->personality, &e->extras);
+	comm_free(e);
+	return(comm_send(DEP_ELF_SETUP|REPLY, &r, sizeof(r), NULL, 0, 0));
+}
+
+/*
+ * Set end_code = f->text, end_data = f->data + end_code and
+ * brk = f->bss + end_data in the current mm, then acknowledge with
+ * DEP_FIX_ELF_AOUT|REPLY.
+ */
+int
+remote_fix_elf_aout(struct execve_fix_elf_aout_h *f)
+{
+	current->mm->brk = f->bss +
+		(current->mm->end_data = f->data +
+			(current->mm->end_code = f->text));
+	comm_free(f);
+	return(comm_send(DEP_FIX_ELF_AOUT|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+/*
+ * Send a DEP_LIST_VMAS|REPLY describing the current mm: the header holds
+ * arg_start/arg_end and the number of VMAs (r.n), the data holds one
+ * "struct vmalist" (start, end, flags, elf_maydump()) per VMA.
+ * r.n is -1, with no data, when the list cannot be allocated.
+ *
+ * Returns the result of comm_send().
+ */
+int
+remote_list_vmas(void)
+{
+	register int i;
+	register struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	struct vmalist *lis = NULL;
+	struct list_vmas_ret_h r;
+	int error;
+
+	if(!mm)
+	{
+		mosix_panic("remote_list_vmas: no mm");
+		/*
+		 * as after the other mosix_panic() calls in this file, do
+		 * not carry on: the code below dereferences "mm"
+		 */
+		remote_disappear();
+	}
+	r.argstart = mm->arg_start;
+	r.argend = mm->arg_end;
+	for(i = 0 , vma = mm->mmap ; vma ; vma = vma->vm_next)
+		i++;
+	if(i && !(lis = kmalloc(i * sizeof(struct vmalist), GFP_KERNEL)))
+		i = -1;
+	if((r.n = i) <= 0)
+		return(comm_send(DEP_LIST_VMAS|REPLY, &r, sizeof(r), NULL,0,0));
+	for(i = 0 , vma = mm->mmap ; vma && i < r.n ; vma = vma->vm_next , i++)
+	{
+		lis[i].vmstart = vma->vm_start;
+		lis[i].vmend = vma->vm_end;
+		lis[i].vmflags = vma->vm_flags;
+		lis[i].maydump = elf_maydump(vma);
+	}
+	error = comm_send(DEP_LIST_VMAS|REPLY, &r, sizeof(r),
+				lis, i * sizeof(struct vmalist), 0);
+	kfree(lis);
+	return(error);
+}
+
+int
+remote_fork(struct fork_h *f)
+{
+	struct mosix_task *me = &current->mosix;
+	struct fork_h lf = *f;
+	mosix_addr a;
+	mosix_link *sonsock;
+	int pid;
+	register struct task_struct *son;
+	int err;
+	int tries;
+	int done, ret;
+
+	comm_free(f);
+	mosix_obtain_registers(ALL_REGISTERS & ~BIT_OF_REGISTER(eax));
+	if(lf.do_forkmigrate)
+		mosix_forkmigrate(); /* may not return */
+	ret = -EAGAIN;
+	for(tries = 0 ; ; tries++)
+	{
+		if(tries >= 100 || !(sonsock =
+			comm_open(COMM_ACCEPT, &a, comm_remote_timo)))
+			goto out;
+		if((err =
remote_request(REM_CONNECT_TO, &a, sizeof(a),
+				NULL, 0, 0, (void **)&done, -sizeof(done))))
+		{
+			comm_close(sonsock);
+			return(err);
+		}
+		if(done)
+			break;
+		comm_close(sonsock);
+	}
+	me->sonpid = lf.pid;
+	if((pid = do_fork(SIGCHLD|CLONE_SIGHAND
+#ifndef CONFIG_MOSIX_DFSA
+			|CLONE_FS|CLONE_FILES
+#endif /* CONFIG_MOSIX_DFSA */
+			, lf.usp, mos_to_regs(me), 0)) < 0)
+	{
+		comm_close(sonsock);
+		goto out;
+	}
+	read_lock(&tasklist_lock);
+	for_each_task(son)
+		if(son->pid == pid)
+			break;
+	read_unlock(&tasklist_lock);
+	comm_use(son, sonsock);
+	wake_up(mos_to_waitp(&son->mosix));
+	ret = 0;
+ out:
+	return(comm_send(DEP_PLEASE_FORK|REPLY, &ret, sizeof(ret), NULL, 0, 0));
+}
+
+/*
+ * Apply the received personality with set_personality() and send 0 in a
+ * DEP_PERSONALITY|REPLY when current->personality then equals it, -1
+ * otherwise.  Returns the result of comm_send().
+ */
+int
+remote_personality(unsigned long *pp)
+{
+	int res;
+	unsigned long personality = *pp;
+
+	comm_free(pp);
+	set_personality(personality);
+	res = (current->personality == personality) ? 0 : -1;
+	return(comm_send(DEP_PERSONALITY|REPLY, &res, sizeof(res), NULL, 0, 0));
+}
+
+/*
+ * Add the requested register bits to pass_regs, leaving out those that
+ * are set in deputy_regs, then acknowledge with DEP_BRING_ME_REGS|REPLY.
+ */
+int
+remote_bring_me_regs(unsigned long *bring)
+{
+	struct mosix_task *m = &current->mosix;
+
+	m->pass_regs |= *bring & ~m->deputy_regs;
+	comm_free(bring);
+	return(comm_send(DEP_BRING_ME_REGS|REPLY, NULL, 0, NULL, 0, 0));
+}
+
+int
+remote_dump_fpu(void)
+{
+	extern int dump_fpu (struct pt_regs *, struct user_i387_struct *);
+	struct user_i387_struct f;
+	int r = dump_fpu(mos_to_regs(&current->mosix), &f);
+
+	return(comm_send(DEP_DUMP_FPU|REPLY, &r, sizeof(r),
+		r ? &f : NULL, r ?
sizeof(f) : 0, 0));
+}
+
+/*
+ * Handle a request to migrate back: the reason is read from the header,
+ * mig_send_request(reason, FROM_REMOTE) is issued and, when accepted,
+ * the process is sent with mig_do_send() while DPASSING is set.
+ * mosix.migpages is restored to its value on entry in every case.
+ *
+ * Returns -EDIST when the request is refused, otherwise the result of
+ * mig_do_send().
+ */
+int
+remote_come_back(void *head)
+{
+	struct mosix_task *m = &current->mosix;
+	int reason = *((int *) head);
+	int reply;
+	int omigpages;
+
+	comm_free(head);
+	if(!(omigpages = current->mosix.migpages))
+		m->migpages = count_migrating_pages();
+	if (mig_send_request(reason, FROM_REMOTE))
+	{
+		m->migpages = omigpages;
+		return (-EDIST);
+	}
+	spin_lock_irq(&runqueue_lock);
+	m->dflags |= DPASSING;
+	spin_unlock_irq(&runqueue_lock);
+
+#ifdef CONFIG_MOSIX_DEBUG
+	if(ds_debug & (DSDEB_MIG|DSDEB_MIGSTAGE))
+		printk("%d-remote_come_back: 2\n", current->pid);
+#endif /* CONFIG_MOSIX_DEBUG */
+	release_migrations(m->deppe);
+	reply = mig_do_send();
+	m->migpages = omigpages;
+	spin_lock_irq(&runqueue_lock);
+	m->dflags &= ~DPASSING;
+	spin_unlock_irq(&runqueue_lock);
+	return(reply);
+}
+
+/*
+ * Handle a request to migrate to the node given in the header: open a
+ * link to pm->ma, switch the task onto it with comm_use(), and try
+ * mig_send_request()/mig_do_send() over it.  The previous link is then
+ * restored and a status (0 only when mig_do_send() returned 0, else 1)
+ * is sent in a DEP_PLEASE_MIGRATE|REPLY.  When the status is 0 and the
+ * reply was sent without error this process ends via remote_disappear().
+ * DPASSING is set for the duration of the attempt.
+ *
+ * Returns the result of sending the reply.
+ */
+int
+remote_goto_remote(void *head)
+{
+	struct task_struct *p = current;
+	mosix_link *remsock = NULL;
+	mosix_link *depsock;
+	struct please_migrate_h *pm = (struct please_migrate_h *)head;
+	int reason = pm->reason;
+	int status = 1;
+	int error;
+	int whereto = pm->to;
+	int omigpages;
+
+	spin_lock_irq(&runqueue_lock);
+	p->mosix.dflags |= DPASSING;
+	spin_unlock_irq(&runqueue_lock);
+	remsock = comm_open(COMM_TOADDR, &pm->ma, comm_reconn_timo);
+	comm_free(head);
+	if (!remsock)
+		goto reply;
+
+	depsock = comm_use(p, remsock);
+	if(!(omigpages = p->mosix.migpages))
+		current->mosix.migpages = count_migrating_pages();
+	if ((error = mig_send_request(reason, FROM_REMOTE)))
+		goto cont;
+
+	release_migrations(whereto);
+	if (!mig_do_send())
+		status = 0;
+cont:
+	p->mosix.migpages = omigpages;
+	comm_use(p, depsock);
+	comm_close(remsock);
+
+reply:
+	error = comm_send(DEP_PLEASE_MIGRATE | REPLY, (void *)&status,
+					sizeof(status), NULL, 0, 0);
+	if (!(status || error))
+		remote_disappear();
+	spin_lock_irq(&runqueue_lock);
+	p->mosix.dflags &= ~DPASSING;
+	spin_unlock_irq(&runqueue_lock);
+	return (error);
+}
+
+int
+remote_consider(int *reasonp)
+{
+	int
reason = *reasonp; + + comm_free(reasonp); + consider(reason, NULL); + return(0); +} + +int +remote_psinfo(void) +{ + int n; + char *p = fill_common_ps_info(&n); + int err; + + if(!p) + return(comm_send(DEP_PSINFO|REPLY, &n, sizeof(n), NULL, 0, 0)); + err = comm_send(DEP_PSINFO|REPLY, &n, sizeof(n), p, n, 0); + kfree(p); + return(err); +} + +#ifdef CONFIG_MOSIX_DFSA +int +remote_receive_dfsachanges(int *head) +{ + int len = *head; + char *changes; + int error = 0; + + comm_free(head); +#ifdef CONFIG_MOSIX_DIAG + if(len <= 0) + { + printk("%s: remote_receive_dfschanges: len=%d\n", + desc_mostask(NULL), len); + return(-EINVAL); + } +#endif /* CONFIG_MOSIX_DIAG */ + if((changes = kmalloc(len, GFP_KERNEL))) + { + if((error = comm_copydata(changes, len, 0))) + { + remote_clear_dfsa(); + kfree(changes); + } + else + remote_unpack_dfsa_changes(changes); + } + else + remote_clear_dfsa(); + + if(!error) + error = comm_send(DEP_DFSA_CHANGES|REPLY, NULL, 0, NULL, 0, 0); + return(error); +} +#endif /* CONFIG_MOSIX_DFSA */ + +void +absorb_deptime(unsigned long ticks) +{ + register struct task_struct *p = current; + unsigned long it_prof = p->it_prof_value; + + if(it_prof) + { + cli(); + if(it_prof > ticks) + { + p->it_prof_value -= ticks; + sti(); + } + else + { + p->it_prof_value = p->it_prof_incr; + sti(); + send_sig(SIGPROF, p, 1); + } + } +} + + + + diff -urN linux-2.4.17/mos/rinode.c linux_umopenmosix/mos/rinode.c --- linux-2.4.17/mos/rinode.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/rinode.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,688 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. 
+ */ +/* + * Author(s): Amnon Shiloh. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* REMOTE section: */ + +spinlock_t rinode_list_lock = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_MOSIX_UDB +void +count_files(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct file *f; + + if(!mm) { printk("no MM\n"); return; } + for(vma = mm->mmap ; vma ; vma = vma->vm_next) + if((f = vma->vm_file)) + printk("File %x, count=%d\n", (int)f, file_count(f)); +} +#endif /* CONFIG_MOSIX_UDB */ + +#define next_entry(dp) ((struct dentry *)(dp)->d_fsdata) + +static struct dentry *rinode_list = NULL; + +static int remote_dentry_delete(struct dentry *); +static void remote_dentry_iput(struct dentry *, struct inode *); +static int mosix_remote_file_mmap(struct file *, struct vm_area_struct *); + +static struct dentry_operations remote_dentry_ops = +{ + d_delete: remote_dentry_delete, + d_iput: remote_dentry_iput +}; + +static struct file_operations remote_file_operations = +{ + mmap: mosix_remote_file_mmap, +}; + +struct address_space_operations remote_aops = +{ + readpage: remote_readpage, +}; + +void +invalidate_old_remote_pages(struct inode *ip) +{ + down(&ip->i_sem); + /* we prefer "invalidate_inode_pages", + but it does not work on locked pages */ + truncate_inode_pages(ip->i_mapping, 0); + up(&ip->i_sem); +} + +struct file * +get_remote_file(int origin, struct file *fpr, struct dentry *dpr, uint64_t uniq, off_t isize, + nopage_t nopage) +{ + register struct file *f; + register struct dentry *d, *d2; + register struct inode *ip, *i2; + struct vm_area_struct *vma; + int cng; + struct dentry *to_dput = NULL; + struct inode *to_iput = NULL; + + for(vma = current->mm->mmap ; vma ; vma = vma->vm_next) + if((f = vma->vm_file) && home_file(f) == fpr) + { + ip = f->f_dentry->d_inode; + if(ip->u.remote_i.origin == origin && ip->u.remote_i.dp == dpr + && ip->u.remote_i.unique == uniq && ip->i_size == isize 
+ && ip->u.remote_i.nopage == nopage) + { + get_file(f); + return(f); + } + } + if(!(f = get_empty_filp())) + return(NULL); + spin_lock(&dcache_lock); + spin_lock(&rinode_list_lock); + for(d = rinode_list ; d ; d = next_entry(d)) + { + ip = d->d_inode; +#ifdef CONFIG_MOSIX_DIAG + if(!ip) + { + mosix_panic("get_remote_file: dcache race"); + break; + } +#endif /* CONFIG_MOSIX_DIAG */ + if(ip->u.remote_i.origin == origin && ip->u.remote_i.dp == dpr) + { + join_in: + cng = (ip->u.remote_i.unique != uniq) || + (ip->u.remote_i.nopage != nopage) || + (isize < ip->i_size); + ip->i_size = isize; + if(cng) + { + ip->u.remote_i.unique = uniq; + ip->u.remote_i.nopage = nopage; + } + dget(d); + spin_unlock(&rinode_list_lock); + spin_unlock(&dcache_lock); + if(to_iput) + iput(to_iput); + if(to_dput) + dput(to_dput); + if(cng) + invalidate_old_remote_pages(ip); + f->f_dentry = d; + f->f_op = &remote_file_operations; + f->f_mode = FMODE_READ; + home_file(f) = fpr; + return(f); + } + } + spin_unlock(&rinode_list_lock); + spin_unlock(&dcache_lock); + if(!(ip = get_empty_inode())) + { + put_filp(f); + return(NULL); + } + ip->i_mode = S_IFREG; + ip->u.remote_i.origin = origin; + ip->u.remote_i.dp = dpr; + ip->u.remote_i.unique = uniq; + ip->u.remote_i.nopage = nopage; + ip->i_size = isize; + ip->i_fop = &remote_file_operations; + ip->i_mapping->a_ops = &remote_aops; + if(!(d = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 }))) + { + put_filp(f); + iput(ip); + return(NULL); + } + d->d_parent = d; + /* any copy created while we slept? 
*/ + spin_lock(&dcache_lock); + spin_lock(&rinode_list_lock); + for(d2 = rinode_list ; d2 ; d2 = next_entry(d2)) + { + i2 = d2->d_inode; +#ifdef CONFIG_MOSIX_DIAG + if(!i2) + { + mosix_panic("get_remote_file: dcache race (2)"); + break; + } +#endif /* CONFIG_MOSIX_DIAG */ + if(i2->u.remote_i.origin == origin && i2->u.remote_i.dp == dpr) + { + to_iput = ip; + to_dput = d; + d = d2; + ip = i2; + goto join_in; + } + } + d->d_op = &remote_dentry_ops; + d->d_inode = ip; + next_entry(d) = rinode_list; + rinode_list = d; + f->f_dentry = d; + f->f_op = &remote_file_operations; + f->f_mode = FMODE_READ; + home_file(f) = fpr; + spin_unlock(&rinode_list_lock); + spin_unlock(&dcache_lock); + return(f); +} + +/* + * called by fput(fd) ==> dput(d) + * + * Unlink "del" from the singly-linked rinode_list (chained through + * d_fsdata) under rinode_list_lock; always returns 1. + */ + +static int +remote_dentry_delete(struct dentry *del) +{ + register struct dentry *dp; + + spin_lock(&rinode_list_lock); + if(del == rinode_list) + rinode_list = next_entry(del); + else + { + for(dp = rinode_list ; dp && next_entry(dp) != del ; + dp = next_entry(dp)) + ; + /* test after the search: inside the loop dp can never be NULL */ + if(!dp) + panic("remote dentry not listed"); + next_entry(dp) = next_entry(del); + } + spin_unlock(&rinode_list_lock); + return(1); +} + +static void +remote_dentry_iput(struct dentry *dp, struct inode *ip) +{ + invalidate_old_remote_pages(ip); + iput(ip); +} + +struct vm_operations_struct rinode_mmap = +{ + nopage: filemap_nopage, +}; + +static int +mosix_remote_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_SHARED) + panic("REMOTE VM_SHARED mmap"); + vma->vm_ops = &rinode_mmap; + return(0); +} + +int +remote_report_files(void) +{ + register struct vm_area_struct *vma, *vmb; + struct mm_struct *mm = current->mm; + struct file **fp = NULL; + int n = 0; + int result; + + down_read(&mm->mmap_sem); + for(vma = mm->mmap ; vma ; vma = vma->vm_next) + if(vma->vm_file) + { + for(vmb = mm->mmap ; vmb != vma ; vmb = vmb->vm_next) + if(vmb->vm_file == vma->vm_file) + break; + if(vmb == vma) + n++; + } + up_read(&mm->mmap_sem); + if(n && !(fp = 
kmalloc(n * sizeof(struct file *), GFP_KERNEL))) + n = -EAGAIN; + else + { + down_read(&mm->mmap_sem); + for(n = 0 , vma = mm->mmap ; vma ; vma = vma->vm_next) + if(vma->vm_file) + { + for(vmb = mm->mmap ; vmb != vma ; vmb = vmb->vm_next) + if(vmb->vm_file == vma->vm_file) + break; + if(vmb == vma) + fp[n++] = home_file(vma->vm_file); + } + up_read(&mm->mmap_sem); + } + result = comm_send(DEP_LISTHOLD|REPLY, &n, sizeof(int), + fp, n > 0 ? n * sizeof(struct file *) : 0, 0); + if(n > 0) + kfree(fp); + return(result); +} + +void +rinode_flush_files(int pe) +{ + register struct dentry *dp; + register struct inode *ip; + +#ifdef CONFIG_MOSIX_DEBUG + if (ds_debug & (DSDEB_CONFIG|DSDEB_MIG)) + printk("%s-rinode_flush_files: for pe %d\n", + desc_mostask(NULL), pe); +#endif + + loop: + spin_lock(&rinode_list_lock); + for(dp = rinode_list ; dp ; dp = next_entry(dp)) + { + ip = dp->d_inode; +#ifdef CONFIG_MOSIX_DIAG + if(!ip) + { + mosix_panic("rinode_flush_files: dcache race"); + break; + } +#endif /* CONFIG_MOSIX_DIAG */ + if(ip->u.remote_i.origin == pe && ip->u.remote_i.nopage) + { + ip->u.remote_i.dp = NULL; + /* (the pointer is no good and any process + * still using this inode is doomed anyway) */ + ip->u.remote_i.unique = 0xffffffffffffffffULL; + ip->u.remote_i.nopage = NULL; + spin_unlock(&rinode_list_lock); + invalidate_old_remote_pages(ip); + goto loop; + } + } + spin_unlock(&rinode_list_lock); +} + +/* DEPUTY section: */ + +#define DEFAULT_ALLOCATION 10 + +#define ALLOCATED_NEW 1 +#define ALLOCATED_NEW_AND_DENY 2 +#define ADDED_A_DENY 3 +#define FOUND_OLD 4 + +int +mosix_register_a_file(struct file *fp, int denywrite) +{ + struct task_struct *p = current; + register int i, new; + struct held_files *nh, *oh; + struct mm_struct *mm; + + if(!p->mosix.held_allocated) + { + if((mm = p->mm) && atomic_read(&mm->mm_realusers) > 1) + return(0); + nh = NULL; /* COMPILER BUG -- hope it is optimized out */ + for(i = DEFAULT_ALLOCATION ; i > 0 ; i >>= 1) + if((nh = (struct 
held_files *) + kmalloc(i * sizeof(struct held_files), GFP_KERNEL))) + break; + if(!nh) + return(-ENOMEM); + memset(nh, 0, i * sizeof(struct held_files)); + task_lock(p); + p->mosix.held_files = nh; + p->mosix.held_allocated = i; + task_unlock(p); + new = 0; + } + else + { + new = -1; + for(i = 0 ; i < p->mosix.held_allocated ; i++) + if((struct file *)p->mosix.held_files[i].f == fp) + { + if(!denywrite || p->mosix.held_files[i].denywrite) + return(FOUND_OLD); + if(deny_write_access(fp)) + return(-ETXTBSY); + p->mosix.held_files[i].denywrite = 1; + return(ADDED_A_DENY); + } + else if(new == -1 && !p->mosix.held_files[i].f) + new = i; + if(new == -1) + { + nh = NULL;/* COMPILER BUG -- hope it is optimized out */ + for(i = DEFAULT_ALLOCATION ; i > 0 ; i >>= 1) + if((nh = kmalloc((i + p->mosix.held_allocated) * + sizeof(struct held_files), GFP_KERNEL))) + break; + if(!i) + return(-ENOMEM); + for(new = p->mosix.held_allocated + i - 1 ; + new >= p->mosix.held_allocated ; new--) + nh[new].f = NULL; + for(new = 0 ; new < p->mosix.held_allocated ; new++) + nh[new] = p->mosix.held_files[new]; + oh = p->mosix.held_files; + task_lock(p); + p->mosix.held_files = nh; + p->mosix.held_allocated += i; + task_unlock(p); + kfree(oh); + } + } + get_file(fp); + task_lock(p); + p->mosix.held_files[new].f = fp; + task_unlock(p); + if((p->mosix.held_files[new].denywrite = denywrite)) + { + if(deny_write_access(fp)) + { + task_lock(p); + p->mosix.held_files[new].f = NULL; + task_unlock(p); + /* the slot is empty again: drop the get_file() reference */ + fput(fp); + return(-ETXTBSY); + } + return(ALLOCATED_NEW_AND_DENY); + } + else + return(ALLOCATED_NEW); +} + +void +mosix_undo_last_file_registration(struct file *fp, int result) +{ + register int i; + struct mosix_task *m = &current->mosix; + + if(result == FOUND_OLD) + return; + for(i = 0 ; i < m->held_allocated ; i++) + if((struct file *)m->held_files[i].f == fp) + { + switch(result) + { + case ADDED_A_DENY: + case ALLOCATED_NEW_AND_DENY: + if(!m->held_files[i].denywrite) + printk("%s: 
mosix_undo_last_file_registration - no denywrite\n", + desc_mostask(m)); + else + allow_write_access(fp); + if(result == ADDED_A_DENY) + break; + case ALLOCATED_NEW: + fput(fp); + task_lock(current); + m->held_files[i].f = NULL; + task_unlock(current); + break; + } + return; + } + printk("%s: mosix_undo_last_file_registration - not found\n", + desc_mostask(m)); +} + +void +mosix_update_remote_files(void) +{ + struct task_struct *p = current; + register struct held_files *h = p->mosix.held_files; + register int i, j; + int op, np; + struct file **rf; + struct file *fp; + + if(!(op = p->mosix.held_allocated)) + return; + if(deputy_request(DEP_LISTHOLD, NULL, 0, NULL, 0, 0, + (void **)&np, -sizeof(int))) + deputy_die_on_communication(); + if(np < 0 || (np && comm_recvdata((void **)&rf) < 0)) + deputy_die_on_communication(); + for(j = 0 ; j < op ; j++) + h[j].denywrite = (h[j].denywrite != 0) | 2; + for(i = 0 ; i < np ; i++) + { + for(j = 0 ; j < op ; j++) + if(h[j].f == rf[i]) + { + h[j].denywrite &= ~2; + break; + } + if(j == op) + panic("mosix_update_remote_files: not listed"); + } + for(j = 0 ; j < op ; j++) + if((fp = h[j].f) && (h[j].denywrite & 2)) + { +#ifdef CONFIG_MOSIX_DEBUG + if(ds_debug & DSDEB_HOLD) + printk("%s-mosix_update_remote_files: freeing %x\n", + desc_mostask(NULL), (int)fp); +#endif /* CONFIG_MOSIX_DEBUG */ + task_lock(p); + h[j].f = NULL; + task_unlock(p); + if(h[j].denywrite & 1) + allow_write_access(fp); + fput(fp); + } + if(np) + comm_free(rf); +} + +int +mosix_rebuild_file_list(void) +{ + struct task_struct *p = current; + register struct vm_area_struct *vma, *vmb; + struct mm_struct *mm = p->mm; + struct held_files *hf = NULL, *old; + int i, n, oldn; + int denywrite; + struct file *this; + + if(p->mosix.dflags & DDEPUTY) + { + if(p->mosix.dflags & DINCOMING) + p->mosix.dflags |= DDELAYHELD; + else + mosix_update_remote_files(); + return(0); + } + if(atomic_read(&mm->mm_realusers) > 1) + { + mosix_clear_all_held_files(p); /* discard old */ 
+ return(-EBUSY); /* thread */ + } + /* because we are the only clone, no MM lock is needed */ + /* and the initial count cannot change */ + + for(n = 0 , vma = mm->mmap ; vma ; vma = vma->vm_next) + if((this = vma->vm_file)) + { + for(vmb = mm->mmap ; vmb != vma ; vmb = vmb->vm_next) + if(vmb->vm_file == vma->vm_file) + break; + if(vmb == vma) + { + if(this->f_dentry->d_inode->i_mapping->i_mmap_shared) + { + monkey: + mosix_clear_all_held_files(p); + tell_process(p, DREQ_CHECKSTAY); + return(-EDIST); + } + n++; + } + } + if(n && !(hf = kmalloc(n*sizeof(struct held_files), GFP_KERNEL))) + { + mosix_clear_all_held_files(p); /* discard old */ + return(-ENOMEM); + } + for(n = 0 , vma = mm->mmap ; vma ; vma = vma->vm_next) + if((this = vma->vm_file)) + { + for(vmb = mm->mmap ; vmb != vma ; vmb = vmb->vm_next) + if(vmb->vm_file == this) + goto already_placed; + denywrite = 0; + for(; vmb ; vmb = vmb->vm_next) + if(vmb->vm_file == this && (vmb->vm_flags & VM_DENYWRITE)) + { + denywrite = 1; + atomic_dec(&this->f_dentry->d_inode->i_writecount); + break; + } + get_file(this); + hf[n].f = this; + hf[n++].denywrite = denywrite; + already_placed:; + } + /* must not use "mosix_clear_all_held_files" because it could sleep */ + /* in 'fput' when another held file becomes VMONKEY */ + oldn = p->mosix.held_allocated; + old = p->mosix.held_files; + task_lock(p); + p->mosix.held_files = hf; + p->mosix.held_allocated = n; + task_unlock(p); + if(old) + { + for(i = 0 ; i < oldn ; i++) + if((this = old[i].f)) + { + if(old[i].denywrite) + allow_write_access(this); + fput(this); + } + kfree(old); + } + /* final race check: has any file been made monkey meanwhile? 
*/ + for(i = 0 ; i < n ; i++) + if(hf[i].f->f_dentry->d_inode->i_mapping->i_mmap_shared) + goto monkey; + return(0); +} + +int +fork_mosix_remote_files(struct task_struct *p) +{ + register int i; + register struct file *fp; + + if(!(p->mosix.held_allocated = current->mosix.held_allocated)) + return(0); + if(!(p->mosix.held_files = kmalloc(p->mosix.held_allocated * + sizeof(struct held_files), GFP_KERNEL))) + { + p->mosix.held_allocated = 0; + return(-ENOMEM); + } + for(i = 0 ; i < current->mosix.held_allocated ; i++) + if((fp = (struct file *)current->mosix.held_files[i].f)) + { + get_file(fp); + p->mosix.held_files[i].f = fp; + if((p->mosix.held_files[i].denywrite = + current->mosix.held_files[i].denywrite)) + atomic_dec(&fp->f_dentry->d_inode->i_writecount); + } + else + p->mosix.held_files[i].f = NULL; + return(0); +} + +void +mosix_clear_all_held_files(struct task_struct *p) +{ + register int i, n; + struct held_files *h; + + if(!(n = p->mosix.held_allocated)) + return; + h = p->mosix.held_files; + task_lock(p); + p->mosix.held_files = NULL; + p->mosix.held_allocated = 0; + task_unlock(p); + for(i = 0 ; i < n ; i++) + if(h[i].f) + { + if(h[i].denywrite) + allow_write_access(h[i].f); + fput(h[i].f); + } + kfree(h); +} + +int +task_maps_ip(struct task_struct *p, struct inode *ip) +{ + register int i, n; + int ret = 0; + struct held_files *h; + + task_lock(p); + if((n = p->mosix.held_allocated)) + for(h = p->mosix.held_files , i = 0 ; i < n ; i++) + if(h[i].f && (h[i].f)->f_dentry->d_inode == ip) + { + ret = 1; + break; + } + task_unlock(p); + return(ret); +} + +#ifdef CONFIG_MOSIX_DEBUG +/* + * Debug aid: print every occupied slot of the held-files table of "m", + * or of the current task when "m" is NULL. + */ +void +mosix_print_remote_files(struct mosix_task *m) +{ + register int i; + struct file *f; + + if(!m) + m = &current->mosix; + printk("%s -- REMOTE FILES: Allocated=%d\n", desc_mostask(m), + m->held_allocated); + for(i = 0 ; i < m->held_allocated ; i++) + if((f = m->held_files[i].f)) + printk("%d:%x: inode=%x, d_ino=%x, count=%x, deny=%d\n", i, + (int)f->f_dentry, + 
(int)f->f_dentry->d_inode, + (int)f->f_dentry->d_inode->i_ino, + atomic_read(&f->f_dentry->d_inode->i_count), + m->held_files[i].denywrite); +} +#endif /* CONFIG_MOSIX_DEBUG */ diff -urN linux-2.4.17/mos/service.c linux_umopenmosix/mos/service.c --- linux-2.4.17/mos/service.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/service.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,489 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* all sorts of operating-system dependent mob */ + + +void +common_daemon_setup(char *name, int highpri) +{ + register struct task_struct *p = current; + struct fs_struct *fs; + struct files_struct *fl; + + strcpy(p->comm, name); + exit_mm(p); + exit_files(p); + fl = init_task.files; + p->files = fl; + atomic_inc(&fl->count); + exit_fs(p); + fs = init_task.fs; + p->fs = fs; + atomic_inc(&fs->count); +#ifdef CONFIG_MOSIX_DFSA + atomic_inc(&fs->users); +#endif /* CONFIG_MOSIX_DFSA */ + + p->pgrp = 1; + p->session = 1; + p->tty = NULL; + reparent_to_init(); + p->it_real_incr = 0; + p->it_prof_incr = 0; + p->it_virt_incr = 0; + p->it_real_value = 0; + p->it_prof_value = 0; + p->it_virt_value = 0; + if(p->uid) + { + free_uid(p->user); + p->uid = 0; + alloc_uid(0); + } + p->euid = 0; + p->suid = 0; + p->gid = 0; + p->ngroups = 0; + spin_lock_irq(&p->sigmask_lock); + sigemptyset(&p->blocked); + recalc_sigpending(p); + spin_unlock_irq(&p->sigmask_lock); + memcpy(p->rlim, 
init_task.rlim, sizeof(p->rlim)); + task_lock(p); + if(highpri) + { + p->policy = SCHED_FIFO; + p->mosix.stay |= DSTAY_FOR_RT; + p->rt_priority = 0; + } + else + { + p->policy = SCHED_OTHER; + p->mosix.stay &= ~DSTAY_FOR_RT; + p->nice = DEF_NICE; + } + p->mosix.stay |= DSTAY_ITS_DAEMON; + task_unlock(p); +} + +char * +desc_mostask(struct mosix_task *m) +{ + register struct task_struct *p = m ? MOSIX_TO_TASK(m) : current; + /* paranoid: save space for each SMP + interrupts */ + static char descbuf[NR_CPUS+2][14+sizeof(p->comm)]; + static int ind; + static spinlock_t desc_lock = SPIN_LOCK_UNLOCKED; + int i; + + spin_lock_irq(&desc_lock); + i = ind; + ind = (i + 1) % (NR_CPUS+2); + spin_unlock_irq(&desc_lock); + + sprintf(descbuf[i], "%d(%s)", p->pid, p->comm); + return(descbuf[i]); +} + +asmlinkage void ret_from_kickstart(void) __asm__("ret_from_kickstart"); + +#define loaddebug(tsk,register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (tsk->thread.debugreg[register])) + +void +kickstart(void) +{ + register struct task_struct *p = current; + struct pt_regs *regs, *curegs = mos_to_regs(&p->mosix); + + set_fs(USER_DS); + regs = ((struct pt_regs *) (2*PAGE_SIZE + (unsigned long)p)) - 1; + if(regs != curegs) + { + task_lock(current); + *regs = *(curegs); + p->mosix.altregs = (uint32_t *)regs; + task_unlock(current); + } + /* the contents of our registers may at this stage still be held + * by our DEPUTY, hence our registers contain junk. + * (which is OK - we are not going to user-mode yet) + * the silly thing about it is that we may get to "ret_from_intr" + * via "do_bottom_half", which might then believe we had a kernel-mode + * interrupt and avoid returning via ret_with_reschedule... 
+ * the remedy is: + */ + regs->xcs |= 3; + + if(current->thread.debugreg[7]) + { + loaddebug(current,0); + loaddebug(current,1); + loaddebug(current,2); + loaddebug(current,3); + loaddebug(current,6); + loaddebug(current,7); + } + release_kernel_lock(current, smp_processor_id()); + current->lock_depth = -1; + /*current->mosix.lock_depth = -1;*/ + + + +// disabled ??? asm("movl %0,%%esp ; jmp ret_from_kickstart" : : "r" (regs)); + + + + + /*NOTREACHED*/ +} + +/* + * a page on the swap-cache is dirty: even if it happens to be clean relative + * to the swap-page, it is NOT clean relative to the original file. + */ +static inline int +pte_really_dirty(pte_t entry) +{ + struct page *page; + + if(pte_dirty(entry)) + return(1); /* the simple case */ + page = pte_page(entry); + if(!VALID_PAGE(page)) + { +#ifdef CONFIG_MOSIX_DIAG + mosix_panic("scanning a process with physical map"); +#endif /* CONFIG_MOSIX_DIAG */ + return(1); + } + return(PageDirty(page) || PageSwapCache(page)); +} + +#define RUN_OVER_MAGIC 126094 + +static int run_over_maxchunk; + +int +run_over_dirty_pages(int (*func)(unsigned long, int), int count_in_file) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + struct vm_area_struct *vma; + int count = 0; + int isfile; + unsigned long start, addr, pgd_end, pmd_end, pte_end; + pgd_t *pgdir; + pmd_t *pmdir; + pte_t *pte; + int err; + int let_go; + + if(!run_over_maxchunk) + { + /* this routine can take very long for huge processes: + * we must allow other processes to take over at least + * about every millisecond. + */ + run_over_maxchunk = loops_per_jiffy / RUN_OVER_MAGIC; + if(!run_over_maxchunk) /* slow processors */ + run_over_maxchunk = 1; + } + +#ifdef CONFIG_MOSIX_DIAG + if(!mm || mm == &init_mm) + { + printk("run_over_dirty_pages: no MM!\n"); + return(0); + } + if(atomic_read(&mm->mm_realusers) != 1) + { + printk("run_over_dirty_pages: SHARED! 
(by %d)\n", + atomic_read(&mm->mm_realusers)); + mosix_panic("SHARED"); + return(0); + } + /* so now there is no need to lock mmap_sem! */ +#endif /* CONFIG_MOSIX_DIAG */ + addr = 0; + loop: + let_go = run_over_maxchunk; + for(vma = mm->mmap ; vma != NULL ; vma = vma->vm_next) + if(addr < vma->vm_end) + { + start = vma->vm_start; + if(start < addr) + start = addr; + pgdir = pgd_offset(mm, start); + isfile = (vma->vm_file != NULL); + for(addr = start ; addr < vma->vm_end ; addr = pgd_end , pgdir++) + { + if(let_go-- == 0) + { + p->policy |= SCHED_YIELD; + schedule(); + goto loop; + } + pgd_end = (addr + PGDIR_SIZE) & PGDIR_MASK; + if(pgd_end > vma->vm_end) + pgd_end = vma->vm_end; + if(pgd_none(*pgdir) || pgd_bad(*pgdir)) + continue; + spin_lock(&mm->page_table_lock); + pmdir = pmd_offset(pgdir, addr); + for(; addr < pgd_end ; addr = pmd_end , pmdir++) + { + pmd_end = (addr + PMD_SIZE) & PMD_MASK; + if(pmd_end > pgd_end) + pmd_end = pgd_end; + if(pmd_none(*pmdir) || pmd_bad(*pmdir)) + continue; + pte = pte_offset(pmdir, addr); + for(; addr < pmd_end ; addr = pte_end , pte++) + { + pte_end = addr + PAGE_SIZE; + if(pte_end > pmd_end) + pte_end = pmd_end; + if(pte_present(*pte) ? 
+ (isfile && !pte_really_dirty(*pte)) : + pte_none(*pte)) + { + if(isfile && count_in_file) + count++; + continue; + } + if(!count_in_file) + count++; + if(func) + { + spin_unlock(&mm->page_table_lock); + if((err = (*func)(addr, PAGE_SIZE))) + return(err); + spin_lock(&mm->page_table_lock); + } + } + } + spin_unlock(&mm->page_table_lock); + } + } + return(count); +} + +#if 0 +/* needed once-only to calibrate the "run_over_maxchunk" calculation */ +/* not bothering using locks here, since we are only called from the debugger */ +int +calibrate_run_over(void) +{ + int sv = run_over_maxchunk; + struct mm_struct *mm = current->mm; + int chunks = 0; + register struct vm_area_struct *vma; + unsigned long addr, pgd_end; + __typeof__ (jiffies) before; + int milli, magic; + int i; + + if(!mm) + { + printk("No MM\n"); + return(0); + } + + for(vma = mm->mmap ; vma != NULL ; vma = vma->vm_next) + for(addr = vma->vm_start ; addr < vma->vm_end ; addr = pgd_end) + { + chunks++; + pgd_end = (addr + PGDIR_SIZE) & PGDIR_MASK; + if(pgd_end > vma->vm_end) + pgd_end = vma->vm_end; + } + printk("%d chunks: starting 1000 times...", chunks); + run_over_maxchunk = 0x7fffffff; /* unlimit */ + sti(); + before = jiffies; + for(i = 0 ; i < 1000 ; i++) + run_over_dirty_pages(NULL, 0); + milli = (jiffies - before) * 1000 / HZ; + printk("%d miliseconds, ", milli); + run_over_maxchunk = sv; + if(milli < 100) + { + printk("That was too quick -- try a larger process!\n"); + return((int)milli); + } + magic = loops_per_jiffy / (chunks * 1000 / milli); + printk("RUN_OVER_MAGIC=%d\n", magic); + return(magic); +} +#endif /* MOSIX_ONCE_ONLY */ + +/* provide accurate time in microseconds, even when jiffies wraps around */ +now_t +time_now(void) +{ + static int64_t cycles; + static typeof(jiffies) lastj; + now_t now; + typeof(jiffies) prev; + static spinlock_t time_lock = SPIN_LOCK_UNLOCKED; + + if(jiffies == 0 && cycles == 0) + return(1); + spin_lock_irq(&time_lock); + prev = lastj; + lastj = jiffies; + 
if(lastj < prev) + cycles += 0x100000000LL * MILLION / HZ; + now = ticks_to_ms(lastj) + cycles; + spin_unlock_irq(&time_lock); + return(now); +} + +void +adjust_task_mosix_context(struct task_struct **tp) +{ + struct task_struct *t = *tp; + + if(t->mosix.dflags & DINSCHED) + *tp = MOSIX_CONTEXT(t); +} + +/* + * Give the current task, which must not have an mm, a freshly allocated + * one: link it on the mmlist, install it as both mm and active_mm and + * drop the previous active_mm. Returns 0, -ENOMEM, or the error from + * init_new_context(). + */ +int +obtain_mm(void) +{ + struct mm_struct *mm, *active_mm; + struct task_struct *p = current; + int err; + + if(p->mm) + panic("obtain_mm: had already"); + if(!(mm = mm_alloc())) + return(-ENOMEM); + if((err = init_new_context(p, mm))) + { + /* task_lock(p) is not held yet, so there is nothing to unlock */ + mmdrop(mm); + return(err); + } + spin_lock(&mmlist_lock); + list_add(&mm->mmlist, &init_mm.mmlist); + mmlist_nr++; + spin_unlock(&mmlist_lock); + task_lock(p); + active_mm = p->active_mm; + p->mm = mm; + p->active_mm = mm; + task_unlock(p); + activate_mm(active_mm, mm); + mmdrop(active_mm); + return(0); +} + +DECLARE_WAIT_QUEUE_HEAD(wait_for_capsync); + +void +sync_caps(void) +{ + struct task_struct *p = current; + kernel_cap_t prev; + + if(process_told(p, DREQ_CAPCNG)) + { + write_lock_irq(&tasklist_lock); + prev = p->mosix.remote_caps; + p->mosix.remote_caps = p->cap_effective; + process_ack(p, DREQ_CAPCNG); + write_unlock_irq(&tasklist_lock); + wake_up(&wait_for_capsync); + if((p->mosix.dflags & DDEPUTY) && + ((prev ^ p->mosix.remote_caps) & REMOTE_CAPS)) + deputy_request(DEP_CAPS, &p->mosix.remote_caps, + sizeof(p->mosix.remote_caps), NULL, 0, 0, + NULL, 0); + } +} + +int +mosix_sync_caps(kernel_cap_t effective) +{ + int any = 0; + struct task_struct *p; + DECLARE_WAITQUEUE(wait, current); + + write_lock_irq(&tasklist_lock); + for_each_task(p) + if((p->mosix.dflags & (DDEPUTY|DFINISHED)) == DDEPUTY && + p->cap_effective == effective && + ((p->mosix.remote_caps ^ effective) & REMOTE_CAPS)) + { + tell_process(p, DREQ_CAPCNG); + if(p != current) + { + any = 1; + wake_up_mosix(p); + } + } + write_unlock_irq(&tasklist_lock); + sync_caps(); /* affecting ourselves is the most common case */ + if(!any) + 
return(0); + add_wait_queue(&wait_for_capsync, &wait); + loop: + set_current_state(TASK_INTERRUPTIBLE); + any = 0; + read_lock(&tasklist_lock); + for_each_task(p) + if((p->mosix.dflags & (DDEPUTY|DFINISHED)) == DDEPUTY && + p->cap_effective == effective && + (process_told(p, DREQ_CAPCNG)) && + ((p->mosix.remote_caps ^ effective) & REMOTE_CAPS)) + { + any = 1; + break; + } + read_unlock(&tasklist_lock); + if(any) + { + schedule(); + set_current_state(TASK_RUNNING); + if(signal_pending(current)) + { + remove_wait_queue(&wait_for_capsync, &wait); + return(-EINTR); + } + goto loop; + } + /* nobody left to wait for: do not return in TASK_INTERRUPTIBLE */ + set_current_state(TASK_RUNNING); + remove_wait_queue(&wait_for_capsync, &wait); + return(0); +} diff -urN linux-2.4.17/mos/syscalls.c linux_umopenmosix/mos/syscalls.c --- linux-2.4.17/mos/syscalls.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/syscalls.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,2461 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ + +extern caddr_t sys_call_table[]; + +#define set_file_name(x) do { \ + unsigned long fn = ((int *)®s)[x]; \ + set_write_region(fn, strlen_user((char *)fn)); \ + } while(0); +#define ARG(x) (((int *)®s)[x]) +#if BITS_PER_LONG == 32 +#define LARG(x) (*((long long *)(((int *)®s)+(x)))) +#else +#define LARG(x) (*((long *)(((int *)®s)+(x)))) +#endif + +#ifdef CONFIG_MOSIX_DFSA +#define RESULT_IS_FINAL(result) ((result) >= 0 || ( \ + (result) != -EDOITATHOME && (result) != -EINTR && \ + (result) != -ERESTARTSYS && (result) != -ERESTARTNOINTR && \ + (result) != -ERESTARTNOHAND && (result) != -EAGAIN)) +#endif /* CONFIG_MOSIX_DFSA */ + +int +remote_sys_getpid(int n, struct pt_regs regs) +{ + return(current->mosix.depinfo.tgid); +} + +int +remote_sys_gettid(int n, struct pt_regs regs) +{ + return(current->mosix.mypid); +} + +long +remote_sys_getitimer(int n, struct pt_regs regs) +{ + extern asmlinkage long sys_getitimer(int, struct itimerval *); + + if(*((int *)®s) == ITIMER_REAL) + { + set_read_region(ARG(1), sizeof(struct itimerval)); + return(remote_standard_system_call(n, ®s)); + } + return(sys_getitimer(ARG(0), (struct itimerval *)ARG(1))); +} + +long +remote_sys_setitimer(int n, struct pt_regs regs) +{ + extern asmlinkage long sys_setitimer(int, struct itimerval *, + struct itimerval *); + + if(*((int *)®s) == ITIMER_REAL) + { + if(ARG(1)) + set_write_region(ARG(1), sizeof(struct itimerval)); + if(ARG(2)) + set_read_region(ARG(2), sizeof(struct itimerval)); + return(remote_standard_system_call(n, ®s)); + } + return(sys_setitimer(ARG(0), (struct itimerval *)ARG(1), + (struct itimerval *)ARG(2))); +} + 
+unsigned long +remote_sys_brk(int n, struct pt_regs regs) +{ + extern asmlinkage unsigned long sys_brk(unsigned long); + + return(sys_brk((unsigned long)ARG(0))); +} + +long +remote_sys_mprotect(int n, struct pt_regs regs) +{ + extern asmlinkage long sys_mprotect(unsigned long, size_t, + unsigned long); + + return(sys_mprotect((unsigned long)ARG(0), (size_t)ARG(1), + (unsigned long)ARG(2))); +} + +long +remote_sys_msync(int n, struct pt_regs regs) +{ + return(0); /* no writeable file-mapping allowed remotely! */ +} + +long +remote_sys_munlock(int n, struct pt_regs regs) +{ + /* since remote mlocking is not allowed, but we are still here, + * this must only be a "make-sure" call */ + return(0); +} + +long +remote_sys_munlockall(int n, struct pt_regs regs) +{ + /* since remote mlocking is not allowed, but we are still here, + * this must only be a "make-sure" call */ + return(0); +} + +unsigned long +remote_sys_mremap(int n, struct pt_regs regs) +{ + extern asmlinkage unsigned long sys_mremap(unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + + return(sys_mremap((unsigned long)ARG(0), (unsigned long)ARG(1), + (unsigned long)ARG(2), (unsigned long)ARG(3), + (unsigned long)ARG(4))); +} + +int +remote_sys_modify_ldt(int n, int func, void *ptr, unsigned long bytecount) +{ + extern asmlinkage int sys_modify_ldt(int, void *, unsigned long); + + return(sys_modify_ldt(func, ptr, bytecount)); +} + +long +remote_sys_sched_yield(void) +{ + extern asmlinkage long sys_sched_yield(void); + + return(sys_sched_yield()); +} + +long +remote_sys_mincore(int n, struct pt_regs regs) +{ + extern asmlinkage long sys_mincore(unsigned long, size_t, + unsigned char *); + + return(sys_mincore((unsigned long)ARG(0), (size_t)ARG(1), + (unsigned char *)ARG(2))); +} + +long +remote_sys_madvise(int n, struct pt_regs regs) +{ + extern asmlinkage long sys_madvise(unsigned long, size_t, int); + + return(sys_madvise((unsigned long)ARG(0), (size_t)ARG(1), (int)ARG(2))); +} 
+ +/* + * With CONFIG_MOSIX_DFSA the call is first tried locally, in remote-DFSA + * mode, when the descriptor slot qualifies; a final result is returned + * directly. In every other case it is passed to + * remote_standard_system_call(). + */ +long +#ifdef CONFIG_MOSIX_DFSA +remote_sys_close_syscall(int n, struct pt_regs regs) +#else +remote_sys_close(int n, struct pt_regs regs) +#endif /* CONFIG_MOSIX_DFSA */ +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_close(unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if((p->files->fd[fd] || + (p->mosix.ttab && !FD_ISSET(fd, p->files->open_fds))) && + enter_remote_dfsa_mode()) + { + result = sys_close((unsigned int)ARG(0)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, &regs)); +} + +long +remote_sys_fchdir(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fchdir(unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->fs->pwd && p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fchdir(fd); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, &regs)); +} + +long +remote_sys_ftruncate(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_ftruncate(unsigned int, unsigned long); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_ftruncate(fd, (unsigned long)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, &regs)); +} + +long +remote_sys_ftruncate64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage 
long sys_ftruncate64(unsigned int, loff_t); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_ftruncate64(fd, (loff_t)LARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fchmod(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fchmod(unsigned int, mode_t); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fchmod(fd, (mode_t)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fchown(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fchown(unsigned int, uid_t, gid_t); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fchown(fd, (uid_t)ARG(1), (gid_t)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fchown16(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fchown16(unsigned int, old_uid_t, old_gid_t); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = 
sys_fchown16(fd, (old_uid_t)ARG(1), (old_gid_t)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_dup(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_dup(unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_dup(fd); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_dup2(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_dup2(unsigned int, unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_dup2(fd, (unsigned int)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fsync(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fsync(unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fsync(fd); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fdatasync(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long 
sys_fdatasync(unsigned int); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fdatasync(fd); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} + +ssize_t +remote_sys_write(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + ssize_t result; + extern asmlinkage ssize_t sys_write(unsigned int, char *, size_t); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_write(fd, (char *)ARG(1), ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_write_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +ssize_t +remote_sys_read(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + ssize_t result; + extern asmlinkage ssize_t sys_read(unsigned int, char *, size_t); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_read(fd, (char *)ARG(1), ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_open(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_open((char *)ARG(0), ARG(1), ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + 
set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_waitpid(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(int)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_creat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_creat(char *, int); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_creat((char *)ARG(0), ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_link(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_link(const char *, const char *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_link((const char *)ARG(0), (const char *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_file_name(1); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_unlink(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_unlink(const char *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_unlink((const char *)ARG(0)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_sys_execve(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_chdir(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_chdir(char *); + + if(current->fs->pwd && current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_chdir((char *)ARG(0)); + leave_remote_dfsa_mode(); + 
if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_time(int n, struct pt_regs regs) +{ + if(ARG(0)) + set_read_region(ARG(0), sizeof(int)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_mknod(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_chmod(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_chmod(const char *, mode_t); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_chmod((const char *)ARG(0), (mode_t)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_lchown(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_lchown(const char *, uid_t, gid_t); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_lchown((const char *)ARG(0), (uid_t)ARG(1), + (gid_t)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_lchown16(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_lchown16(const char *, old_uid_t, old_gid_t); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_lchown16((const char *)ARG(0), (old_uid_t)ARG(1), + (old_gid_t)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_stat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long 
sys_stat(char *, struct __old_kernel_stat *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_stat((char *)ARG(0), + (struct __old_kernel_stat *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct __old_kernel_stat)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_stat64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_stat64(char *, struct stat64 *, long); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_stat64((char *)ARG(0), (struct stat64 *)ARG(1), + (long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct stat64)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_mount(int n, struct pt_regs regs) +{ + unsigned long arg, len; + set_file_name(0); + set_file_name(1); + if((arg = ARG(2))) + { + len = TASK_SIZE - arg; + if(len > PAGE_SIZE-1) + len = PAGE_SIZE-1; + set_write_region(ARG(2), len); + } + if((arg = ARG(4))) + { + len = TASK_SIZE - arg; + if(len > PAGE_SIZE-1) + len = PAGE_SIZE-1; + set_write_region(ARG(4), PAGE_SIZE-1); + } + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_oldumount(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_stime(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), sizeof(int)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fstat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fstat(unsigned int, + struct __old_kernel_stat *); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + 
} + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fstat(fd, (struct __old_kernel_stat *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), sizeof(struct __old_kernel_stat)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fstat64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_fstat64(unsigned int, struct stat64 *, long); + unsigned int fd = ARG(0); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_fstat64(fd, (struct stat64 *)ARG(1), (long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), sizeof(struct stat64)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_utime(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_utime(char *, struct utimbuf *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_utime((char *)ARG(0), (struct utimbuf *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + if(ARG(1)) + set_read_region(ARG(1), sizeof(struct utimbuf)); + return(remote_standard_system_call(n, ®s)); +} + +/* NOTE: sys_utimes is not supported for i386, + * but should be implemented for Alpha/Sparc */ + +long +remote_sys_access(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_access(const char *, int); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_access((const char *)ARG(0), (int)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* 
CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_rename(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_rename(const char *, const char *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_rename((const char *)ARG(0), (const char *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_file_name(1); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_mkdir(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_mkdir(const char *, int); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_mkdir((const char *)ARG(0), ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_rmdir(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_rmdir(const char *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_rmdir((const char *)ARG(0)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_sys_pipe(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), 2 * sizeof(long)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_times(int n, struct pt_regs regs) +{ + if(ARG(0)) + set_read_region(ARG(0), sizeof(struct tms)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_acct(int n, struct pt_regs regs) +{ + if(ARG(0)) + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_umount(int n, struct pt_regs regs) +{ + set_file_name(0); + 
return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_ioctl(int n, struct pt_regs regs) +{ + switch(ARG(1)) + { + /* so many more -- feel free to add: */ + /* if ambiguous -- add both! */ + /* but do not include ioctls whose definitions is configurable*/ + /* also, it is better to not include rare non-tty ioctls, + because it would slow-down the other's performance */ + case TCGETS: + case TIOCGLCKTRMIOS: + set_read_region(ARG(2), sizeof(struct termios)); + break; + case TCSETS: + case TCSETSF: + case TCSETSW: + case TIOCSLCKTRMIOS: + set_write_region(ARG(2), sizeof(struct termios)); + break; + case TCGETA: + set_read_region(ARG(2), sizeof(struct termio)); + break; + case TCSETA: + case TCSETAW: + case TCSETAF: + set_write_region(ARG(2), sizeof(struct termio)); + break; + case TIOCOUTQ: + case TIOCGSOFTCAR: + case FIONREAD: + case FIGETBSZ: + case TIOCGETD: + case TIOCMGET: + case TIOCSERGWILD: + case TIOCSERGETLSR: + case RNDGETENTCNT: + set_read_region(ARG(2), sizeof(int)); + break; + case FIBMAP: + set_write_region(ARG(2), sizeof(int)); + set_read_region(ARG(2), sizeof(int)); + break; + case TIOCPKT: + case TIOCSETD: + case TIOCSSOFTCAR: + case FIONBIO: + case TIOCMSET: + case TIOCSERSWILD: + set_write_region(ARG(2), sizeof(int)); + break; + case TIOCSTI: + set_write_region(ARG(2), sizeof(char)); + break; + case TIOCGWINSZ: + set_read_region(ARG(2), sizeof(struct winsize)); + break; + case TIOCSWINSZ: + set_write_region(ARG(2), sizeof(struct winsize)); + break; + case TIOCGPGRP: + case TIOCGSID: + set_read_region(ARG(2), sizeof(pid_t)); + break; + case TIOCSPGRP: + set_write_region(ARG(2), sizeof(pid_t)); + break; + case TIOCLINUX: + /* there are many cases - use the extreme ones: */ + set_read_region(ARG(2), 1); + set_write_region(ARG(2), 36); + break; + case TIOCTTYGSTRUCT: + set_read_region(ARG(2), sizeof(struct tty_struct)); + break; + case TIOCGSERIAL: + set_read_region(ARG(2), sizeof(struct serial_struct)); + break; + case TIOCSSERIAL: + 
set_write_region(ARG(2), sizeof(struct serial_struct)); + break; + case TIOCSERGSTRUCT: + set_read_region(ARG(2), sizeof(struct async_struct)); + break; + case TIOCGICOUNT: + set_read_region(ARG(2), sizeof(struct serial_icounter_struct)); + break; + case HDIO_GETGEO: + set_read_region(ARG(2), sizeof(struct hd_geometry)); + break; + } + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fcntl(int n, struct pt_regs regs) +{ + unsigned long f = ARG(1); +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + struct task_struct *p = current; + long result; + extern asmlinkage long sys_fcntl(unsigned int, unsigned int, + unsigned long); + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(FD_ISSET(fd, p->files->open_fds) && p->files->fd[fd]) + switch(f) + { + case F_DUPFD: + case F_GETFL: + case F_SETFL: + if(!enter_remote_dfsa_mode()) + break; + result = sys_fcntl(fd, f, (unsigned long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + break; + } +#endif /* CONFIG_MOSIX_DFSA */ + switch(f) + { + case F_GETLK: + set_read_region(ARG(2), sizeof(struct flock)); + break; + case F_SETLK: + case F_SETLKW: + set_write_region(ARG(2), sizeof(struct flock)); + break; + } + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fcntl64(int n, struct pt_regs regs) +{ + unsigned long f = ARG(1); +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + struct task_struct *p = current; + long result; + extern asmlinkage long sys_fcntl64(unsigned int, unsigned int, + unsigned long); + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(FD_ISSET(fd, p->files->open_fds) && p->files->fd[fd]) + switch(f) + { + case F_DUPFD: + case F_GETFD: + case F_SETFD: + case F_GETFL: + case F_SETFL: + if(!enter_remote_dfsa_mode()) + break; + result = sys_fcntl64(fd, f, (unsigned long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + 
break; + } +#endif /* CONFIG_MOSIX_DFSA */ + switch(f) + { + case F_GETLK: + set_read_region(ARG(2), sizeof(struct flock)); + break; + case F_SETLK: + case F_SETLKW: + set_write_region(ARG(2), sizeof(struct flock)); + break; + case F_GETLK64: + set_read_region(ARG(2), sizeof(struct flock64)); + break; + case F_SETLK64: + case F_SETLKW64: + set_write_region(ARG(2), sizeof(struct flock64)); + break; + } + return(remote_standard_system_call(n, ®s)); +} + +int +remote_sys_olduname(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), sizeof(struct oldold_utsname)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_chroot(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_ustat(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct ustat)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sigpending(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), sizeof(old_sigset_t)); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_sys_sigaction(int n, struct pt_regs regs) +{ + if(ARG(1)) + set_write_region(ARG(1), sizeof(struct old_sigaction)); + if(ARG(2)) + set_read_region(ARG(2), sizeof(struct old_sigaction)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_setrlimit(int n, struct pt_regs regs) +{ + set_write_region(ARG(1), sizeof(struct rlimit)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getrlimit(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct rlimit)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_old_getrlimit(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct rlimit)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getrusage(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct rusage)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_gettimeofday(int n, 
struct pt_regs regs) +{ + if(ARG(0)) + set_read_region(ARG(0), sizeof(struct timeval)); + if(ARG(1)) + set_read_region(ARG(1), sizeof(struct timezone)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_settimeofday(int n, struct pt_regs regs) +{ + if(ARG(0)) + set_write_region(ARG(0), sizeof(struct timeval)); + if(ARG(1)) + set_write_region(ARG(1), sizeof(struct timezone)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getgroups(int n, struct pt_regs regs) +{ + int ng = ARG(0); + + if(ng > 0 && ARG(1)) + set_read_region(ARG(1), ng * sizeof(gid_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getgroups16(int n, struct pt_regs regs) +{ + int ng = ARG(0); + + if(ng > 0 && ARG(1)) + set_read_region(ARG(1), ng * sizeof(old_gid_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_setgroups(int n, struct pt_regs regs) +{ + int ng = ARG(0); + + if(ng > 0) + set_write_region(ARG(1), ng * sizeof(gid_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_setgroups16(int n, struct pt_regs regs) +{ + int ng = ARG(0); + + if(ng > 0) + set_write_region(ARG(1), ng * sizeof(old_gid_t)); + return(remote_standard_system_call(n, ®s)); +} + +void +set_select_regions(int n, unsigned long inp, unsigned long outp, + unsigned long exp, unsigned long tvp) +{ + n = (n + 8*sizeof(long) - 1) / (8*sizeof(long)) * sizeof(long); + if(inp) + { + set_read_region(inp, n); + set_write_region(inp, n); + } + if(outp) + { + set_read_region(outp, n); + set_write_region(outp, n); + } + if(exp) + { + set_read_region(exp, n); + set_write_region(exp, n); + } + if(tvp) + { + set_read_region(tvp, sizeof(struct timeval)); + set_write_region(tvp, sizeof(struct timeval)); + } +} + +int +remote_old_select(int n, struct pt_regs regs) +{ + unsigned long a[5]; + + if(copy_from_user(a, (char *)ARG(0), sizeof(a))) + return(-EFAULT); + + set_write_region(ARG(0), sizeof(a)); + set_select_regions(a[0], a[1], a[2], 
a[3], a[4]); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_symlink(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_symlink(const char *, const char *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_symlink((const char *)ARG(0), + (const char *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_file_name(1); + return(remote_standard_system_call(n, ®s)); +} + + +long +remote_sys_lstat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_lstat(char *, struct __old_kernel_stat *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_lstat((char *)ARG(0), + (struct __old_kernel_stat *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct __old_kernel_stat)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_lstat64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_lstat64(char *, struct stat64 *, long); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_lstat64((char *)ARG(0), (struct stat64 *)ARG(1), + (long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct stat64)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_readlink(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_readlink(const char *, char *, int); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_readlink((const char *)ARG(0), (char *)ARG(1), + (int)ARG(2)); + leave_remote_dfsa_mode(); + 
if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_uselib(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_swapon(int n, struct pt_regs regs) +{ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_old_readdir(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + int result; + extern asmlinkage int old_readdir(unsigned int, void *, unsigned int); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = old_readdir(fd, (void *)ARG(1), (unsigned int)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_old_mmap(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), 6 * sizeof(int)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_truncate(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_truncate(const char *, unsigned long); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_truncate((const char *)ARG(0), + (unsigned long)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_truncate64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_truncate64(const char *, loff_t); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_truncate64((const char *)ARG(0), (loff_t)LARG(1)); + 
leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_statfs(int n, struct pt_regs regs) +{ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct statfs)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_fstatfs(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct statfs)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_socketcall(int n, struct pt_regs regs) +{ + unsigned long a[6]; + unsigned long alen; + struct msghdr hdr; + int i; + struct iovec io; + + set_write_region(ARG(1), 6 * sizeof(int)); + if(!copy_from_user(a, (char *)ARG(1), 6 * sizeof(int))) + switch(ARG(0)) + { + case SYS_BIND: + case SYS_CONNECT: + set_write_region(a[1], a[2]); + break; + case SYS_ACCEPT: + case SYS_GETSOCKNAME: + case SYS_GETPEERNAME: + if(!get_user(alen, (int *)a[2])) + { + set_write_region(a[2], sizeof(int)); + set_read_region(a[2], sizeof(int)); + set_read_region(a[1], alen); + } + break; + case SYS_SOCKETPAIR: + set_read_region(a[3], 2 * sizeof(int)); + break; + case SYS_SEND: + set_write_region(a[1], a[2]); + break; + case SYS_SENDTO: + set_write_region(a[1], a[2]); + set_write_region(a[4], a[5]); + break; + case SYS_RECV: + set_read_region(a[1], a[2]); + break; + case SYS_RECVFROM: + set_read_region(a[1], a[2]); + if(a[4] && !get_user(alen, (int *)a[5])) + { + set_write_region(a[5], sizeof(int)); + set_read_region(a[5], sizeof(int)); + set_read_region(a[4], alen); + } + break; + case SYS_SETSOCKOPT: + set_write_region(a[3], a[4]); + break; + case SYS_GETSOCKOPT: + if(!get_user(alen, (int *)a[4])) + { + set_write_region(a[4], sizeof(int)); + set_read_region(a[4], sizeof(int)); + set_read_region(a[3], alen); + } + break; + case SYS_SENDMSG: + if(copy_from_user(&hdr, (char *)a[1], sizeof(hdr))) + break; + set_write_region(a[1], sizeof(hdr)); + if(hdr.msg_name) + 
set_write_region((unsigned long)hdr.msg_name, hdr.msg_namelen); + if(hdr.msg_controllen) + set_write_region((unsigned long)hdr.msg_control, hdr.msg_controllen); + if(hdr.msg_iovlen > 0) + { + set_write_region((unsigned long)hdr.msg_iov, hdr.msg_iovlen * sizeof(struct iovec)); + if(hdr.msg_iovlen > 100) + hdr.msg_iovlen = 100; + for(i = 0 ; i < hdr.msg_iovlen && + !copy_from_user(&io, hdr.msg_iov + i, + sizeof(io)) ; i++) + set_write_region((unsigned long)io.iov_base, io.iov_len); + } + break; + case SYS_RECVMSG: + if(copy_from_user(&hdr, (char *)a[1], sizeof(hdr))) + break; + set_write_region(a[1], sizeof(hdr)); + set_read_region(a[1], sizeof(hdr)); + if(hdr.msg_name) + set_read_region((unsigned long)hdr.msg_name, hdr.msg_namelen); + if(hdr.msg_control) + set_read_region((unsigned long)hdr.msg_control, hdr.msg_controllen); + if(hdr.msg_iovlen > 0) + { + set_write_region((unsigned long)hdr.msg_iov, hdr.msg_iovlen * sizeof(struct iovec)); + if(hdr.msg_iovlen > 100) + hdr.msg_iovlen = 100; + for(i = 0 ; i < hdr.msg_iovlen && + !copy_from_user(&io, hdr.msg_iov + i, + sizeof(io)) ; i++) + set_read_region((unsigned long)io.iov_base, io.iov_len); + } + break; + } + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_syslog(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_newstat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage long sys_newstat(char *, struct stat *); + + if(current->mosix.ttab && enter_remote_dfsa_mode()) + { + result = sys_newstat((char *)ARG(0), (struct stat *)ARG(1)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_file_name(0); + set_read_region(ARG(1), sizeof(struct stat)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_newlstat(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + long result; + extern asmlinkage 
long sys_newlstat(char *, struct stat *);
+
+	if(current->mosix.ttab && enter_remote_dfsa_mode())
+	{
+		result = sys_newlstat((char *)ARG(0),
+					(struct stat *)ARG(1));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_file_name(0);
+	set_read_region(ARG(1), sizeof(struct stat));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * fstat: with DFSA configured, an out-of-range fd yields -EBADF when
+ * mosix.ttab is set; for an open fd the call is first tried directly in
+ * remote-DFSA mode and its result returned if final.  Otherwise the stat
+ * buffer (argument 1) is declared as a read region and the call forwarded.
+ */
+long
+remote_sys_newfstat(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	long result;
+	extern asmlinkage long sys_newfstat(unsigned int, struct stat *);
+	unsigned int fd = ARG(0);
+	struct task_struct *p = current;
+
+	if(fd >= p->files->max_fds)
+	{
+		if(p->mosix.ttab)
+			return(-EBADF);
+	}
+	else if(p->files->fd[fd] && enter_remote_dfsa_mode())
+	{
+		result = sys_newfstat(fd, (struct stat *)ARG(1));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_read_region(ARG(1), sizeof(struct stat));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* uname (old layout): argument 0 is declared as a read region, then forwarded */
+int
+remote_sys_uname(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(0), sizeof(struct old_utsname));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* wait4: the optional status (arg 1) and rusage (arg 3) buffers are read regions */
+long
+remote_sys_wait4(int n, struct pt_regs regs)
+{
+	if(ARG(1))
+		set_read_region(ARG(1), sizeof(int));
+	if(ARG(3))
+		set_read_region(ARG(3), sizeof(struct rusage));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* sysinfo: argument 0 is declared as a read region, then forwarded */
+long
+remote_sys_sysinfo(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(0), sizeof(struct sysinfo));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * ipc multiplexer: argument 0 carries the sub-call in its low 16 bits and
+ * a version number above them.  For each sub-call the user buffers are
+ * declared before the call is forwarded: set_write_region() for data the
+ * call takes from the user (e.g. IPC_SET, SETALL, MSGSND), set_read_region()
+ * for buffers the call fills in (e.g. IPC_STAT, GETALL, MSGRCV).
+ * Sub-calls not listed here are forwarded without any declaration.
+ */
+int
+remote_sys_ipc(int n, struct pt_regs regs)
+{
+	int call = ARG(0) & 0xffff;
+	int version = ARG(0) >> 16;
+
+	switch (call)
+	{
+		case SEMOP:
+			set_write_region(ARG(4), ARG(2)*sizeof(struct sembuf));
+			break;
+		case SEMCTL:
+		{
+			unsigned long fourth;
+
+			/* argument 4 points at the semun value; fetch it */
+			if(get_user(fourth, (unsigned long *)ARG(4)))
+				break;
+			set_read_region(ARG(4), sizeof(long));
+			switch(ARG(3))
+			{
+				case IPC_INFO:
+				case SEM_INFO:
+					set_read_region(fourth,
+						sizeof(struct seminfo));
+					break;
+				case IPC_STAT:
+				case SEM_STAT:
+					set_read_region(fourth,
+						sizeof(struct semid_ds));
+					break;
+				case IPC_SET:
+					set_write_region(fourth,
+						sizeof(struct semid_ds));
+					/*
+					 * IPC_SET only takes data from the
+					 * user: stop here instead of falling
+					 * into GETALL, which would also
+					 * declare a SEMMNS-sized read region
+					 * at the same address.
+					 */
+					break;
+				case GETALL:
+					set_read_region(fourth,
+						SEMMNS * sizeof(ushort));
+					break;
+				case SETALL:
+					set_write_region(fourth,
+						SEMMNS * sizeof(ushort));
+					break;
+			}
+			break;
+		}
+		case MSGSND:
+			set_write_region(ARG(4), sizeof(long) + ARG(2));
+			break;
+		case MSGRCV:
+			switch (version) {
+			case 0: {
+				struct ipc_kludge tmp;
+
+				if (!ARG(4))
+					break;
+				if (copy_from_user(&tmp,(struct ipc_kludge *)
+						ARG(4), sizeof (tmp)))
+					break;
+				set_read_region(ARG(4), sizeof(tmp));
+				set_read_region((unsigned long)tmp.msgp,
+						sizeof(long) + ARG(2));
+				}
+				break;
+			case 1: default:
+				set_read_region(ARG(4), sizeof(long) + ARG(2));
+				break;
+			}
+			break;
+		case MSGCTL:
+			switch(ARG(2))
+			{
+				case IPC_INFO:
+				case MSG_INFO:
+					set_read_region(ARG(4),
+						sizeof(struct msginfo));
+					break;
+				case IPC_STAT:
+				case MSG_STAT:
+					set_read_region(ARG(4),
+						sizeof(struct msqid_ds));
+					break;
+				case IPC_SET:
+					set_write_region(ARG(4),
+						sizeof(struct msqid_ds));
+					break;
+			}
+			break;
+		case SHMCTL:
+			switch(ARG(2))
+			{
+				case IPC_SET:
+					set_write_region(ARG(4),
+						sizeof(struct shmid_ds));
+					break;
+				case IPC_INFO:
+					set_read_region(ARG(4),
+						sizeof(struct shminfo));
+					break;
+				case SHM_INFO:
+					set_read_region(ARG(4),
+						sizeof(struct shm_info));
+					break;
+				case SHM_STAT:
+					set_read_region(ARG(4),
+						sizeof(struct shmid_ds));
+					break;
+				case IPC_STAT:
+					set_read_region(ARG(4),
+						sizeof(struct shmid_ds));
+			}
+			break;
+	}
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * sigreturn: the signal frame is taken from the user stack, starting
+ * 8 bytes below the saved stack pointer; its size is hard-coded (below).
+ */
+int
+remote_sys_sigreturn(int n, struct pt_regs regs)
+{
+	set_write_region(regs.esp - 8, 732);
+	/* Eek, struct sigframe not defined outside arch/i386/kernel/signal.c */
+	return(remote_standard_system_call(n, &regs));
+}
+
+long
+remote_sys_setdomainname(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(0),
ARG(1)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_newuname(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), sizeof(struct new_utsname)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_adjtimex(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), sizeof(struct timex)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sigprocmask(int n, struct pt_regs regs) +{ + if(ARG(1)) + set_write_region(ARG(1), sizeof(old_sigset_t)); + if(ARG(2)) + set_read_region(ARG(2), sizeof(old_sigset_t)); + return(remote_standard_system_call(n, ®s)); +} + +unsigned long +remote_sys_create_module(int n, struct pt_regs regs) +{ + set_file_name(0); /* not really a file, but still a string */ + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_init_module(int n, struct pt_regs regs) +{ + set_file_name(0); /* not really a file, but still a string */ + set_write_region(ARG(1), sizeof(struct module) + 64); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_delete_module(int n, struct pt_regs regs) +{ + set_file_name(0); /* not really a file, but still a string */ + return(remote_standard_system_call(n, ®s)); +} + +void +set_max_readable(unsigned long addr) +{ + struct vm_area_struct * vma; + + vma = find_vma(current->mm, addr); + if (vma && (vma->vm_flags & VM_WRITE) && addr >= vma->vm_start) + set_read_region(addr, vma->vm_end - addr); +} + +long +remote_sys_get_kernel_syms(int n, struct pt_regs regs) +{ + set_max_readable(ARG(0)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_quotactl(int n, struct pt_regs regs) +{ + set_file_name(1); + switch(ARG(0)) + { + case Q_QUOTAON: + set_file_name(3); + break; + case Q_GETQUOTA: + set_read_region(ARG(3), sizeof(struct dqblk)); + break; + case Q_GETSTATS: + set_read_region(ARG(3), sizeof(struct dqstats)); + break; + case Q_SETQUOTA: + case Q_SETUSE: + case Q_SETQLIM: + set_write_region(ARG(3), sizeof(struct dqblk)); 
+			break;
+	}
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * bdflush: for an even function code >= 2, argument 1 is declared as an
+ * int-sized read region before the call is forwarded.
+ */
+long
+remote_sys_bdflush(int n, struct pt_regs regs)
+{
+	if(ARG(0) >= 2 && !(ARG(0) & 1))
+		set_read_region(ARG(1), sizeof(int));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * sysfs: option 1 passes a string in argument 1; option 2 has a user
+ * buffer of unspecified length in argument 2, so everything from that
+ * address to the end of its mapping is declared via set_max_readable().
+ * Other options are forwarded without any declaration.
+ */
+long
+remote_sys_sysfs(int n, struct pt_regs regs)
+{
+	switch(ARG(0))
+	{
+		case 1:
+			set_file_name(1);
+			break;
+		case 2:
+			/* the buffer address is argument 2, not the literal 2 */
+			set_max_readable(ARG(2));
+			break;
+	}
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * lseek: with DFSA configured, an out-of-range fd yields -EBADF when
+ * mosix.ttab is set; for an open fd the call is first tried directly in
+ * remote-DFSA mode and its result returned if final.  Otherwise forwarded.
+ */
+off_t
+remote_sys_lseek(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	unsigned int fd = ARG(0);
+	off_t result;
+	extern asmlinkage off_t sys_lseek(unsigned int, off_t, unsigned int);
+	struct task_struct *p = current;
+
+	if(fd >= p->files->max_fds)
+	{
+		if(p->mosix.ttab)
+			return(-EBADF);
+	}
+	else if(p->files->fd[fd] && enter_remote_dfsa_mode())
+	{
+		result = sys_lseek(fd, ARG(1), ARG(2));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * llseek: same DFSA shortcut as lseek; when forwarding, the loff_t result
+ * location (argument 3) is declared as a read region.
+ */
+long
+remote_sys_llseek(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	unsigned int fd = ARG(0);
+	long result;
+	extern long sys_llseek(unsigned int, unsigned long, unsigned long,
+							loff_t *, int);
+	struct task_struct *p = current;
+
+	if(fd >= p->files->max_fds)
+	{
+		if(p->mosix.ttab)
+			return(-EBADF);
+	}
+	else if(p->files->fd[fd] && enter_remote_dfsa_mode())
+	{
+		result = sys_llseek(fd, ARG(1), ARG(2), (loff_t *)ARG(3),
+						ARG(4));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_read_region(ARG(3), sizeof(loff_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+long
+remote_sys_getdents(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	unsigned int fd = ARG(0);
+	long result;
+	extern asmlinkage long sys_getdents(unsigned int, void *, unsigned int);
+	struct task_struct *p = current;
+
+	if(fd >= p->files->max_fds)
+	{
+		if(p->mosix.ttab)
+
return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_getdents(fd, (void *)ARG(1), (unsigned int)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getdents64(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + long result; + extern asmlinkage long sys_getdents64(unsigned int, void *, + unsigned int); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_getdents64(fd, (void *)ARG(1), + (unsigned int)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + set_read_region(ARG(1), ARG(2)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_select(int n, struct pt_regs regs) +{ + set_select_regions(ARG(0), ARG(1), ARG(2), ARG(3), ARG(4)); + return(remote_standard_system_call(n, ®s)); +} + +ssize_t +remote_sys_readv(int n, struct pt_regs regs) +{ + int i, l = ARG(2); + struct iovec io; + +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + ssize_t result; + extern asmlinkage ssize_t sys_readv(unsigned int, struct iovec*, + unsigned long); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_readv(fd, (struct iovec *)ARG(1), + (unsigned long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + if(l > 0) + set_read_region(ARG(1), l * sizeof(struct iovec)); + if(l > 100) + l = 100; /* do not loop forever here */ + for(i = 0 ; i < l && + !copy_from_user(&io, ((struct iovec *)ARG(1)) + i, sizeof(io)) ; + i++) 
+ set_read_region((unsigned long)io.iov_base, io.iov_len); + return(remote_standard_system_call(n, ®s)); +} + +ssize_t +remote_sys_writev(int n, struct pt_regs regs) +{ + int i, l = ARG(2); + struct iovec io; + +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + ssize_t result; + extern asmlinkage ssize_t sys_writev(unsigned int, struct iovec *, + unsigned long); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_writev(fd, (struct iovec *)ARG(1), + (unsigned long)ARG(2)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + if(l > 0) + set_read_region(ARG(1), l * sizeof(struct iovec)); + if(l > 100) + l = 100; /* do not loop forever here */ + for(i = 0 ; i < l && + !copy_from_user(&io, ((struct iovec *)ARG(1)) + i, sizeof(io)) ; + i++) + set_write_region((unsigned long)io.iov_base, io.iov_len); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sysctl(int n, struct pt_regs regs) +{ + struct __sysctl_args args; + int oldlen; + + if(copy_from_user((char *)&args, (char *)ARG(0), sizeof(args))) + return(-EFAULT); + set_read_region(ARG(0), sizeof(args)); + set_write_region((unsigned long)args.name, args.nlen); + if(args.oldval && args.oldlenp && !get_user(oldlen, args.oldlenp)) + { + set_write_region((unsigned long)args.oldlenp, sizeof(int)); + if(oldlen) + { + set_read_region((unsigned long)args.oldlenp, sizeof(int)); + set_read_region((unsigned long)args.oldval, oldlen); + } + } + if(args.newval) + set_write_region((unsigned long)args.newval, args.newlen); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sched_setparam(int n, struct pt_regs regs) +{ + set_write_region(ARG(1), sizeof(struct sched_param)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sched_getparam(int n, struct pt_regs regs) +{ + 
set_read_region(ARG(1), sizeof(struct sched_param)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sched_setscheduler(int n, struct pt_regs regs) +{ + set_write_region(ARG(2), sizeof(struct sched_param)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sched_getscheduler(int n, struct pt_regs regs) +{ + set_read_region(ARG(2), sizeof(struct sched_param)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_sched_rr_get_interval(int n, struct pt_regs regs) +{ + set_read_region(ARG(1), sizeof(struct timespec)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_nanosleep(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), sizeof(struct timespec)); + set_read_region(ARG(1), sizeof(struct timespec)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getresuid(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), sizeof(uid_t)); + set_read_region(ARG(1), sizeof(uid_t)); + set_read_region(ARG(2), sizeof(uid_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_getresuid16(int n, struct pt_regs regs) +{ + set_read_region(ARG(0), sizeof(old_uid_t)); + set_read_region(ARG(1), sizeof(old_uid_t)); + set_read_region(ARG(2), sizeof(old_uid_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_query_module(int n, struct pt_regs regs) +{ + if(ARG(0)) + set_file_name(0); /* not really a file, but still a string */ + if(ARG(2)) + set_read_region(ARG(2), ARG(3)); + set_read_region(ARG(4), sizeof(size_t)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_poll(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), ARG(1) * sizeof(struct pollfd)); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_nfsservctl(int n, struct pt_regs regs) +{ + set_write_region(ARG(1), sizeof(struct nfsctl_arg)); + set_write_region(ARG(2), sizeof(union nfsctl_res)); + return(remote_standard_system_call(n, ®s)); +} + +long 
+remote_getresgid(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(0), sizeof(gid_t));
+	set_read_region(ARG(1), sizeof(gid_t));
+	set_read_region(ARG(2), sizeof(gid_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* getresgid (16-bit ids): the three result locations are read regions */
+long
+remote_getresgid16(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(0), sizeof(old_gid_t));
+	set_read_region(ARG(1), sizeof(old_gid_t));
+	set_read_region(ARG(2), sizeof(old_gid_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * rt_sigreturn: the signal frame is taken from the user stack, starting
+ * 8 bytes below the saved stack pointer; its size is hard-coded (below).
+ */
+int
+remote_sys_rt_sigreturn(int n, struct pt_regs regs)
+{
+	set_write_region(regs.esp - 8, 892);
+	/*Eek,struct rt_sigframe not defined outside arch/i386/kernel/signal.c*/
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* rt_sigaction: optional new action (arg 1) is input, old action (arg 2) output */
+long
+remote_sys_rt_sigaction(int n, struct pt_regs regs)
+{
+	if(ARG(1))
+		set_write_region(ARG(1), sizeof(struct k_sigaction));
+	if(ARG(2))
+		set_read_region(ARG(2), sizeof(struct k_sigaction));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* rt_sigprocmask: optional new mask (arg 1) is input, old mask (arg 2) output */
+long
+remote_sys_rt_sigprocmask(int n, struct pt_regs regs)
+{
+	if(ARG(1))
+		set_write_region(ARG(1), sizeof(sigset_t));
+	if(ARG(2))
+		set_read_region(ARG(2), sizeof(sigset_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* rt_sigpending: the result mask (arg 0) is a read region */
+long
+remote_sys_rt_sigpending(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(0), sizeof(sigset_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * rt_sigtimedwait: the signal set (arg 0) and optional timeout (arg 2) are
+ * input; the optional siginfo (arg 1) is output.
+ */
+long
+remote_sys_rt_sigtimewait(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(0), sizeof(sigset_t));
+	if(ARG(1))
+		set_read_region(ARG(1), sizeof(siginfo_t));
+	if(ARG(2))
+		set_write_region(ARG(2), sizeof(struct timespec));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * rt_sigqueueinfo(pid, sig, uinfo): the only user memory involved is the
+ * siginfo_t taken from argument 2.  Argument 1 is the signal number, not a
+ * pointer, so it must not be declared as a region, and argument 2 is not a
+ * timespec (both declarations had been carried over from rt_sigtimedwait).
+ */
+long
+remote_sys_rt_sigqueueinfo(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(2), sizeof(siginfo_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * rt_sigsuspend(unewset, sigsetsize): argument 0 points at a sigset_t, so
+ * declare exactly that much as input rather than a siginfo_t's worth.
+ */
+int
+remote_sys_rt_sigsuspend(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(0), sizeof(sigset_t));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* pread: the destination buffer (arg 1, length arg 2) is a read region */
+ssize_t
+remote_sys_pread(int n, struct pt_regs regs)
+{
+	set_read_region(ARG(1), ARG(2));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* pwrite: the source buffer (arg 1, length arg 2) is a write region */
+ssize_t
+remote_sys_pwrite(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(1), ARG(2));
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * chown: with DFSA configured and mosix.ttab set, the call is first tried
+ * directly in remote-DFSA mode and its result returned if final.  Otherwise
+ * argument 0 is declared as a file name and the call forwarded.
+ */
+long
+remote_sys_chown(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	long result;
+	extern asmlinkage long sys_chown(const char *, uid_t, gid_t);
+
+	if(current->mosix.ttab && enter_remote_dfsa_mode())
+	{
+		result = sys_chown((const char *)ARG(0), (uid_t)ARG(1),
+							(gid_t)ARG(2));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_file_name(0);
+	return(remote_standard_system_call(n, &regs));
+}
+
+/* chown with 16-bit ids: same scheme as remote_sys_chown */
+long
+remote_sys_chown16(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	long result;
+	extern asmlinkage long sys_chown16(const char *, old_uid_t, old_gid_t);
+
+	if(current->mosix.ttab && enter_remote_dfsa_mode())
+	{
+		result = sys_chown16((const char *)ARG(0), (uid_t)ARG(1),
+							(gid_t)ARG(2));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_file_name(0);
+	return(remote_standard_system_call(n, &regs));
+}
+
+/*
+ * getcwd: with DFSA configured, mosix.ttab set and a current directory
+ * present, the call is first tried directly in remote-DFSA mode.  Otherwise
+ * the result buffer (arg 0, length arg 1) is a read region and the call is
+ * forwarded.
+ */
+long
+remote_sys_getcwd(int n, struct pt_regs regs)
+{
+#ifdef CONFIG_MOSIX_DFSA
+	long result;
+	extern asmlinkage long sys_getcwd(char *, unsigned long);
+
+	if(current->mosix.ttab && current->fs->pwd && enter_remote_dfsa_mode())
+	{
+		result = sys_getcwd((char *)ARG(0), (unsigned long)ARG(1));
+		leave_remote_dfsa_mode();
+		if(RESULT_IS_FINAL(result))
+			return(result);
+	}
+#endif /* CONFIG_MOSIX_DFSA */
+	set_read_region(ARG(0), ARG(1));
+	return(remote_standard_system_call(n, &regs));
+}
+
+long
+remote_sys_capget(int n, struct pt_regs regs)
+{
+	set_write_region(ARG(0), sizeof(struct __user_cap_header_struct));
+
set_read_region(ARG(1), sizeof(struct __user_cap_data_struct)); + return(remote_standard_system_call(n, ®s)); +} + +long +remote_sys_capset(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), sizeof(struct __user_cap_header_struct)); + set_write_region(ARG(1), sizeof(struct __user_cap_data_struct)); + return(remote_standard_system_call(n, ®s)); +} + +int +remote_sys_sigaltstack(int n, struct pt_regs regs) +{ + set_write_region(ARG(0), sizeof(struct sigaltstack)); + set_read_region(ARG(1), sizeof(struct sigaltstack)); + return(remote_standard_system_call(n, ®s)); +} + +ssize_t +remote_sys_readahead(int n, struct pt_regs regs) +{ +#ifdef CONFIG_MOSIX_DFSA + unsigned int fd = ARG(0); + ssize_t result; + extern asmlinkage ssize_t sys_readahead(unsigned int, loff_t, size_t); + struct task_struct *p = current; + + if(fd >= p->files->max_fds) + { + if(p->mosix.ttab) + return(-EBADF); + } + else if(p->files->fd[fd] && enter_remote_dfsa_mode()) + { + result = sys_readahead(fd, (loff_t)LARG(1), (size_t)ARG(3)); + leave_remote_dfsa_mode(); + if(RESULT_IS_FINAL(result)) + return(result); + } +#endif /* CONFIG_MOSIX_DFSA */ + return(remote_standard_system_call(n, ®s)); +} diff -urN linux-2.4.17/mos/ucache.c linux_umopenmosix/mos/ucache.c --- linux-2.4.17/mos/ucache.c Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/mos/ucache.c Wed Jun 26 23:45:18 2002 @@ -0,0 +1,1527 @@ +/* + * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) + * + * Permission to use this software is hereby granted under the terms of the + * GNU General Public License, as published by the Free Software Foundation. + * + * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY + * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING + * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. + */ +/* + * Author(s): Amnon Shiloh. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MOSIX_DEBUG +#define WHY_NOT_CACHED + +extern void print_ucache(void); + +void +breakpoint1(char *str, int arg) +{ + if(ds_debug & DSDEB_CACHE) + { + printk("%s-Cache: ", desc_mostask(NULL)); + printk(str, arg); + printk("\n"); + print_ucache(); +#ifdef CONFIG_MOSIX_UDB + if(ds_debug & DSDEB_CACHESTOP) + mosix_panic("cache_stop"); +#endif /* CONFIG_MOSIX_UDB */ + } +} + + +#else +#define breakpoint1(x,a) do {} while(0) +#endif /* CONFIG_MOSIX_DEBUG */ + +#define breakpoint(x) breakpoint1(x, 0) + +#define CACHE_ENTRIES 10 + +struct cblock +{ + /* + * DIRTY but useful NOTE: we actually rely on having at least 3 + * longwords before the data! (so don't make it less) + */ + unsigned long offset; + int len; + int count; + char data[1]; +}; + +struct data_cache +{ + short first; /* first entry in list, -1 = NONE */ + short free; /* first entry in freelist, -1 = NONE */ + short next[CACHE_ENTRIES]; /* next on list of entries, -1=none */ + short prev[CACHE_ENTRIES]; /* prev on list of entries, -1=none */ + char type[CACHE_ENTRIES]; /* see below */ + unsigned long addr[CACHE_ENTRIES]; /* start address */ + unsigned long len[CACHE_ENTRIES]; /* end address */ + struct cblock *bp[CACHE_ENTRIES]; /* where data (if any) is held */ +}; + +/* types: */ + +#define WRITEABLE 1 /* data available from user to kernel */ +#define WRITEDATA 2 /* data in cache from user to kernel */ +#define WRITE_STATUS (WRITEABLE|WRITEDATA) /* user ==> kernel */ +#define READABLE 4 /* user address valid for reading */ +#define READDATA 8 /* data for user cached */ +#define READ_STATUS (READABLE|READDATA) /* kernel ==> user */ +#define SKIP_MARK 16 + +#define END_MARK (-1) + +#define MAX_POSITIVE 0x7ffffffe /* max value to return in *non_cache */ +#define MAX_SEGMENT 32768 +#define READ_SEND_THRESHOLD MAX_SEGMENT + +extern char 
*pack_read_cache_data(int *);
+
+/*
+ * Allocate a cblock able to hold "len" data bytes and record the user
+ * offset it starts at.  The reference count starts at 1.
+ * Returns NULL when kmalloc() fails.
+ */
+struct cblock *
+cblock_alloc(unsigned long offset, int len)
+{
+	struct cblock *cb;
+
+	if((cb = kmalloc(len + offsetof(struct cblock, data[0]), GFP_NOIO)))
+	{
+		cb->offset = offset;
+		cb->len = len;
+		cb->count = 1;
+	}
+	return(cb);
+}
+
+/*
+ * Make sure the current task has a user-cache and reset it to the empty
+ * state: no entries in use, all CACHE_ENTRIES slots chained on the free
+ * list in index order, no cblock attached to any slot.
+ * Returns 1 on success, 0 when the cache could not be allocated.
+ */
+int
+alloc_ucache(void)
+{
+	struct mosix_task *m = &current->mosix;
+	register struct data_cache *d;
+	register int i;
+
+	if(!(d = m->ucache))
+	{
+		m->ucache = kmalloc(sizeof (struct data_cache), GFP_NOIO);
+		if(!(d = m->ucache))
+			return(0);
+	}
+#ifdef CONFIG_MOSIX_DIAG
+	else if(d->first != END_MARK)
+		mosix_panic("alloc_ucache: cache was not cleared");
+#endif /* CONFIG_MOSIX_DIAG */
+	/* on free list */
+	d->first = END_MARK;
+	d->free = 0;
+	for(i = 0 ; i < CACHE_ENTRIES ; i++)
+	{
+		d->next[i] = i+1;
+		d->prev[i] = i-1;
+		d->bp[i] = NULL;
+	}
+	d->next[CACHE_ENTRIES-1] = END_MARK;
+	d->prev[0] = END_MARK;
+	breakpoint("ucache allocated");
+	return(1);
+}
+
+/*
+ * Deliver the cached data of entry "ent" to its user address: sent with
+ * comm_send(DEP_DATA_TO_USER) when the task is DDEPUTY, otherwise copied
+ * with copy_to_user() unless the task is DFINISHED.
+ * The result of the delivery is not checked.
+ */
+inline void
+write_read_cache(struct data_cache *d, int ent)
+{
+	struct user_copy_h u;
+	register struct cblock *bp = d->bp[ent];
+	caddr_t from = &bp->data[d->addr[ent] - bp->offset];
+	int dflags = current->mosix.dflags;
+
+	breakpoint1("write_read_cache, entry %d", ent);
+	u.addr = (char *)d->addr[ent];
+	u.size = d->len[ent];
+	if(dflags & DDEPUTY)
+		comm_send(DEP_DATA_TO_USER, &u, sizeof(u), from, u.size, 0);
+	else if(!(dflags & DFINISHED))
+		copy_to_user(u.addr, from, u.size);
+}
+
+/*
+ * Release the current task's user-cache: for every entry in use that has
+ * a cblock, pending READDATA is delivered first, then the cblock reference
+ * is dropped (freeing the cblock with its last reference); finally the
+ * cache itself is freed and the task's pointer cleared.
+ */
+void
+free_ucache(void)
+{
+	register struct data_cache *d = current->mosix.ucache;
+	register int i;
+	struct cblock *cb;
+
+	if(d)
+	{
+		breakpoint("free ucache");
+		for(i = d->first ; i != END_MARK ; i = d->next[i])
+			if((cb = d->bp[i]))
+			{
+				if(d->type[i] & READDATA)
+					write_read_cache(d, i);
+				if(--cb->count == 0)
+					kfree(cb);
+			}
+		kfree(d);
+		current->mosix.ucache = NULL;
+	}
+}
+
+int
+alloc_ucache_entry(struct data_cache *d, int after, int type,
+					unsigned long addr, int len)
+{
+	int ent = d->free;
+	int nex;
+
+	if(ent == END_MARK)
+
return(END_MARK); + d->free = d->next[ent]; + if(d->free != END_MARK) + d->prev[d->free] = END_MARK; + d->type[ent] = type; + d->addr[ent] = addr; + d->len[ent] = len; + d->bp[ent] = NULL; + d->prev[ent] = after; + nex = d->next[ent] = (after == END_MARK) ? d->first : d->next[after]; + if(after == END_MARK) + d->first = ent; + else + d->next[after] = ent; + if(nex != END_MARK) + d->prev[nex] = ent; + d->next[ent] = nex; + breakpoint1("allocated entry #%d", ent); + return(ent); +} + +void +free_ucache_entry(struct data_cache *d, int ent) +{ + struct cblock *cb; + + if(ent == END_MARK) + return; + if(d->type[ent] & READDATA) + write_read_cache(d, ent); + if((cb = d->bp[ent]) && --cb->count == 0) + kfree(cb); + d->bp[ent] = NULL; + + if(d->next[ent] != END_MARK) + d->prev[d->next[ent]] = d->prev[ent]; + if(d->prev[ent] == END_MARK) + d->first = d->next[ent]; + else + d->next[d->prev[ent]] = d->next[ent]; + d->prev[ent] = END_MARK; + d->next[ent] = d->free; + d->free = ent; + breakpoint1("freed entry #%d", ent); +} + +void +data_was_read(struct data_cache *d, int ent) +{ + unsigned long beg, end; + int prev, next; + unsigned int newlen; + /* + * The data from this entry was read and brought back to the REMOTE, + * so it can be discarded - however, if this entry can bridge its + * neighbours on both sides (a common case), it is even better to + * do it and save 2 entries rather than 1! 
+ */ + d->type[ent] &= ~(READDATA|WRITEDATA); + d->type[ent] |= READABLE; + if((prev = d->prev[ent]) != END_MARK && + (next = d->next[ent]) != END_MARK && + (d->type[prev] & ~SKIP_MARK) == d->type[ent] && + (d->type[next] & ~SKIP_MARK) == d->type[ent] && + d->bp[prev] == d->bp[ent] && + d->bp[next] == d->bp[ent] && + (beg = d->addr[ent]) == d->addr[prev] + d->len[prev] && + (end = beg + d->len[ent]) == d->addr[next] && + (newlen = end + d->len[next] - d->addr[prev]) <= MAX_POSITIVE) + { + breakpoint1("data_was_read: Joining around %d", ent); + free_ucache_entry(d, ent); + /* mostly one non-READDATA entry can have a SKIP_MARK */ + if(d->type[next] & SKIP_MARK) + { + d->addr[next] = d->addr[prev]; + d->len[next] = newlen; + free_ucache_entry(d, prev); + } + else + { + d->len[prev] = newlen; + free_ucache_entry(d, next); + } + } + else if(!(d->type[ent] & WRITEABLE)) /* still of a value... */ + free_ucache_entry(d, ent); +} + +/* + * Flush only the read-cache: + * + * mostly, there is no more than one entry involved, so we use + * "write_read_cache" to send it back, but if there is more than one entry, + * all entries must be packed to minimize the number of communication packets + * (unless we are no longer DEPUTY - then we also use "write_read_cache"). + * + * Any entry(ies) marked SKIP_MARK are not flushed. 
+ */ +void +flush_read_cache(void) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + int j = 0; /* only for the compiler's sake */ + int n; + char *p; + + if(!d) + return; + breakpoint("flushing read_cache"); + if(!(current->mosix.dflags & DDEPUTY)) + { + one_by_one: + for(i = d->first ; i != END_MARK ; i = d->next[i]) + if((d->type[i] & (READDATA|SKIP_MARK)) == READDATA) + { + write_read_cache(d, i); + data_was_read(d, i); + goto one_by_one; /* list may be scrambled */ + } + breakpoint("read cache flushed one by one"); + return; + } + for(n = 0 , i = d->first ; i != END_MARK ; i = d->next[i]) + if((d->type[i] & (READDATA|SKIP_MARK)) == READDATA) + { + if(!n++) + j = i; + } + if(!n) + { + breakpoint("there was no read-cache to free"); + return; + } + if(n == 1) + { + write_read_cache(d, j); + data_was_read(d, j); + breakpoint("there was only one read-cache to free"); + return; + } + if((p = pack_read_cache_data(&n))) + { + comm_send(DEP_CACHE_READ_DATA, NULL, 0, p, n, 0); + kfree(p); + breakpoint("freed multiple read-cache entries"); + } + else /* no memory -- too bad: */ + goto one_by_one; +} + +/* + * free some entry except the one we are dealing with, + * obtaining at least one free entry. + * Because CACHE_ENTRIES > 1, there is ALWAYS SOMETHING to free, + * but priorities may apply + * (the lower, the dearer; the higher, the more disposable). 
+ */ +void +free_some_entry_except(struct data_cache *d, int not_this) +{ + register int i; + int pri = -1, p; + int e = END_MARK; + struct cblock *cb; + int n = 0; + int m = 0; + + breakpoint1("free_some_entry_except #%d", not_this); + /* + * first try to flush all complete READDATA entries: + * if all the cblock was filled, all operations on that region were + * very likely completed: + */ + for(i = d->first ; i != END_MARK ; i = d->next[i]) + if(d->type[i] & READDATA) + { + m++; + if(i == not_this || d->addr[i] != (cb = d->bp[i])->offset || + d->len[i] != cb->len) + d->type[i] |= SKIP_MARK; + else if(!n++) + e = i; + } + if(n == 1) + { + write_read_cache(d, e); + d->type[not_this] |= SKIP_MARK; /* (even if not READDATA) */ + data_was_read(d, e); + } + else if(n) /* should be pretty rare, but still save on packets */ + flush_read_cache(); + if(m) + for(i = d->first ; i != END_MARK ; i = d->next[i]) + d->type[i] &= ~SKIP_MARK; + if(d->free != END_MARK) + return; + again: + for(e = END_MARK , i = d->first ; i != END_MARK ; i = d->next[i]) + if(i != not_this) + { + /* PRIORITY POLICY: + * generally, WRITEDATA is dearer than READDATA because it would + * be a pity to lose the already existing data. + * In both cases, the SHORTER the data the dearer, because + * the extra communication would cost more per-byte. + * (remember that the data is incomplete within its cblock - + * when complete, it can be joined with other entries) + * Also, READABLE is dearer than WRITEABLE because more + * system-calls bring data to the user in tiny parts. + * In both cases, the LONGER the data the dearer, because we + * have more helpful information. + * Data segments are between 1-MAX_SEGMENT size + * (READABLE|WRITEABLE can be more) + * In the mixed cases (both data and pure information), + * there is no easy way to evaluate, so we provide fixed values. 
+ */ + switch(d->type[i]) + { + /* throwing data - the shorter the dearer */ + case READDATA: + p = MAX_SEGMENT/2 + 40 + d->len[i] / (MAX_SEGMENT / 10); + break; + case WRITEDATA: + p = MAX_SEGMENT/2 + 30 + d->len[i] / (MAX_SEGMENT / 10); + break; + case WRITEDATA|READDATA: + p = MAX_SEGMENT/2 + 20 + d->len[i] / (MAX_SEGMENT / 10); + break; + /* mixed case: hard, so just a fixed value: */ + case READDATA|WRITEABLE: + p = MAX_SEGMENT/2 + 39; + break; + case WRITEDATA|READABLE: + p = MAX_SEGMENT/2 + 29; + break; + /* declaration only - the longer the dearer: */ + case WRITEABLE: + p = MAX_SEGMENT / d->len[i]; + break; + case READABLE: + p = MAX_SEGMENT / (2 * d->len[i]); + break; + case READABLE|WRITEABLE: + p = MAX_SEGMENT * 2 / (3 * d->len[i]); + break; + default: + printk("free_some_entry_except: type %d encountered\n", d->type[i]); + p = 1; + } + if(!d->bp[i] || d->bp[i] != d->bp[not_this] || + d->addr[i] < d->addr[not_this]) + p++; + if(p > pri) + { + pri = p; + e = i; + } + } + if(e == END_MARK) + panic("free_some_entry_except: none found"); + if(d->type[e] & READDATA) + { + write_read_cache(d, e); + d->type[not_this] |= SKIP_MARK; + data_was_read(d, e); + d->type[not_this] &= ~SKIP_MARK; + if(d->free == END_MARK) + goto again; + } + else + free_ucache_entry(d, e); +} + +void +split_ucache(int ent, unsigned long at, int back) +{ + register struct data_cache *d = current->mosix.ucache; + int e2; + struct cblock *cb; + + breakpoint1("split entry at %X", at); +#ifdef CONFIG_MOSIX_DIAG + if(at <= d->addr[ent] || at >= d->addr[ent] + d->len[ent]) + panic("split_ucache: point not inside"); +#endif /* CONFIG_MOSIX_DIAG */ + if(d->free == END_MARK) + free_some_entry_except(d, ent); + if(back) + { + e2 = alloc_ucache_entry(d, d->prev[ent], d->type[ent], + d->addr[ent], at - d->addr[ent]); + d->len[ent] = d->addr[ent] + d->len[ent] - at; + d->addr[ent] = at; + } + else + { + e2 = alloc_ucache_entry(d, ent, d->type[ent], at, + d->addr[ent] + d->len[ent] - at); + 
d->len[ent] = at - d->addr[ent]; + } + if((cb = d->bp[ent])) + { + d->bp[e2] = cb; + cb->count++; + } + breakpoint1("Split of Entry #%d Complete", ent); +} + +int +obtain_ucache_block(struct data_cache *d, int ent, unsigned long at) +{ + if(d->len[ent] > MAX_SEGMENT) + { + if(at - d->addr[ent] < MAX_SEGMENT) + split_ucache(ent, d->addr[ent] + MAX_SEGMENT, 0); + else if(at + MAX_SEGMENT <= d->addr[ent] + d->len[ent]) + split_ucache(ent, (d->type[ent] & READABLE) ? at : + d->addr[ent] + d->len[ent] - MAX_SEGMENT, 1); + else + { + split_ucache(ent, at, 1); + split_ucache(ent, at + MAX_SEGMENT, 0); + } + } + return((d->bp[ent] = cblock_alloc(d->addr[ent], d->len[ent])) != NULL); +} + +void +flush_ucache(void) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + struct cblock *cb; + + if(!d || d->first == END_MARK) + return; + breakpoint("flush_ucache"); + if(!(current->mosix.dflags & DREMOTE)) /* save time - no READDATA */ + flush_read_cache(); + for(i = 0 ; i < CACHE_ENTRIES ; i++) + if((cb = d->bp[i])) + { + if(--cb->count == 0) + kfree(cb); + d->bp[i] = NULL; + } + for(i = 0 ; i < CACHE_ENTRIES ; i++) + { + d->next[i] = i+1; + d->prev[i] = i-1; + } + d->first = END_MARK; + d->free = 0; + d->next[CACHE_ENTRIES-1] = END_MARK; + d->prev[0] = END_MARK; +} + +int +ask_for_write_data(int ent) +{ + register struct data_cache *d = current->mosix.ucache; + struct cblock *cb = d->bp[ent]; + struct user_copy_h u; + int result; + + breakpoint1("ask_for_write_data on entry #%d", ent); + u.addr = (char *)d->addr[ent]; + u.size = d->len[ent]; + u.verify = 0; /* we wouldn't ask if we had no WRITEABLE! 
*/ + if(deputy_request(DEP_COPY_FROM_USER, &u, sizeof(u), NULL, 0, 0, + (void **)&result, -sizeof(result))) + return(0); + if(result) + { + comm_flushdata(COMM_ALLDATA); + return(0); + } + if(comm_copydata(&cb->data[d->addr[ent] - cb->offset], u.size, 0)) + return(0); + d->type[ent] &= ~WRITE_STATUS; + d->type[ent] |= WRITEDATA; + return(1); +} + +void +try_to_merge_ucache(int ent) +{ + register struct data_cache *d = current->mosix.ucache; + int o; + int t = d->type[ent]; + struct cblock *cb = d->bp[ent]; + + if((o = d->prev[ent]) != END_MARK && + d->type[o] == t && d->bp[o] == cb && + d->addr[o] + d->len[o] == d->addr[ent] && + d->len[ent] + d->len[o] <= MAX_POSITIVE) + { + d->addr[ent] = d->addr[o]; + d->len[ent] += d->len[o]; + d->type[o] &= ~READDATA; + free_ucache_entry(d, o); + breakpoint1("Merged #%d with previous", ent); + } + if((o = d->next[ent]) != END_MARK && + d->type[o] == t && d->bp[o] == cb && + d->addr[ent] + d->len[ent] == d->addr[o] && + d->len[ent] + d->len[o] <= MAX_POSITIVE) + { + d->len[ent] += d->len[o]; + d->type[o] &= ~READDATA; + free_ucache_entry(d, o); + breakpoint1("Merged #%d with next", ent); + } +} + +inline int +find_non_cached(struct data_cache *d, int types, unsigned int addr, int max) +{ + register int i; + int nc; + + if(!max) + return(0); + breakpoint1("Finding non-cached from address %X", addr); + for(i = d->first ; i != END_MARK && + (!(d->type[i] & types) || d->addr[i] + d->len[i] <= addr) ; + i = d->next[i]) + ; + if(i == END_MARK) + return(max); + if((nc = d->addr[i] - addr) > MAX_POSITIVE) + return(MAX_POSITIVE); + if(nc > max) + nc = max; + breakpoint1("non-cached=0x%X", nc); + return(nc); +} + +int +copy_from_cache(char *to, unsigned long from, int len, int *non_cache) +{ + register struct data_cache *d = current->mosix.ucache; + int rlen = (len < 0 ? 
-len : len); + unsigned int orlen = rlen; /* requested byte count before any clipping */ + register int i; + struct cblock *cb; + char *cp; + int n; + int zero_found = 0; + + if(!d) /* no cache: the whole request is non-cached */ + { + none: + *non_cache = rlen; +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)from,rlen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + for(i = d->first ; i != END_MARK && + (!(d->type[i] & (WRITE_STATUS|READDATA)) || + d->addr[i] + d->len[i] <= from) ; + i = d->next[i]) + ; /* stops at the first entry with a WRITE_STATUS or READDATA bit that ends above from */ + if(i == END_MARK) + goto none; + if(d->addr[i] > from) /* the entry starts above from: only report the gap, limited to the request */ + { + n = d->addr[i] - from; + if(((unsigned int)n) > orlen) + n = orlen; + *non_cache = n; + breakpoint1("copy_from_cache from 0x%X - way before cache", from); +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)from,rlen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + breakpoint1("copy_from_cache from 0x%X - something relevant", from); + if(!d->bp[i] && !obtain_ucache_block(d, i, from)) /* entry has no block yet and none could be allocated: drop the entry */ + { + breakpoint("copy_from_cache: failed allocating a cblock"); + free_ucache_entry(d, i); + *non_cache = find_non_cached(d, WRITE_STATUS|READDATA, + from, orlen); +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)from,rlen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + if(d->addr[i] + d->len[i] < from + rlen) /* clip the request to the end of this entry */ + rlen = d->addr[i] + d->len[i] - from; + if(!(d->type[i] & (READDATA|WRITEDATA)) && !ask_for_write_data(i)) /* block holds no data yet and fetching it failed */ + { + breakpoint1("Failed obtaining write-data for entry #%d", i); + free_ucache_entry(d, i); + rlen = 0; + } + else + { + breakpoint1("Fetching 0x%x bytes from cache", rlen); + cb = d->bp[i]; + cp = &cb->data[from - cb->offset]; + if(len > 0) + memcpy(to, cp, rlen); + else if(to) /* stop at 0, but include it */ + { + for(n = 0 ; n < rlen ; n++) + if((*to++ = *cp++) == '\0') + { + n++; + zero_found = 1; + break; + } + rlen = n; + } + else /* called from "strlen_cache" */ + { + for(n = 0 ; n < rlen ; n++) + if(*cp++ == 
'\0') + { + n++; + zero_found = 1; + break; + } + rlen = n; + } + /* finally, can we rid or shorten this entry? */ + if(d->type[i] == WRITEDATA && + d->first == i && d->next[i] == END_MARK) + /* leave the poor single-entry alone -nothing to lose */ + /* (some tty routines read TWICE) */ + ; + else if(d->type[i] & READ_STATUS) + { + if(from != d->addr[i]) + split_ucache(i, from, 1); + if(len < d->len[i]) /* NOTE(review): this compares the signed request len, which is negative in string mode, while the rest of the function works with rlen - confirm this is intended */ + split_ucache(i, from + len, 0); + d->type[i] &= ~WRITEDATA; + if(!(d->type[i] & READDATA)) /* no data left in the entry: give up its block reference */ + { + if(cb && --cb->count == 0) + kfree(cb); + d->bp[i] = NULL; + } + try_to_merge_ucache(i); + breakpoint1("some (or all) of block #%d was only left for reading", i); + } + else if(from == d->addr[i] && d->len[i] == rlen) + { + free_ucache_entry(d, i); + breakpoint1("All block #%d was consumed", i); + } + else + { + if(from == d->addr[i]) /* consumed from the front: advance the start of the entry */ + { + d->addr[i] += rlen; + d->len[i] -= rlen; + } + else if(from + rlen == d->addr[i] + d->len[i]) /* consumed up to the end: shorten the entry */ + d->len[i] = from - d->addr[i]; + breakpoint1("some of block #%d was consumed", i); + } + } + if(zero_found) + { + breakpoint("zero found"); + *non_cache = -1; + } + else + *non_cache = find_non_cached(d, WRITE_STATUS|READDATA, + from+rlen, orlen - rlen); +#ifdef WHY_NOT_CACHED +if(*non_cache && !zero_found && (ds_debug & DSDEB_NOTCACHED)){printk("from %X(%x) not in cache!",(int)(from+rlen),orlen-rlen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(rlen); +} + /* check_whether_to_read_flush: walk the cache of the current task and build two sums. l adds the cblock length of every READDATA entry that covers its cblock exactly; n adds the cblock length of the other READDATA entries and the entry length of entries that are READABLE without READDATA. flush_read_cache is called when both sums reach READ_SEND_THRESHOLD. The cache pointer is used without a NULL check. */ +void +check_whether_to_read_flush(void) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + int l = 0, n = 0; + register struct cblock *cb; + + for(i = d->first ; i != END_MARK ; i = d->next[i]) + if(d->type[i] & READDATA) + { + cb = d->bp[i]; + if(d->len[i] == cb->len && d->addr[i] == cb->offset) + l += cb->len; + else + n += cb->len; + } + else if(d->type[i] & READABLE) + n += d->len[i]; + if(l >= READ_SEND_THRESHOLD && n >= READ_SEND_THRESHOLD) + flush_read_cache(); +} + /* copy_to_cache: store len bytes destined for user address to in the data cache of the current task; a NULL from stores zero bytes instead. Returns the number of bytes placed in the cache (0 when nothing could be cached). *non_cache receives the number of bytes, counted from where the cached part ends, that are not covered by a usable entry. */ +int +copy_to_cache(unsigned long to, char *from, int len, int 
*non_cache) +{ + register struct data_cache *d = current->mosix.ucache; + unsigned int olen = len; /* requested byte count before any clipping */ + register int i; + unsigned int lim; + struct cblock *cb; + + if(!d) /* no cache: the whole request is non-cached */ + { + none: + *non_cache = len; +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)to,olen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + for(i = d->first ; i != END_MARK && + (!(d->type[i] & (READ_STATUS|WRITEDATA)) || + d->addr[i] + d->len[i] <= to) ; + i = d->next[i]) + ; /* stops at the first entry with a READ_STATUS or WRITEDATA bit that ends above to */ + if(i == END_MARK) + goto none; + if(d->addr[i] > to) /* the entry starts above to: only report the gap, limited to the request */ + { + if(d->addr[i] - to > olen) + *non_cache = olen; + else + *non_cache = d->addr[i] - to; +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)to,olen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + breakpoint1("copy_to_cache from 0x%X - something relevant", to); + if(!d->bp[i] && !obtain_ucache_block(d, i, to)) /* entry has no block yet and none could be allocated */ + { + breakpoint("copy_to_cache: failed allocating a cblock"); + cant_do_it: + free_ucache_entry(d, i); /* shared failure exit: drop entry i and report what is not cached from to */ + *non_cache = find_non_cached(d, READ_STATUS|WRITEDATA, + to, olen); + breakpoint1("cant_do_it: *non_cache=%d", *non_cache); +#ifdef WHY_NOT_CACHED +if(ds_debug & DSDEB_NOTCACHED){printk("%X(%x) not in cache!",(int)to,olen);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(0); + } + if(d->addr[i] != to && !(d->type[i] & READDATA)) /* entries that already hold READDATA are not split */ + split_ucache(i, to, 1); + lim = d->addr[i] + d->len[i]; + if(lim != to + len) + { + if(lim < to + len) /* the request runs past the entry: clip it to the end of the entry */ + len = d->addr[i] + d->len[i] - to; + else if(!(d->type[i] & READDATA)) + split_ucache(i, to + len, 0); + } + if(!(d->type[i] & READ_STATUS)) + /* we have user-read permission but not user-write: + * we did need to get thus far to minimize the data-loss, + * but now we must discard this entry. 
+ */ + { + breakpoint("copy_to_cache: Sorry Only read-permission"); + goto cant_do_it; + } + breakpoint1("copy_to_cache: copying %d bytes", len); + cb = d->bp[i]; + if(from) + memcpy(&cb->data[to - cb->offset], from, len); + else /* called from zero_cache() */ + memset(&cb->data[to - cb->offset], 0, len); + d->type[i] &= ~READ_STATUS; /* the entry now carries data instead of a pending read permission */ + d->type[i] |= READDATA; + try_to_merge_ucache(i); + if(d->addr[i] == cb->offset && d->len[i] == cb->len) /* the entry now covers its whole block */ + check_whether_to_read_flush(); + *non_cache = find_non_cached(d, READ_STATUS|WRITEDATA, to + len, + olen - len); + breakpoint1("copy_to_cache: success, non_cache=%d", *non_cache); +#ifdef WHY_NOT_CACHED +if(*non_cache && (ds_debug & DSDEB_NOTCACHED)){printk("%X(%x) not in cache!",(int)(to+len),olen-len);mosix_panic("Why");} +#endif /* WHY_NOT_CACHED */ + return(len); +} + /* zero_cache: copy_to_cache with a NULL source, which makes it store len zero bytes for user address to. Return value and *non_cache are those of copy_to_cache. */ +int +zero_cache(char *to, int len, int *non_cache) +{ + return(copy_to_cache((unsigned long)to, NULL, len, non_cache)); +} + /* strlen_cache: scan the cached bytes at addr through copy_from_cache with a NULL destination and a length of -MAX_POSITIVE, the form in which copy_from_cache only looks for a zero byte. Return value and *non_cache are those of copy_from_cache. */ +int +strlen_cache(char *addr, int *non_cache) +{ + return(copy_from_cache(NULL, (unsigned long)addr, -MAX_POSITIVE, + non_cache)); +} + /* all_in_cache: return 1 when the size bytes starting at addr are covered by consecutive cache entries with no gap between them and, when writeable is set, every entry used has a READ_STATUS bit. Returns 0 otherwise, also when there is no cache or the range wraps around. */ +int +all_in_cache(unsigned long addr, unsigned int size, int writeable) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + unsigned int limit; + + if(!d || addr + size < addr) /* no cache or overflow */ + return(0); + for(i = d->first ; i != END_MARK && d->addr[i] + d->len[i] < addr ; + i = d->next[i]) + ; /* skip entries that end below addr */ + limit = addr + size; + for(; i != END_MARK ; i = d->next[i]) + if(d->addr[i] > addr || (writeable && !(d->type[i] & READ_STATUS))) /* gap before this entry, or entry unusable for the requested access */ + return(0); + else if((addr = d->addr[i] + d->len[i]) >= limit) /* covered up to the end of this entry; done once that reaches limit */ + return(1); + return(0); +} + /* any_in_cache: return 1 when at least one entry holding READDATA or WRITEDATA overlaps the size bytes starting at addr, 0 otherwise (also when there is no cache or the range wraps around). */ +int +any_in_cache(unsigned long addr, unsigned int size) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + + if(!d || addr + size < addr) /* no cache or overflow */ + return(0); + for(i = d->first ; i != END_MARK && d->addr[i] < addr+size ; + i = d->next[i]) + if(d->addr[i] + d->len[i] > addr && + (d->type[i] & 
(READDATA|WRITEDATA))) + return(1); + return(0); +} + +/* + * the following routine is only used in the first stage of preparing + * a cache, therefore "type" may only be "READABLE", "WRITEABLE", or both + * and so must be all entries already in the cache before the call. + * this is because its complexity is already enough as it is. + * If at some time in the future, new entries are to be produced after + * initialization, another routine must be used (or this one expanded). + * + * Also, note that for simplicity-sake, one of the entries, though very + * rare, could be merged to become over the size of MAX_POSITIVE, + * so this must be weeded-out later. + */ +void +add_ucache_entry(unsigned long addr, int len, int type) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + int n; + int prev; + unsigned long next_addr = 0; /* start of the part postponed to the next pass */ + unsigned long next_len = 0; /* length of that part; 0 means nothing is postponed */ + char msg[50]; + + sprintf(msg, "add_ucache_entry at 0x%X, len=0x%X", (int)addr, len); /* at most 46 characters with 32-bit int, so it fits msg */ + breakpoint(msg); + if(len <= 0 || len > MAX_POSITIVE || addr + len < addr) /* reject empty, oversized or wrapping ranges */ + return; + if(!d) /* first use: the cache must be allocated before anything can be added */ + { + if(alloc_ucache()) + d = current->mosix.ucache; + else + return; + } + next_part: + for(prev = END_MARK , i = d->first ; + i != END_MARK && d->addr[i] + d->len[i] <= addr ; + prev = i , i = d->next[i]) + ; /* i is the first entry ending above addr, prev the entry before it */ + if(i == END_MARK || d->addr[i] > addr + len) + { + /* no overlap, or even touching */ + if(prev != END_MARK && d->type[prev] == type && + d->addr[prev] + d->len[prev] == addr) + d->len[prev] += len; + else + alloc_ucache_entry(d, prev, type, addr, len); + goto all_done; + } + if(i != END_MARK && addr + len > d->addr[i] + d->len[i]) + { + /* do some now, the rest later */ + next_len = addr + len - (d->addr[i] + d->len[i]); + next_addr = d->addr[i] + d->len[i]; + len = next_addr - addr; + } + if(prev != END_MARK && d->type[prev] == type && + d->addr[prev] + d->len[prev] == addr) + { + /* increase previous entry */ + if(i != END_MARK && addr + len >= d->addr[i]) + { + n = d->addr[i] - addr; + addr = 
d->addr[i]; + len -= n; + d->len[prev] += n; + try_to_merge_ucache(i); + } + else + { + d->len[prev] += len; + goto all_done; + } + } + if(i == END_MARK || d->addr[i] > addr + len) + alloc_ucache_entry(d, prev, type, addr, len); + else if(d->type[i] == type) + { + if(d->addr[i] > addr) /* grow entry i downwards so that it starts at addr */ + { + d->len[i] += d->addr[i] - addr; + d->addr[i] = addr; + } + /* else -- already included, nothing to do */ + } + else if(d->type[i] == (READABLE|WRITEABLE) && d->addr[i] <= addr) + ; /* nothing to do - already included */ + else if(d->addr[i] == addr && d->len[i] == len) + { + d->type[i] |= type; + try_to_merge_ucache(i); + } + else if(d->addr[i] == addr) + { + split_ucache(i, addr + len, 0); /* now "i"=new entry */ + d->type[i] |= type; + } + else if(d->addr[i] > addr) + { + alloc_ucache_entry(d, prev, type, addr, d->addr[i] - addr); /* new entry for the part below entry i */ + if(d->addr[i] < addr + len) + { + if(addr + len < d->addr[i] + d->len[i]) + split_ucache(i, addr + len, 0); + d->type[i] |= type; + } + } + else + { + split_ucache(i, addr, 1); + if(d->addr[i] + d->len[i] != addr + len) + split_ucache(i, addr + len, 0); + d->type[i] |= type; + } + if(next_len) /* a part was postponed above: run another pass for it */ + { + breakpoint1("adding split next segment of %d", next_len); + len = next_len; + addr = next_addr; + next_len = 0; + goto next_part; + } + all_done: + breakpoint("after add_ucache_entry"); +} + /* ucache_ok: return 1 when the size bytes of user memory starting at addr end below TASK_SIZE, do not wrap, and are covered by VMAs that follow each other without a hole and each have a vm_flags bit from mode set; return 0 otherwise. The mmap_sem of the current mm is held for reading during the walk and released on every exit path after it was taken. */ +int +ucache_ok(unsigned long addr, unsigned long size, int mode) +{ + struct vm_area_struct * vma; + unsigned long pages; + int ok = 0; + unsigned int need; + + if (!size || addr+size >= TASK_SIZE || addr+size < addr) + return(0); + + down_read(&current->mm->mmap_sem); + if (!(vma = find_extend_vma(current->mm, addr)) || + !(vma->vm_flags & mode)) + goto out; + need = ((addr & ~PAGE_MASK) + size + PAGE_SIZE-1) >> PAGE_SHIFT; /* number of pages the range touches */ + addr &= PAGE_MASK; /* continue from the start of the first page */ + + while(1) + { + pages = (vma->vm_end - addr) >> PAGE_SHIFT; /* pages left in this VMA from addr */ + if(need <= pages) + break; + need -= pages; + addr = vma->vm_end; + vma = vma->vm_next; + if (!vma || vma->vm_start != addr) /* the next VMA must exist and start where the previous one ended */ + goto out; + if (!(vma->vm_flags & 
mode)) + goto out; + } + ok = 1; + out: + up_read(&current->mm->mmap_sem); + return(ok); +} + /* Diagnostic switches, present only with CONFIG_MOSIX_DIAG: a nonzero no_ucache makes both set_read_region and set_write_region return at once, no_read_cache and no_write_cache do so for one of them each. */ +#ifdef CONFIG_MOSIX_DIAG +int no_ucache, no_write_cache, no_read_cache; +#endif /* CONFIG_MOSIX_DIAG */ + /* set_read_region: register the len bytes at addr as a READABLE cache entry, provided ucache_ok accepts the range with VM_WRITE. NOTE(review): presumably READABLE marks memory a system call will read data into, which is why the mapping must be writable - confirm. */ +void +set_read_region(unsigned long addr, unsigned int len) +{ +#ifdef CONFIG_MOSIX_DIAG + if(no_ucache || no_read_cache) + return; +#endif /* CONFIG_MOSIX_DIAG */ + if(!ucache_ok(addr, len, VM_WRITE)) + return; + add_ucache_entry(addr, len, READABLE); +} + /* set_write_region: register the len bytes at addr as a WRITEABLE cache entry, provided ucache_ok accepts the range with VM_READ. NOTE(review): presumably WRITEABLE marks memory a system call will write data out from, which is why the mapping must be readable - confirm. */ +void +set_write_region(unsigned long addr, unsigned int len) +{ +#ifdef CONFIG_MOSIX_DIAG + if(no_ucache || no_write_cache) + return; +#endif /* CONFIG_MOSIX_DIAG */ + if(!ucache_ok(addr, len, VM_READ)) + return; + add_ucache_entry(addr, len, WRITEABLE); +} + /* One header record per cache entry inside a ucache envelope. actual_len is the number of data bytes shipped for the entry: 0 for entries without WRITEABLE, otherwise the entry length limited to MAX_SEGMENT. Its type is chosen so that it can hold MAX_SEGMENT. */ +struct ucache_envelope_entry +{ + unsigned long addr; + unsigned int len; + short type; +#if MAX_SEGMENT < 65536 + unsigned short actual_len; +#else + int actual_len; +#endif +}; + /* construct_ucache_envelope: describe the data cache of the current task for transmission, then flush it. With exactly one entry the description is written to the simple_data fields of s, and for a WRITEABLE entry the user address of the data is returned with *from_user set to 1 and *len set to the byte count. With several entries a kmalloc-ed envelope (int count, one header per entry, then the data copied from user space) is returned, also stored in *tofree, with its size in *len and s->simple_data_len set to -1. Returns NULL with *len == 0 when there is no cache, no entry, a single entry without WRITEABLE, or on failure. */ +char * +construct_ucache_envelope(int *len, int *from_user, struct syscall_h *s, char **tofree) +{ + int l, n; + register struct data_cache *d = current->mosix.ucache; + struct ucache_envelope_entry e[CACHE_ENTRIES]; + register int i; + char *envelope, *ep; + int ent = END_MARK; /* only to shut-up the compiler */ + + *tofree = NULL; + *len = *from_user = s->simple_data_len = 0; + if(!d) + return(NULL); + rare_again: + for(l = n = 0 , i = d->first ; i != END_MARK ; i = d->next[i]) + { + if(d->len[i] > MAX_POSITIVE) /* rare indeed, but check */ + { + split_ucache(i, d->addr[i] + MAX_POSITIVE, 0); + goto rare_again; /* because an entry could be lost */ + } + l += sizeof(struct ucache_envelope_entry); + e[n].addr = d->addr[i]; + e[n].len = d->len[i]; + if((e[n].type = d->type[i]) & WRITEABLE) + { + e[n].actual_len = (d->len[i] > MAX_SEGMENT ? 
+ MAX_SEGMENT : d->len[i]); + l += e[n].actual_len; + } + else + e[n].actual_len = 0; + if(!n++) /* first entry: remember its index and account for the leading int count */ + { + ent = i; + l += sizeof(int); + } + } + if(n == 1) + { + /* no need for package */ + breakpoint1("construct_ucache_envelope: only one", l); + s->simple_data_addr = d->addr[ent]; + s->simple_data_len = d->len[ent]; + s->simple_data_type = d->type[ent]; + if(d->type[ent] & WRITEABLE) + { + *from_user = 1; /* the returned pointer is a user address */ + if(s->simple_data_len >= MAX_SEGMENT) + *len = MAX_SEGMENT; + else + *len = s->simple_data_len; + s->simple_data_actual = *len; + ep = (char *)d->addr[ent]; + } + else + { + ep = NULL; + s->simple_data_actual = 0; + } + flush_ucache(); + return(ep); + } + if(!n || !(envelope = kmalloc(l, GFP_NOIO))) + { + if(n) + breakpoint1("construct_ucache_envelope: could not allocate %d bytes", l); + flush_ucache(); + return(NULL); + } + *((int *)envelope) = n; /* envelope layout: int count, n headers, then the data of each entry in order */ + memcpy(envelope + sizeof(int), (caddr_t)e, n * sizeof(e[0])); + ep = envelope + sizeof(int) + n * sizeof(e[0]); + for(i = 0 ; i < n ; i++) + if(e[i].actual_len) + { + if(copy_from_user(ep, (char *)e[i].addr, e[i].actual_len)) + { + /* must be a very serious problem, because area was + * already checked, but could happen on a bad page, + * or if disconnected from DEPUTY, anyway: + */ + kfree(envelope); + flush_ucache(); + return(NULL); + } + ep += e[i].actual_len; + } + breakpoint1("construct_ucache_envelope: made %d bytes", l); + flush_ucache(); + *len = l; + *tofree = envelope; + s->simple_data_len = -1; + return(envelope); +} + /* open_ucache_envelope: rebuild cache entries for the current task from an incoming description in the layout written by construct_ucache_envelope: either the single entry held in the simple_data fields of s, or, when s->simple_data_len is -1, an envelope read with comm_copydata. Entries that come with data get a cblock and are switched from WRITEABLE to WRITEDATA. Returns 0 on success (also when s->simple_data_len is 0), -ENOMEM or -EDIST on failure. */ +int +open_ucache_envelope(struct syscall_h *s) +{ + int n; + register int i, j; + int ent; + struct ucache_envelope_entry e[CACHE_ENTRIES]; + struct cblock *cb; + register struct data_cache *d; + struct mosix_task *m = &current->mosix; + + if(!s->simple_data_len) + return(0); + if(!(d = m->ucache) && !(alloc_ucache())) /* no cache and none could be allocated: discard the incoming data */ + { + comm_flushdata(COMM_ALLDATA); + return(-ENOMEM); + } + d = m->ucache; + if(s->simple_data_len == -1) /* complex */ + { + if(comm_copydata(&n, sizeof(int), 0)) + { + 
mosix_panic("open_ucache_envelope: no total"); + return(-EDIST); + } + if(n < 1 || n > CACHE_ENTRIES) /* e[] has CACHE_ENTRIES slots: reject counts that do not fit */ + { + printk("n in envelope = %d\n", n); + mosix_panic("bad n in envelope"); + comm_flushdata(COMM_ALLDATA); + return(-EDIST); + } + if(comm_copydata(e, n * sizeof(e[0]), 0)) + { + mosix_panic("open_ucache_envelope: non header"); + return(-EDIST); + } + } + else + { + n = 1; + e[0].type = s->simple_data_type; + e[0].addr = s->simple_data_addr; + e[0].len = s->simple_data_len; + e[0].actual_len = s->simple_data_actual; + } + for(ent = END_MARK , i = 0 ; i < n ; i++) /* each call is handed the value returned by the previous one, starting from END_MARK */ + ent = alloc_ucache_entry(d, ent, e[i].type, e[i].addr, e[i].len); + for(i = 0 ; i < n ; i++) + if(e[i].actual_len) + { + if(!(cb = cblock_alloc(e[i].addr, e[i].actual_len))) + { + comm_flushdata(COMM_ALLDATA); + return(-ENOMEM); + } + if(comm_copydata(cb->data, e[i].actual_len, 0)) + { + printk("open_ucache_envelope: copy failed\n"); + kfree(cb); + return(-EDIST); + } + /* find all entries to share the block: */ + for(j = d->first ; j != END_MARK ; j = d->next[j]) + if(d->type[j] & WRITEABLE) + { + if(d->addr[j] >= e[i].addr + e[i].actual_len || + d->addr[j] + d->len[j] <= e[i].addr) + continue; /* entry j does not overlap the received data */ + if(d->addr[j] < e[i].addr) + split_ucache(j, e[i].addr, 1); + if(d->addr[j] + d->len[j] > e[i].addr + e[i].actual_len) + split_ucache(j, e[i].addr + e[i].actual_len, 0); + d->bp[j] = cb; + cb->count++; + d->type[j] &= ~WRITEABLE; + d->type[j] |= WRITEDATA; + } + if(--cb->count == 0) /* presumably drops the initial reference from cblock_alloc, so the block is freed when no entry took it - confirm against cblock_alloc */ + kfree(cb); + } + breakpoint("opened_ucache_envelope"); + return(0); +} + /* Header record for one chunk of packed read-cache data: the user address the chunk belongs to and its length in bytes. */ +struct read_cache_header +{ + unsigned long addr; + unsigned int len; +}; + +/* + * Attempt to send the data directly whenever there is only one entry. 
+ */ +char * +deputy_pack_read_cache_data(int *len, struct syscall_ret_h *r, char **tofree) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + int f = END_MARK; /* index of the only READDATA entry seen so far */ + char *result; + struct cblock *cb; + + *tofree = NULL; + *len = r->simple_data_len = 0; + if(!d || d->first == END_MARK) + return(NULL); + for(i = d->first ; i != END_MARK ; i = d->next[i]) + if((d->type[i] & (READDATA|SKIP_MARK)) == READDATA) /* READDATA entries carrying SKIP_MARK are ignored */ + { + if(f != END_MARK) + { + /* more than one entry - do it the long way */ + result = pack_read_cache_data(len); + if(result) /* (could still fail on memory) */ + { + r->simple_data_len = -1; + *tofree = result; + } + flush_ucache(); + return(result); + } + else + f = i; + } + if(f == END_MARK) + { + flush_ucache(); + return(NULL); + } + /* OK: Just one READDATA entry "f" */ + r->simple_data_addr = d->addr[f]; + *len = r->simple_data_len = d->len[f]; + cb = d->bp[f]; + result = &cb->data[r->simple_data_addr - cb->offset]; /* points into the block itself, no copy is made */ + d->bp[f] = NULL; /* detach the block so that flush_ucache below does not release it */ + *tofree = (char *)cb; /* the block is handed to the caller through *tofree */ + d->type[f] &= ~(READDATA|WRITEDATA); + if(!(d->type[f] & WRITEABLE)) + free_ucache_entry(d, f); + breakpoint1("deputy_pack_read_cache_data: One entry found/used (%d)",f); + flush_ucache(); + return(result); +} + /* pack_read_cache_data: build one kmalloc-ed package from every entry of the current task that has READDATA without SKIP_MARK: an int count, one read_cache_header per entry, then the data bytes of each entry in the same order. After packing, data_was_read is called for each of those entries. Returns the package with its size in *len, or NULL with *len == 0 when there is no cache, no such entry, or no memory. */ +char * +pack_read_cache_data(int *len) +{ + int l, n; + register struct data_cache *d = current->mosix.ucache; + struct read_cache_header e[CACHE_ENTRIES]; + caddr_t from[CACHE_ENTRIES]; /* where the data of each packed entry lives inside its block */ + register int i; + char *package, *ep; + struct cblock *cb; + + if(!d || d->first == END_MARK) + { + *len = 0; + return(NULL); + } + for(l = n = 0 , i = d->first ; i != END_MARK ; i = d->next[i]) + if((d->type[i] & (READDATA|SKIP_MARK)) == READDATA) + { + cb = d->bp[i]; +#ifdef CONFIG_MOSIX_DIAG + if(!cb) + panic("pack_read_cache_data: no cb"); +#endif /* CONFIG_MOSIX_DIAG */ + from[n] = &cb->data[d->addr[i] - cb->offset]; + e[n].addr = d->addr[i]; + l += sizeof(e[0]) + (e[n].len = d->len[i]); + if(!n++) + l += sizeof(int); + } +#ifdef CONFIG_MOSIX_DIAG + if(n < 2) + 
printk("pack_read_cache_data, why got here if (n=%d) < 2?\n",n); +#endif /* CONFIG_MOSIX_DIAG */ + if(!n || !(package = kmalloc(l, GFP_NOIO))) + { + breakpoint1("pack_read_cache_data: could not allocate %d bytes", l); + *len = 0; + return(NULL); + } + *((int *)package) = n; /* package layout: int count, n headers, then the data of each entry in order */ + memcpy(package + sizeof(int), (caddr_t)e, n * sizeof(e[0])); + ep = package + sizeof(int) + n * sizeof(e[0]); + for(i = 0 ; i < n ; i++) + { + memcpy(ep, from[i], e[i].len); + ep += e[i].len; + } + *len = l; + clear_readdata: + for(i = d->first ; i != END_MARK ; i = d->next[i]) + if((d->type[i] & (READDATA|SKIP_MARK)) == READDATA) + { + data_was_read(d, i); + goto clear_readdata; /* list may be scrambled */ + } + breakpoint1("pack_read_cache_data: packed %d bytes", *len); + return(package); +} + /* remote_unpack_read_cache_data: receive read-cache data with comm_copydata and place it at the user addresses named in the headers. With r set and r->simple_data_len neither 0 nor -1, a single chunk is described by the simple_data fields of r. With r NULL or r->simple_data_len == -1, an int count and that many read_cache_header records are read first. Returns 0 on success (at once when r->simple_data_len is 0) and -EDIST on failure; on success the total byte count is passed to bump_copyout. */ +int +remote_unpack_read_cache_data(struct syscall_ret_h *r) +{ + register int i; + int l = 0, n; + struct read_cache_header e[CACHE_ENTRIES]; + + if(r && !r->simple_data_len) + return(0); + if(!r || r->simple_data_len == -1) + { + if(comm_copydata(&n, sizeof(int), 0)) + { + mosix_panic("unpack_read_cache_data: no total"); + return(-EDIST); + } + if(n < 1 || n > CACHE_ENTRIES) /* e[] has CACHE_ENTRIES slots: reject counts that do not fit */ + { + printk("n in package = %d\n", n); + mosix_panic("bad n in package"); + comm_flushdata(COMM_ALLDATA); + return(-EDIST); + } + if(comm_copydata(e, n * sizeof(e[0]), 0)) + { + mosix_panic("unpack_read_cache_data: no header"); + return(-EDIST); + } + l = sizeof(int) + n * sizeof(e[0]); + } + else + { + n = 1; + e[0].addr = r->simple_data_addr; + e[0].len = r->simple_data_len; + } + for(i = 0 ; i < n ; i++) + if(comm_copydata((char *)e[i].addr, e[i].len, 1)) + { + printk("unpack_cache_read_data: copy to user failed\n"); + comm_flushdata(COMM_ALLDATA); + return(-EDIST); + } + else + l += e[i].len; + bump_copyout(l); + return(0); +} + /* print_ucache: debug dump, built only with CONFIG_MOSIX_DEBUG, that prints the data cache of the current task with printk. */ +#ifdef CONFIG_MOSIX_DEBUG +void +print_ucache(void) +{ + register struct data_cache *d = current->mosix.ucache; + register int i; + struct cblock *cb; + + if(!d) + { + printk("No data-cache 
allocated.\n"); + return; + } + printk(" n addr len (hex) type block\n"); + printk("===========================================================\n"); + for(i = d->first ; i != END_MARK ; i = d->next[i]) + { + printk("%2d %08X %8X ", i, (int)d->addr[i], (int)d->len[i]); + printk("%c", (d->type[i] & SKIP_MARK) ? 'S' : ' '); + printk("%c", (d->type[i] & READABLE) ? 'r' : '-'); + printk("%c", (d->type[i] & READDATA) ? 'R' : '-'); + printk("%c", (d->type[i] & WRITEABLE) ? 'w' : '-'); + printk("%c", (d->type[i] & WRITEDATA) ? 'W' : '-'); + if((cb = d->bp[i])) + printk(" %08X,o=%X,l=%X,n=%d", (int)cb, (int)cb->offset, + cb->len, cb->count); + printk("\n"); + } + printk("Free list:"); + for(i = d->free ; i != END_MARK ; i = d->next[i]) + printk(" %d", i); + printk("\n"); +} +#endif /* CONFIG_MOSIX_DEBUG */ diff -urN linux-2.4.17/net/core/scm.c linux_umopenmosix/net/core/scm.c --- linux-2.4.17/net/core/scm.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/core/scm.c Wed Jun 26 23:45:18 2002 @@ -31,6 +31,9 @@ #include #include +#ifdef CONFIG_MOSIX_DFSA +#include +#endif /* CONFIG_MOSIX_DFSA */ /* * Only allow a user to send credentials, that they could set with @@ -81,6 +84,12 @@ * Verify the descriptors and increment the usage count. 
*/ +#ifdef CONFIG_MOSIX_DFSA + /* this is massive and rare, so rather than make special cases: */ + if(current->mosix.dflags & DREMOTE) + return(-EDOITATHOME); + dfsa_is_not_up_to_date(); +#endif /* CONFIG_MOSIX_DFSA */ for (i=0; i< num; i++) { int fd = fdp[i]; @@ -206,6 +215,10 @@ int *cmfptr; int err = 0, i; +#ifdef CONFIG_MOSIX_DFSA + if(current->mosix.dflags & DREMOTE) + panic("scm_detach_fds: REMOTE"); +#endif /* CONFIG_MOSIX_DFSA */ if (msg->msg_controllen > sizeof(struct cmsghdr)) fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int)); diff -urN linux-2.4.17/net/core/skbuff.c linux_umopenmosix/net/core/skbuff.c --- linux-2.4.17/net/core/skbuff.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/core/skbuff.c Wed Jun 26 23:45:18 2002 @@ -4,7 +4,7 @@ * Authors: Alan Cox * Florian La Roche * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ + * Version: $Id: skbuff.c,v 1.5 2002/05/16 07:32:47 marhoy Exp $ * * Fixes: * Alan Cox : Fixed the worst of the load balancer bugs. @@ -61,6 +61,10 @@ #include #include +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + int sysctl_hot_list_len = 128; static kmem_cache_t *skbuff_head_cache; @@ -166,6 +170,9 @@ struct sk_buff *skb; u8 *data; +#ifdef CONFIG_MOSIX_UDB + if(!udb_booting) +#endif /* CONFIG_MOSIX_UDB */ if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { static int count = 0; if (++count < 5) { diff -urN linux-2.4.17/net/core/sock.c linux_umopenmosix/net/core/sock.c --- linux-2.4.17/net/core/sock.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/core/sock.c Wed Jun 26 23:45:18 2002 @@ -7,7 +7,7 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.116 2001/11/08 04:20:06 davem Exp $ + * Version: $Id: sock.c,v 1.5 2002/05/16 07:32:47 marhoy Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -127,6 +127,10 @@ #include #endif +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + /* Run time adjustable parameters. 
*/ __u32 sysctl_wmem_max = SK_WMEM_MAX; __u32 sysctl_rmem_max = SK_RMEM_MAX; @@ -1136,6 +1140,10 @@ if (sk->sleep && waitqueue_active(sk->sleep)) wake_up_interruptible(sk->sleep); sk_wake_async(sk,1,POLL_IN); +#ifdef CONFIG_MOSIX + if (sk->socket && test_bit(SOCK_INTER_MOSIX, &sk->socket->flags)) + mosix_notify_receive(sk->socket); +#endif /* CONFIG_MOSIX */ read_unlock(&sk->callback_lock); } diff -urN linux-2.4.17/net/ipv4/af_inet.c linux_umopenmosix/net/ipv4/af_inet.c --- linux-2.4.17/net/ipv4/af_inet.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/ipv4/af_inet.c Wed Jun 26 23:45:18 2002 @@ -117,6 +117,10 @@ #include /* Note : will define WIRELESS_EXT */ #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */ +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ + struct linux_mib net_statistics[NR_CPUS*2]; #ifdef INET_REFCNT_DEBUG @@ -556,6 +560,10 @@ if (sk->num==0 && inet_autobind(sk) != 0) return -EAGAIN; +#ifdef CONFIG_MOSIX + if(reserved_mosix_address(uaddr)) + return(-EACCES); +#endif /* CONFIG_MOSIX */ return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } @@ -627,6 +635,11 @@ sk->sport = htons(sk->num); } +#ifdef CONFIG_MOSIX + err = -EACCES; + if(reserved_mosix_address(uaddr)) + goto out; +#endif /* CONFIG_MOSIX */ err = sk->prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; diff -urN linux-2.4.17/net/ipv4/tcp_diag.c linux_umopenmosix/net/ipv4/tcp_diag.c --- linux-2.4.17/net/ipv4/tcp_diag.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/ipv4/tcp_diag.c Wed Jun 26 23:45:18 2002 @@ -1,7 +1,7 @@ /* * tcp_diag.c Module for monitoring TCP sockets. 
* - * Version: $Id: tcp_diag.c,v 1.2 2001/11/05 09:42:22 davem Exp $ + * Version: $Id: tcp_diag.c,v 1.3 2002/05/16 07:32:47 marhoy Exp $ * * Authors: Alexey Kuznetsov, * diff -urN linux-2.4.17/net/ipv4/tcp_input.c linux_umopenmosix/net/ipv4/tcp_input.c --- linux-2.4.17/net/ipv4/tcp_input.c Fri Dec 21 19:42:05 2001 +++ linux_umopenmosix/net/ipv4/tcp_input.c Wed Jun 26 23:45:18 2002 @@ -1987,6 +1987,9 @@ return 0; } +#ifdef CONFIG_MOSIX +#include +#endif /* CONFIG_MOSIX */ /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when @@ -3054,7 +3057,13 @@ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); u32 ptr = ntohs(th->urg_ptr); +#ifdef CONFIG_MOSIX + /* MOSIX sockets always use BSD style regardless of sysctl_tcp_stdurg */ + if (ptr && (!sysctl_tcp_stdurg || + (sk->socket && test_bit(SOCK_INTER_MOSIX, &sk->socket->flags)))) +#else if (ptr && !sysctl_tcp_stdurg) +#endif /* CONFIG_MOSIX */ ptr--; ptr += ntohl(th->seq); @@ -3081,6 +3090,17 @@ /* Tell the world about our new urgent pointer. 
*/ if (sk->proc != 0) { +#ifdef CONFIG_MOSIX + if (!sk->socket) +#ifdef CONFIG_MOSIX_DEBUG + printk("tcp_check_urg: funny, no socket, sk=0x%x", + (int)sk) +#endif /* CONFIG_MOSIX_DEBUG */ + ; + else if (test_bit(SOCK_INTER_MOSIX, &sk->socket->flags)) + /*mosix_notify_urgent(sk->socket)*/ ; + else +#endif /* CONFIG_MOSIX */ if (sk->proc > 0) kill_proc(sk->proc, SIGURG, 1); else @@ -3142,6 +3162,15 @@ tp->urg_data = TCP_URG_VALID | tmp; if (!sk->dead) sk->data_ready(sk,0); +#ifdef CONFIG_MOSIX + if (sk->socket && + test_bit(SOCK_INTER_MOSIX, &sk->socket->flags)) + { + if(tmp != 0xdb) + printk("tcp_urg: not 0xdb (%x)\n", tmp); + mosix_notify_urgent(sk->socket); + } +#endif /* CONFIG_MOSIX */ } } } diff -urN linux-2.4.17/net/sunrpc/sched.c linux_umopenmosix/net/sunrpc/sched.c --- linux-2.4.17/net/sunrpc/sched.c Thu Oct 11 17:12:52 2001 +++ linux_umopenmosix/net/sunrpc/sched.c Wed Jun 26 23:45:18 2002 @@ -1109,6 +1109,9 @@ unsigned long flags; while (all_tasks) { +#ifdef CONFIG_MOSIX + current->mosix.ignoreoldsigs = 1; +#endif /* CONFIG_MOSIX */ current->sigpending = 0; rpc_killall_tasks(NULL); __rpc_schedule(); @@ -1184,6 +1187,9 @@ * Usually rpciod will exit very quickly, so we * wait briefly before checking the process id. */ +#ifdef CONFIG_MOSIX + current->mosix.ignoreoldsigs = 1; +#endif /* CONFIG_MOSIX */ current->sigpending = 0; current->policy |= SCHED_YIELD; schedule(); diff -urN linux-2.4.17/net/sunrpc/svc.c linux_umopenmosix/net/sunrpc/svc.c --- linux-2.4.17/net/sunrpc/svc.c Fri Sep 7 20:48:39 2001 +++ linux_umopenmosix/net/sunrpc/svc.c Wed Jun 26 23:45:18 2002 @@ -185,6 +185,9 @@ progp->pg_name, proto == IPPROTO_UDP? 
"udp" : "tcp", port); if (!port) +#ifdef CONFIG_MOSIX + current->mosix.ignoreoldsigs = 1, +#endif /* CONFIG_MOSIX */ current->sigpending = 0; for (i = 0; i < progp->pg_nvers; i++) { diff -urN linux-2.4.17/node1_uml_config.openmosix linux_umopenmosix/node1_uml_config.openmosix --- linux-2.4.17/node1_uml_config.openmosix Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/node1_uml_config.openmosix Wed Jun 26 23:45:18 2002 @@ -0,0 +1,396 @@ +# +# Automatically generated by make menuconfig: don't edit +# +CONFIG_USERMODE=y +# CONFIG_ISA is not set +# CONFIG_SBUS is not set +# CONFIG_PCI is not set +CONFIG_UID16=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# openMosix +# +CONFIG_MOSIX=y +# CONFIG_MOSIX_TOPOLOGY is not set +CONFIG_MOSIX_SECUREPORTS=y +CONFIG_MOSIX_DISCLOSURE=1 +# CONFIG_MOSIX_EXTMOSIX is not set +CONFIG_MOSIX_FS=y +CONFIG_MOSIX_PIPE_EXCEPTIONS=y + +# +# General Setup +# +CONFIG_STDIO_CONSOLE=y +CONFIG_NET=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=y +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_SSL=y +CONFIG_HOSTFS=y +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_HOST_2G_2G is not set +# CONFIG_UML_SMP is not set +# CONFIG_SMP is not set +CONFIG_CON_ZERO_CHAN="fd:0,fd:1" +CONFIG_CON_CHAN="xterm" +CONFIG_SSL_CHAN="pty" + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_KMOD=y + +# +# Devices +# +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y +# CONFIG_MMAPPER is not set +CONFIG_UML_SOUND=y +CONFIG_SOUND=y +CONFIG_HOSTAUDIO=y +CONFIG_FD_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +# 
CONFIG_FILTER is not set +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_IPV6 is not set +# CONFIG_KHTTPD is not set +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network device support +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +CONFIG_UML_NET_MCAST=y +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=y +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +CONFIG_ETHERTAP=y + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +# CONFIG_SUNLANCE is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_NET_ISA is not set +# CONFIG_NET_PCI is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_MYRI_SBUS is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_SK98LIN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=y +# CONFIG_PPP_MULTILINK is not set +# 
CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +CONFIG_SLIP=y +# CONFIG_SLIP_COMPRESSED is not set +# CONFIG_SLIP_SMART is not set +# CONFIG_SLIP_MODE_SLIP6 is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set +# CONFIG_RCPCI is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# File systems +# +CONFIG_QUOTA=y +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_JBD_DEBUG is not set +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +# CONFIG_UMSDOS_FS is not set +CONFIG_VFAT_FS=y +# CONFIG_EFS_FS is not set +CONFIG_JFFS_FS=y +CONFIG_JFFS_FS_VERBOSE=0 +CONFIG_JFFS_PROC_FS=y +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +# CONFIG_CRAMFS is not set +# CONFIG_TMPFS is not set +# CONFIG_RAMFS is not set +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +CONFIG_MINIX_FS=y +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +CONFIG_DEVFS_FS=y +CONFIG_DEVFS_MOUNT=y +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not 
set +# CONFIG_ROOT_NFS is not set +CONFIG_NFSD=y +# CONFIG_NFSD_V3 is not set +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_SMB_FS=y +# CONFIG_SMB_NLS_DEFAULT is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +CONFIG_SMB_NLS=y +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=y +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not 
set +# CONFIG_NLS_UTF8 is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set +# CONFIG_BLK_DEV_MD is not set +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID5 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_BLK_DEV_LVM is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_PARTITIONS is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +# CONFIG_MTD_GEN_PROBE is not set +# CONFIG_MTD_CFI_INTELEXT is not set +# CONFIG_MTD_CFI_AMDSTD is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_OBSOLETE_CHIPS is not set +# CONFIG_MTD_AMDSTD is not set +# CONFIG_MTD_SHARP is not set +# CONFIG_MTD_JEDEC is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_PHYSMAP is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_MTDRAM is not set +CONFIG_MTD_BLKMTD=y +# CONFIG_MTD_DOC1000 is not set +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOCPROBE is not set + +# +# NAND Flash Device Drivers +# +# CONFIG_MTD_NAND is not set + +# +# Kernel hacking +# +# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUGSYM=y +CONFIG_PT_PROXY=y +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set diff -urN linux-2.4.17/openMosix-2.4.17-1 linux_umopenmosix/openMosix-2.4.17-1 --- linux-2.4.17/openMosix-2.4.17-1 Thu Jan 1 02:00:00 1970 +++ linux_umopenmosix/openMosix-2.4.17-1 Wed Jun 26 23:45:19 2002 @@ -0,0 +1,50479 @@ +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/Documentation/Configure.help linux-openmosix/Documentation/Configure.help +--- 
/tmp/openmosix/linux-2.4.17/Documentation/Configure.help Fri Dec 21 18:41:53 2001 ++++ linux-openmosix/Documentation/Configure.help Wed May 15 10:55:17 2002 +@@ -75,6 +75,182 @@ + # 1995-2000 by Axel Boldt and many others and are governed by the GNU + # General Public License. + ++Mosix extensions ++CONFIG_MOSIX ++ Say Y to support process migration and automatic load-balancing ++ within a cluster. ++ ++Support clusters with a complex network topology ++CONFIG_MOSIX_TOPOLOGY ++ This option is intended for configurations where the network ++ "distance" between the nodes is not uniform, so you require ++ node-dependent fine-tuning (see "man tune"). ++ ++ If all the nodes in your MOSIX cluster are connected via a simple ++ network and use the same networking hardware (which is the more ++ common case), Say N here (and save kernel-time!). ++ ++ Say Y here if not all your MOSIX cluster is connected via a simple ++ network or if some nodes use significantly different networking ++ hardware (if you are not sure whether the difference is significant, ++ compile some MOSIX kernel anyway and compare the results of "tune"). ++ ++Maximum network-topology complexity to support ++CONFIG_MOSIX_MAXTOPOLOGY ++ In a complex network-topology, each node can identify a number of ++ subsets of the other nodes, each set having the same networking ++ hardware, the same processor type (perhaps slower or faster, but ++ of the same type) and the same routing path. ++ The maximum network-complexity is defined as the maximum number of ++ those sets over all the nodes in your cluster. Minimizing this ++ number helps save kernel time and inter-node communication, so ++ please use only the level of complexity that you actually need. ++ ++ IMPORTANT NOTE: even if you prepare different kernels for different ++ nodes, all the nodes in your cluster must have the same value ++ configured here.
++ ++ MOSIX allows this value to be in the range of 2-10 (higher values ++ will be truncated and lower values imply no complex topology). ++ ++MOSIX kernel debugger ++CONFIG_MOSIX_UDB ++ Say Y to use the MOSIX kernel-debugger: ++ the MOSIX kernel-debugger is provided AS IS and can be entered from the ++ console by pressing . It is being used for the ++ development of MOSIX and no claims to reliability are made. ++ ++MOSIX diagnostics ++CONFIG_MOSIX_DIAG ++ Say Y to include MOSIX consistency checks. ++ While this adds code to the kernel, it may prevent unexpected ++ occurrences when running new versions of MOSIX. ++ ++MOSIX debug-code ++CONFIG_MOSIX_DEBUG ++ Say Y to include extensive debugging-messages option in MOSIX ++ (used for MOSIX kernel-development). ++ ++Process arrival messages ++CONFIG_MOSIX_WEEEEEEEEE ++ If you say Y to this option, the console will display messages whenever ++ a process arrives: Weeeeeeeee..... for remote (guest) processes and ++ Wooooooooo..... for local processes returning home. ++ ++Loopback process-migration testing ++CONFIG_MOSIX_CHEAT_MIGSELF ++ Say Y to allow loopback process "migration" from a node into itself ++ (useful only for MOSIX kernel-debugging). ++ ++Level of process-identity disclosure ++CONFIG_MOSIX_DISCLOSURE ++ Determine how much information about processes is disclosed by default ++ when they run as guests on remote nodes: ++ 0 = no information ++ 1 = only PID (and TGID if different) ++ 2 = PID(/TGID), UID, GID ++ 3 = PID(/TGID), UID, GID, PGRP, SESSION, COMMAND ++ Processes may modify this default by writing to /proc/self/disclosure. ++ ++Add the "-mosix" extension to the kernel name ++CONFIG_MOSIX_EXTMOSIX ++ Say Y if you want the kernel and module directory to have the "-mosix" ++ extension (to distinguish it from coexistent non-MOSIX kernels of the ++ same kernel version number).
++ ++Stricter security on MOSIX ports ++CONFIG_MOSIX_SECUREPORTS ++ The internal kernel TCP/UDP ports used by MOSIX may not be accessed ++ by normal users (and even the Super-User has no real reason to access ++ them). The question arises whether to allow user-connections to those ++ ports on other (internet) nodes, outside the cluster. ++ If routing schemes allow a MOSIX node to also be accessed from within ++ the MOSIX cluster using an IP address that is not listed in the MOSIX ++ configuration, you must say Y (or risk that a hacking user will mess ++ with the MOSIX internals by connecting to that IP address and one of ++ the MOSIX port numbers). However, these port numbers, though not ++ listed in "/etc/services", may be in use for other purposes somewhere ++ else on the internet - and this option would prevent accessing them, ++ so if your users require such access, say N and make sure that all the ++ IP addresses by which your nodes can be reached are listed as aliases ++ in the MOSIX configuration. ++ ++Direct File-System Access for MOSIX ++CONFIG_MOSIX_DFSA ++ Direct File System Access: ++ DFSA is now ready for Beta-testing. It can currently only be used ++ by MFS (MOSIX File-System). You may be interested in this option ++ if you either like experimenting and experiencing the power of DFSA ++ with MFS, or if you are interested in either developing a new ++ file-system, or adapting an existing one to DFSA. ++ ++ For the lay user, all you need is to also configure MFS, then run ++ the following commands on each node: ++ mkdir /mfs ++ mount -t mfs cluster /mfs -odfsa=1 ++ ++ If you are more serious about it, please read "Documentation/DFSA" ++ or run "man dfsa". ++ ++MOSIX File-System ++CONFIG_MOSIX_FS ++ The MOSIX File-System (MFS) is now ready for Beta-testing. ++ Along with DFSA, it forms the basis for the next generation ++ of MOSIX, expanding the power of MOSIX beyond CPU-bound tasks ++ into I/O tasks as well. 
++ ++ On its own, MFS allows processes to access most files (more accurately, ++ all regular files, directories and symbolic-links, but excluding the ++ "/proc" file-system and MFS itself) on all the nodes in the cluster. ++ ++ MFS assumes that all users/group-ID's throughout the cluster are ++ equivalent: if this is not the case, you cannot use MFS, so you must ++ say 'N' here, unless you have a sub-cluster where the user/group-ID's ++ are equivalent, in which case you may still choose to configure MFS ++ only in that sub-cluster. ++ ++ To use MFS, all you need is to type: ++ mount -t mfs cluster /mfs ++ (the word "cluster" can be changed to suit your taste and the "/mfs" ++ mount-point is only a suggestion, to be used in this discussion). ++ For a permanent solution, enter the following line in "/etc/fstab": ++ cluster /mfs mfs defaults 0 0 ++ ++ You can now access each node via "/mfs/{node_number}", you may also ++ access the following useful directories: ++ ++ /mfs/here - The current node where your process runs ++ /mfs/home - Your home node ++ /mfs/magic - The current node when used by the "creat" system call ++ (or an "open" with the "O_CREAT" option) - otherwise, ++ the last node on which an MFS magical file was ++ successfully created (this is very useful for creating ++ temporary-files, then immediately unlinking them) ++ /mfs/lastexec - The node on which the process last issued a successful ++ "execve" system-call. ++ /mfs/selected - The node you selected by either your process itself or ++ one of its ancestors (before forking this process), ++ writing a number into "/proc/self/selected". ++ ++ MFS is currently the only file-system that can be used with DFSA. ++ Detailed technical information is available in ++ "Documentation/filesystems/mfs.txt". ++ ++Poll/Select exceptions on pipes ++CONFIG_MOSIX_PIPE_EXCEPTIONS ++ This feature is not required for MOSIX-proper, but some user-mode ++ programs like it.
It allows a program to be notified when someone ++ is wishing to read from a pipe. If you say Y, you may then use: ++ ioctl(pipefd, TCSBRK, arg) to set or clear exception conditions. ++ If (arg & 1), an exception is generated when someone is trying to ++ read the pipe. If (arg & 2), an exception is generated when the ++ pipe has no more readers. The default is that pipes never generate ++ any exceptions. An exception can be returned by the "select" system ++ call and will also cause POLLPRI to be included in the return-value ++ of the "poll" system-call. You can also get an under-estimate of the ++ number of bytes that processes currently try to read from the pipe using ++ ioctl(pipefd, TIOCGWINSZ, 0). ++ + Prompt for development and/or incomplete code/drivers + CONFIG_EXPERIMENTAL + Some of the various things that Linux supports (such as network +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/Documentation/DFSA linux-openmosix/Documentation/DFSA +--- /tmp/openmosix/linux-2.4.17/Documentation/DFSA Thu Jan 1 01:00:00 1970 ++++ linux-openmosix/Documentation/DFSA Wed May 15 10:55:17 2002 +@@ -0,0 +1,215 @@ ++DFSA - Direct File-System Access: ++ ++The first generation of MOSIX has brought about great performance ++improvements in CPU jobs - "number crunchers", but cannot help in the ++case of I/O tasks, which need to communicate with their home-node as ++often as every system-call, and are therefore better off remaining there. ++ ++The second generation of MOSIX includes DFSA, whereby the more common ++system-calls can be (under certain conditions) performed directly on the ++caller's current node, thus increasing the benefit and probability that ++I/O-oriented (or mixed I/O and CPU) tasks will also migrate. ++ ++DFSA operates over suitable, cluster-wide shared file-systems that fulfill ++certain requirements. The only file-system to currently fulfill those ++requirements is the MOSIX File-System (MFS).
++ ++To use DFSA without violating access permissions, the permission-scheme ++(user and group ID's) must be identical, or at least compatible throughout ++the MOSIX cluster. ++ ++Each partition that is to operate in DFSA mode must be assigned a unique ++DFSA index, currently in the range of 1-8, that must be identical on all ++the nodes in the MOSIX cluster. ++ ++To request a particular partition to operate in DFSA mode, mount (or remount) ++it with the "-odfsa={n}" argument (1 <= n <= 8). ++ ++You should do the same on all the nodes in the cluster either at about the ++same time or before MOSIX is configured: failure to assign all DFSA ++mount-points on some of the nodes is not fatal, but may result in serious ++performance degradation, while simultaneous use of the same index for ++different partitions, is likely to cause various faults. ++ ++To disassociate a partition from DFSA, run: ++ ++mount -o remount {mount-point} -odfsa=0. ++ ++You may also designate symbolic-links to operate in DFSA mode: this is ++equivalent to a declaration that the given links are identical on all ++nodes and point to the same partition. It saves remote processes who use ++those link(s) the need to contact their home node every use in order to ++read those links. To declare a symbolic-link as identical, type: ++ ++echo {symbolic-link} > /proc/mosix/admin/dfsalinks, ++ ++where the symbolic-link must be an absolute-pathname, pointing at an existing ++file (or directory or another symbolic-link) on an already-mounted partition ++that is capable of DFSA (but it is not required to be already associated with ++a DFSA index). ++ ++To remove a symbolic-link declaration, type: ++ ++echo -{symbolic-link} > /proc/mosix/admin/dfsalinks: ++ ++If you intend to re-define a declared symbolic link, you must first re- ++move its declaration, then re-declare after the change is made. ++ ++To see a list of all currently-declared symbolic links, type: ++ ++cat /proc/mosix/admin/dfsalinks. 
++ ++To cancel all symbolic-link declarations, type: ++ ++echo - > /proc/mosix/admin/dfsalinks. ++ ++The number of declared symbolic-links is currently limited to 8 and their ++path-name length is limited to 128 characters. ++ ++requirements from a complying file-system: ++------------------------------------------ ++1) all operations on the file system must be synchronous, in the sense that ++ there is [at most] only one buffer/inode cache throughout the cluster. ++ (on client-server file-systems, this usually means that the whole cache ++ is maintained on the server - however, a sophisticated server may "lend" ++ the cache of particular inodes to particular clients at any given time. ++ on shared-hardware file-systems, this probably requires either a hardware ++ invalidation signal or a new version to be marked on each inode after each ++ modification). ++ ++2) The time-stamps on files and between files of the same file-system must be ++ consistent and advancing (unless the clock is deliberately set backwards), ++ regardless from which node modifications are made. ++ ++3) The file-system must populate the following two new super-block methods: ++ a) "identify": ++ Given a "dentry", encapsulate identifying information about it into ++ a finite, rather-small structure, in a way that is sufficient to be ++ able to re-establish that open file/directory on another node. ++ b) "reconstruct": ++ Given only a mount-structure ("vfsmnt") and information that was ++ provided by "identify", produce a live new "dentry". 
++ ++ Also, while not enforced by DFSA itself, in order for the getcwd ++ system-call to work correctly on a shared file-system, regardless ++ of where the call is made from, it is also highly recommended to ++ populate the following new inode-method: ++ c) "checkpath": ++ Given a "dentry", ensure that following its path via the "dcache" ++ will truly reflect its current position on the shared file-system - ++ and if not, make the necessary fixes by adjusting the "dentry" around ++ the directory cache: The "dcache" of shared file-systems cannot be ++ trusted, since processes running on other nodes can move (or remove) ++ a directory at any time. ++ ++4) The file-system must ensure that files/directories are not cleared when ++ unlinked, for as long as any process in the cluster still holds them open. ++ There are several possible techniques to achieve this, but given the ++ distributed nature of the file-system, some form of garbage-collection ++ is probably also called upon. ++ ++Which system-calls are supported: ++--------------------------------- ++The following system-calls are normally supported and usually run directly ++by the process, while any other calls, or hard cases still need to go via ++the home-node: ++ ++ read, readv, write, writev, readahead ++ lseek, llseek ++ open, creat, close ++ dup, dup2, fcntl/fcntl64 (F_DUPFD,F_GETFL,F_SETFL) ++ getdents, getdents64, old_readdir ++ fsync, fdatasync ++ chdir, fchdir, getcwd ++ stat, stat64, newstat, lstat, lstat64, newlstat, ++ fstat, fstat64, newfstat ++ access ++ truncate, truncate64, ftruncate, ftruncate64 ++ chmod, chown, chown16, lchown, lchown16, fchmod, fchown, fchown16 ++ utime, utimes ++ symlink, readlink ++ mkdir, rmdir ++ link, unlink, rename ++ ++Examples of hard cases: ++* if not all nodes have the same mounted DFSA partitions, or they do - ++ but with different mount-flags. ++* if the calling process is being traced. ++* if the process has a non-standard root-directory. 
++* If the calling process has an emulating personality that causes it ++ to use an alternate root (but this is currently not relevant for the ++ i386 architecture). ++* if the calling process shares either its files or current directory ++ as a result of the "clone" system-call. ++* operations occurring during re-configuration of DFSA on either the ++ home-node or the node where the process runs. ++* operations involving special files (i.e. other than regular, directories ++ or symbolic-links) ++* operations on files that were commonly opened and still shared with other ++ related processes. ++* dup2, where the second file-descriptor is an already open non-DFSA file ++ (that requires closing). ++* chdir/fchdir when the previous directory is non-DFSA. ++* link/rename that fail due to an attempt to cross-device link. ++* open/dup/dup2/fcntl(F_DUPFD) that requires an allowable-increase in ++ the maximal file-descriptor index (initially 1023!). ++* When the home-node has pending requests for the process (such as ++ signals, requests for "ps" information, request to migrate or consider ++ migration, etc.) ++* Use of path-names that leave the DFSA partition, as demonstrated by ++ the following example: ++ "/mfs" is a DFSA file-system ++ /mfstmp is a symbolic link to /mfs/2/tmp, and is declared in ++ /proc/mosix/admin/dfsalinks. ++ /mtmp is a symbolic link to /mfstmp, and is declared in ++ /proc/mosix/admin/dfsalinks. ++ /mfs2 is a symbolic link to /mfs/2, but is not declared. ++ on node #2, /fie is a symbolic link to "/tmp/foo". ++ then the following are accepted as simple cases (and identical): ++ /mfs/2/tmp/foo ++ //mfs//2/tmp/foo ++ /./mfs/2/tmp/foo ++ /mfstmp/foo ++ /mtmp/foo ++ /mfs/2/fie ++ mfs/2/tmp/foo (when in the root directory) ++ ++ but not the following: ++ /tmp/../mfs/tmp/foo ++ (the kernel is not allowed to assume that each node has an ++ accessible "/tmp" directory!) ++ /mfs/2/../../mfs/2/tmp/foo ++ (the second ".."
steps out of the "/mfs" DFSA partition) ++ /mfs2/tmp/foo ++ (/mfs2 is not declared, hence no assurance was provided ++ that it is identical on all nodes) ++ mfstmp/foo (or mtmp/foo) when in the root directory ++ (just a difficult case to recognize) ++ ++* when the home-node DEPUTY has pending requests for the process (such as ++ signals, requests for "ps" information, request to migrate or consider ++ migration, etc.) ++ ++Deviations from normal Linux/Unix/Posix behavior: ++-------------------------------------------------- ++It was impossible to maintain 100% compatibility on DFSA file-systems, ++but the deviations are kept to the very minimum: ++ ++* A process that received a signal may continue running a few DFSA system-calls ++ before it actually receives and handles the signal. ++ (in contrast, any POSIX process that receives a signal may possibly ++ complete the next system-call, but cannot issue any new ones beyond that). ++ ++* Simultaneous mapping and I/O on the same DFSA file creates unpredictable ++ results as follows: ++ 1) execution (and library and all other file-mappings) is not always ++ protected against other process(es) modifying the file: either the ++ writing-process or the executing/mapping process may fail to receive the ++ "ETXTBSY" error. ++ 2) The "MS_INVALIDATE" flag of "msync" may fail to ensure that previous ++ "write"(s) to a mapped DFSA file are discarded. ++ 3) when a process modifies memory that is mapped as "MAP_SHARED" to a DFSA ++ file, but has not yet written it back (using "msync", "munmap", "exec" ++ or "exit"), it is possible that another process that reads that file as ++ it migrates will first see some of the changes but later (as opposed to ++ normal behavior), see the old values (or some of them) again.
+diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/Documentation/filesystems/00-INDEX linux-openmosix/Documentation/filesystems/00-INDEX +--- /tmp/openmosix/linux-2.4.17/Documentation/filesystems/00-INDEX Wed Jun 20 20:10:27 2001 ++++ linux-openmosix/Documentation/filesystems/00-INDEX Wed May 15 10:55:17 2002 +@@ -22,6 +22,8 @@ + - info and mount options for the OS/2 HPFS. + isofs.txt + - info and mount options for the ISO 9660 (CDROM) filesystem. ++mfs.txt ++ - info on the Mosix filesystem. + ncpfs.txt + - info on Novell Netware(tm) filesystem using NCP protocol. + ntfs.txt +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/Documentation/filesystems/mfs.txt linux-openmosix/Documentation/filesystems/mfs.txt +--- /tmp/openmosix/linux-2.4.17/Documentation/filesystems/mfs.txt Thu Jan 1 01:00:00 1970 ++++ linux-openmosix/Documentation/filesystems/mfs.txt Wed May 15 10:55:17 2002 +@@ -0,0 +1,197 @@ ++MFS interface, capabilities and limitations: ++============================================ ++To use MFS, configure the "CONFIG_MOSIX_FS" option into the kernel ++then mount it, using: ++ mount -t mfs {any_name} {mount_point} [-o dfsa={n}] ++This gives access to nearly all files throughout the MOSIX cluster, ++with the root of each node available via {mount_point}/{node_number}/. ++Also available as sub-directories are: ++ ++/{mount_point}/here/ ++ The current node where your process runs ++/{mount_point}/home/ ++ Your home node ++/{mount_point}/magic/ ++ The current node when used by the "creat" system call (or an "open" ++ with the "O_CREAT" option) - otherwise, the last node on which an MFS ++ magical file was successfully created (this is very useful for creating ++ temporary-files, then immediately unlinking them) ++/{mount_point}/lastexec/ ++ The node on which the process last issued a successful "execve" ++ system-call. 
++/{mount_point}/selected/ ++ The node you selected by either your process itself or as inherited ++ by one of its ancestors (before forking this process), writing a ++ number into "/proc/self/selected". ++ ++You may also wish to have MFS mounted automatically by entering the following ++line into "/etc/fstab": ++ cluster /{mount_point} mfs defaults 0 0 ++or for DFSA use: ++ cluster /{mount_point} mfs dfsa=1 0 0 ++ ++Once CONFIG_MOSIX_FS is configured in the kernel and MOSIX has been configured ++(See "man setpe"), other nodes can access the node's file-system even without ++the above mount. To disallow MFS access to this node, write a "1" to ++"/proc/mosix/admin/nomfs" (to re-allow, write a "0"). ++ ++MFS was designed to also run under DFSA, allowing direct access by processes, ++from wherever they run at each moment, to the node holding the files/directories ++that they require, bypassing their "DEPUTY" in most cases. This makes it even ++more efficient if the required files are on the same node as the process, in ++which case the process can serve itself without resorting to the network. ++To use MFS with DFSA, make sure that the mount-point is the same on all nodes, ++then mount (or remount) MFS with the "-odfsa={n}" flag, where {n} is in the ++range of 1-8 and identical on all nodes in the cluster. ++ ++Users and Groups: ++----------------- ++MFS assumes that all user and group ID's throughout the cluster have ++equivalent access rights. You should not use MFS on clusters with ++a heterogeneous user/group scheme. While allowing the Super-User to access ++all files throughout the cluster, this is implied anyway by the security ++requirements of MOSIX (See "man mosix").
If most of your cluster uses the ++same scheme, but some nodes do not, you may either configure MFS only in the ++kernel of those nodes that use the same scheme, or write a "1" to ++"/proc/mosix/admin/nomfs" during node-startup and before MOSIX is configured ++on the other nodes, as well as not mounting MFS there. ++ ++Temporary files: ++---------------- ++The "here", "magic", "lastexec" and "selected" directories are designed ++to provide easier access to temporary files, so that programs are helped ++to create their temporary files where they run. With many programs, you ++can make use of these directories without recompiling, by using the "TMPDIR" ++environment variable. ++ ++The most conservative, but safest thing to do, which can be applied to all ++programs, is to: ++ setenv TMPDIR "/{mount-point}/selected/tmp" ++In this case, your shell (or the calling script) should run ++ echo `cat /proc/self/where` > /proc/self/selected ++before calling the program. ++(note that "cp" cannot be used here, since only the shell may modify its ++own "selected", but "echo" works because it is built into most shells) ++ ++The next, little less conservative approach, but still safe for programs ++that do not rely on passing file-names to their children as arguments of ++"exec", is to: ++ setenv TMPDIR "/{mount-point}/lastexec/tmp" ++ (or "env TMPDIR=/{mount-point}/lastexec/tmp program [args]") ++ ++The next, still less conservative, but more powerful approach, can be used ++for programs that create temporary-files, which either create only one MFS file, ++or unlink temporary-files as soon as they are created.
For such programs: ++ setenv TMPDIR "/{mount-point}/magic/tmp" ++ (or "env TMPDIR=/{mount-point}/magic/tmp program [args]") ++ ++Finally, programs that are locked on any particular node, may use: ++ setenv TMPDIR "/{mount-point}/here/tmp" ++ (or "env TMPDIR=/{mount-point}/here/tmp program [args]") ++Please note that this approach is not 100% safe, because even while locked, ++migration back to the home-node may still occur if/when the node where the ++program runs is being shut-down for reboot. ++ ++Of course, when designing a new program to run with MFS, ++all the above methods can be freely mixed. ++ ++Interpretation of symbolic-links: ++--------------------------------- ++The following non-trivial interpretation of symbolic links found within MFS, ++was designed to provide uniformity of access between links created locally ++and via MFS, especially by scripts and "makefile"s that use `pwd` as part ++of symbolic links: ++ ++The rule is that when a symbolic link begins with a '/', it refers to the ++root of the file-system's node - not the home-node! ++Similarly, a "/.." (or any combination with ".." that calls for the parent of ++the file-system's root) refers to the file-system's root again, rather than to ++the MFS mount-point. ++ ++One of the implications is that a symbolic link is never allowed to cross nodes. ++ ++Excluded files: ++--------------- ++The following may not be accessed via the MFS file-system: ++* nodes that excluded themselves. ++* special files - other than regular-files, directories or symbolic-links. ++* the "proc" file-system. ++* any subdirectories of the recursive MFS mount-point with the exception ++ of symbolic links starting in '/', pointing to the same node, and doing ++ so only once.
++ ++Examples: ++assuming that there are 3 nodes in the cluster and on node #2: ++1) MFS is mounted on "/mfs" ++2) "/usr/src/linux_here" is a symbolic link to "/mfs/2/usr/src/linux" ++3) "/usr/src/local_linux" is a symbolic link to "../../mfs/2/usr/src/linux" ++4) "/usr/src/other_linux" is a symbolic link to "/mfs/3/usr/src/linux" ++5) "/usr/src/mfs_linux" is a symbolic link to "/mfs/2/mfs/2/usr/src/linux" ++ ++then the following are accessible: ++ ++/mfs/2/usr/src/linux ++/mfs/2/etc/hosts ++/mfs/2/mfs ++/mfs/2/usr/src/linux_here ++/usr/src/local_linux ++/usr/src/other_linux ++ ++but the following are not (and will result in "Permission denied" error): ++ ++/mfs/2/dev/tty6 (special character device) ++/mfs/2/proc/mosix ("proc" file system) ++/mfs/2/mfs/2/tmp (to prevent infinite recursion and confusing the shell) ++/mfs/2/usr/src/local_linux (symbolic-link does not start with '/') ++/mfs/2/usr/src/other_linux (symbolic-link pointing to another node) ++/usr/src/mfs_linux (symbolic-link pointing to local node twice) ++ ++(please note, however, that symbolic-links are still readable ++with "lstat" and "readlink" regardless of their contents) ++ ++Garbage Collection: ++------------------- ++When either a client node or part of the network crashes, a garbage-collection ++mechanism will eventually clean up the references to the held-files or ++directories on the serving node(s). It may take, however, up to an hour ++until the server(s) finally give up the connection, during which the serving ++node(s) will not be able to un-mount the particular file-system(s) involved. ++ ++The Super-User may still force an un-mount in 3 ways: ++1) disable MFS by writing a "1" to "/proc/mosix/admin/nomfs". ++2) un-configure MOSIX by running "setpe -off". ++3) write the name of a file or directory to be released to ++ "/proc/mosix/admin/mfskill". 
If the given name is of a directory, all ++ files and sub-directories under it will be released as well (with the ++ possible exception of files being actively accessed at that very moment), ++ thus writing '/' releases everything, but is very disruptive to users, ++ so it is better to write the name of the mount-point of the file-system ++ that you wish to un-mount. ++ ++Functionality limitations: ++-------------------------- ++* Mandatory file-locking is not supported. ++* the F_NOTIFY fcntl option is not supported. ++* Voluntary file-locking only operates among processes of the same home-node ++ (and since it will not be supported by DFSA, it always requires DEPUTY- ++ assistance on the home-node). ++* file-ioctl is currently only supported for the EXT2 file-system. ++* mmap of MFS files only supports private mappings (MAP_PRIVATE). ++ Open files must have read-permission. ++ The actual implementation of "mmap" and "execve" does not use demand-paging, ++ but rather reads in the relevant text/data from the file before proceeding. ++* Every effort was made to prevent giving the same inode-number to ++ different files, and in most cases this is the case, but it is not totally ++ possible with only 32-bit inode-numbers and the large potential number of ++ files on numerous nodes and devices within each node. Priority is given so ++ that files on any particular node do not get the same inode-numbers, but even ++ this cannot be absolutely guaranteed when some of the files are NFS (or other ++ file-systems that use the full 32-bit space for inode numbers). To identify ++ an inode most accurately, one should use the raw "stat" ("fstat"/"lstat") ++ system-call as provided by the kernel before being filtered by the ++ compatibility library, providing the node-number in the "__unused1" field, ++ the device-number in the "__unused2" field and the local inode-number in the ++ "__unused3" field (these fields are currently always 0 for non-MFS). 
++ In the "stat64"/"lstat64"/"fstat64" system-calls, the node number can be ++ found in "__pad0[2-3]", the device-number in "__pad0[4-5]" and the local ++ inode number in "__pad0[6-9]". +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/Makefile linux-openmosix/Makefile +--- /tmp/openmosix/linux-2.4.17/Makefile Fri Dec 21 18:41:53 2001 ++++ linux-openmosix/Makefile Wed May 15 10:55:17 2002 +@@ -54,6 +54,11 @@ + + ifeq (.config,$(wildcard .config)) + include .config ++ ++ifdef CONFIG_MOSIX_EXTMOSIX ++EXTRAVERSION := $(EXTRAVERSION)-mosix ++endif ++ + ifeq (.depend,$(wildcard .depend)) + include .depend + do-it-all: Version vmlinux +@@ -88,8 +93,13 @@ + + CPPFLAGS := -D__KERNEL__ -I$(HPATH) + ++ifdef CONFIG_MOSIX_UDB ++CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ ++ -fno-omit-frame-pointer -fno-strict-aliasing -fno-common ++else + CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fomit-frame-pointer -fno-strict-aliasing -fno-common ++endif + AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) + + # +@@ -122,7 +132,16 @@ + NETWORKS =net/network.o + + LIBS =$(TOPDIR)/lib/lib.a ++ifdef CONFIG_MOSIX ++CORE_FILES += $(TOPDIR)/mos/mos.o ++SUBDIRS =mos kernel drivers mm fs net ipc lib ++else + SUBDIRS =kernel drivers mm fs net ipc lib ++endif ++ifdef CONFIG_MOSIX_UDB ++CORE_FILES += $(TOPDIR)/udb/debugger.o ++SUBDIRS += udb ++endif + + DRIVERS-n := + DRIVERS-y := +@@ -208,6 +227,11 @@ + net/khttpd/make_times_h \ + net/khttpd/times.h \ + submenu* ++ ++CLEAN_FILES += arch/i386/kernel/mosasm.H arch/i386/kernel/offset \ ++ mos/auto_syscalls.c mos/mkdefcalls mos/alternate.c \ ++ udb/symbols.c udb/symtab vmlinux.symtab ++ + # directories removed with 'make clean' + CLEAN_DIRS = \ + modules +@@ -266,6 +290,20 @@ + $(LIBS) \ + --end-group \ + -o vmlinux ++ifdef CONFIG_MOSIX_UDB ++ $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > vmlinux.symtab ++ udb/symtab < vmlinux.symtab > udb/symbols.c ++ 
$(CC) -c $(CFLAGS) udb/symbols.c -o udb/symbols.o ++ $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ ++ --start-group \ ++ $(CORE_FILES) \ ++ $(DRIVERS) \ ++ $(NETWORKS) \ ++ $(LIBS) \ ++ --end-group \ ++ udb/symbols.o \ ++ -o vmlinux ++endif + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + + symlinks: +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/Makefile linux-openmosix/arch/i386/Makefile +--- /tmp/openmosix/linux-2.4.17/arch/i386/Makefile Thu Apr 12 21:20:31 2001 ++++ linux-openmosix/arch/i386/Makefile Wed May 15 10:55:17 2002 +@@ -138,6 +138,9 @@ + install: vmlinux + @$(MAKEBOOT) BOOTIMAGE=bzImage install + ++install1: ++ @$(MAKEBOOT) BOOTIMAGE=bzImage install ++ + archclean: + @$(MAKEBOOT) clean + +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/config.in linux-openmosix/arch/i386/config.in +--- /tmp/openmosix/linux-2.4.17/arch/i386/config.in Fri Dec 21 18:41:53 2001 ++++ linux-openmosix/arch/i386/config.in Wed May 15 10:55:17 2002 +@@ -11,6 +11,31 @@ + define_bool CONFIG_UID16 y + + mainmenu_option next_comment ++comment 'MOSIX' ++bool 'MOSIX process migration support' CONFIG_MOSIX ++if [ "$CONFIG_MOSIX" = "y" ]; then ++ bool 'Support clusters with a complex network topology' CONFIG_MOSIX_TOPOLOGY ++ if [ "$CONFIG_MOSIX_TOPOLOGY" = "y" ]; then ++ int 'Maximum network-topology complexity to support (2-10)' CONFIG_MOSIX_MAXTOPOLOGY 4 ++ fi ++ ++ bool 'MOSIX Kernel Debugger' CONFIG_MOSIX_UDB ++ if [ "$CONFIG_MOSIX_UDB" = "y" ]; then ++ bool 'MOSIX Kernel Debugging Code' CONFIG_MOSIX_DEBUG ++ bool 'Allow migration to self (for easyier testing)' CONFIG_MOSIX_CHEAT_MIGSELF ++ bool 'Process-arrival messages' CONFIG_MOSIX_WEEEEEEEEE ++ fi ++ bool 'MOSIX Kernel Diagnostics' CONFIG_MOSIX_DIAG ++ bool 'Stricter security on MOSIX ports' CONFIG_MOSIX_SECUREPORTS ++ int 'Level of process-identity disclosure (0-3)' CONFIG_MOSIX_DISCLOSURE 1 ++ bool 'Create the kernel 
with a "-mosix" extension' CONFIG_MOSIX_EXTMOSIX ++ bool 'Direct File-System Access' CONFIG_MOSIX_DFSA ++ bool 'MOSIX File-System' CONFIG_MOSIX_FS ++ bool 'Poll/Select exceptions on pipes' CONFIG_MOSIX_PIPE_EXCEPTIONS ++fi ++endmenu ++ ++mainmenu_option next_comment + comment 'Code maturity level options' + bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL + endmenu +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/defconfig linux-openmosix/arch/i386/defconfig +--- /tmp/openmosix/linux-2.4.17/arch/i386/defconfig Mon Nov 12 20:59:03 2001 ++++ linux-openmosix/arch/i386/defconfig Wed May 15 10:55:17 2002 +@@ -1,6 +1,26 @@ + # + # Automatically generated make config: don't edit + # ++ ++# ++# MOSIX options ++# ++ ++CONFIG_MOSIX=y ++# CONFIG_MOSIX_TOPOLOGY is not set ++CONFIG_MOSIX_MAXTOPOLOGY=4 ++# CONFIG_MOSIX_UDB is not set ++# CONFIG_MOSIX_CHEAT_MIGSELF is not set ++# CONFIG_MOSIX_DEBUG is not set ++# CONFIG_MOSIX_WEEEEEEEEE is not set ++CONFIG_MOSIX_DISCLOSURE=1 ++# CONFIG_MOSIX_EXTMOSIX is not set ++CONFIG_MOSIX_DIAG=y ++CONFIG_MOSIX_SECUREPORTS=y ++# CONFIG_MOSIX_DFSA is not set ++# CONFIG_MOSIX_FS is not set ++# CONFIG_MOSIX_PIPE_EXCEPTIONS is not set ++ + CONFIG_X86=y + CONFIG_ISA=y + # CONFIG_SBUS is not set +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/Makefile linux-openmosix/arch/i386/kernel/Makefile +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/Makefile Fri Nov 9 23:21:21 2001 ++++ linux-openmosix/arch/i386/kernel/Makefile Wed May 15 10:55:17 2002 +@@ -42,3 +42,14 @@ + obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o + + include $(TOPDIR)/Rules.make ++ ++ifdef CONFIG_MOSIX ++entry.o: ./mosasm.H ++ ++offset: offset.c $(TOPDIR)/include/linux/config.h $(TOPDIR)/include/linux/types.h $(TOPDIR)/include/linux/sched.h $(TOPDIR)/include/linux/signal.h $(TOPDIR)/include/linux/sys.h $(TOPDIR)/include/linux/kernel.h ++ $(HOSTCC) $(HOSTCFLAGS) -D__KERNEL__ -I$(TOPDIR)/include -o offset offset.c ++ 
++./mosasm.H: offset entry.S ++ ./offset < entry.S > mosasm.H ++ ++endif +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/entry.S linux-openmosix/arch/i386/kernel/entry.S +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/entry.S Sat Nov 3 02:18:49 2001 ++++ linux-openmosix/arch/i386/kernel/entry.S Wed May 15 10:55:17 2002 +@@ -46,6 +46,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include "mosasm.H" ++#endif /* CONFIG_MOSIX */ ++ + EBX = 0x00 + ECX = 0x04 + EDX = 0x08 +@@ -179,10 +183,16 @@ + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp ++#ifdef CONFIG_MOSIX ++ENTRY(ret_from_kickstart) ++ GET_CURRENT(%ebx) ++ jmp ret_from_sys_call ++#else + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call ++#endif /* CONFIG_MOSIX */ + + /* + * Return to user mode is not as complex as all this looks, +@@ -197,16 +207,50 @@ + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys ++/* conflict resolution - Qlusters */ ++#ifdef CONFIG_MOSIX_UDB ++ pushl %eax ++ call SYMBOL_NAME(sys_call_trace) # display syscalls for debugging ++ popl %eax ++#endif /* CONFIG_MOSIX_UDB */ + cmpl $(NR_syscalls),%eax + jae badsys ++#ifdef CONFIG_MOSIX ++ testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) ++ jne adjust_trace_before_syscall ++adjusted_trace: ++ testb $DREMOTE,DFLAGS(%ebx) ++ je local_syscall ++on_remote: ++ pushl %eax ++ call *SYMBOL_NAME(remote_sys_call_table)(,%eax,4) ++ addl $4,%esp ++ movl %eax,EAX(%esp) ++ jmp ret_from_sys_call ++local_syscall: ++#endif /* CONFIG_MOSIX */ + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value ++#ifdef CONFIG_MOSIX ++ call SYMBOL_NAME(mosix_local_syscall) ++#endif /* CONFIG_MOSIX */ + ENTRY(ret_from_sys_call) ++#ifdef CONFIG_MOSIX ++ testl $(DTRACESYS1|DTRACESYS2),DFLAGS(%ebx) ++ jne adjust_trace_before_syscall ++ret_check_reschedule: ++#endif /* CONFIG_MOSIX */ + cli # need_resched and signals 
atomic test + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + jne signal_return ++#ifdef CONFIG_MOSIX ++straight_to_mosix: ++ call SYMBOL_NAME(mosix_pre_usermode_actions) ++ testl %eax,%eax ++ jne ret_from_sys_call ++#endif /* CONFIG_MOSIX */ + restore_all: + RESTORE_ALL + +@@ -218,7 +262,11 @@ + jne v86_signal_return + xorl %edx,%edx + call SYMBOL_NAME(do_signal) ++#ifdef CONFIG_MOSIX ++ jmp straight_to_mosix ++#else + jmp restore_all ++#endif /* CONFIG_MOSIX */ + + ALIGN + v86_signal_return: +@@ -226,18 +274,41 @@ + movl %eax,%esp + xorl %edx,%edx + call SYMBOL_NAME(do_signal) ++#ifdef CONFIG_MOSIX ++ jmp straight_to_mosix ++#else + jmp restore_all ++#endif /* CONFIG_MOSIX */ + + ALIGN + tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) ++#ifdef CONFIG_MOSIX ++adjust_trace_before_syscall: # only arrive here with DTRACESYS(1|2) ++ testl $DDEPUTY,DFLAGS(%ebx) ++ jne straight_to_mosix # no mess with signals/syscalls/tracesys ++ testl $DREMOTE,DFLAGS(%ebx) ++ je no_need_to_unsync ++ call wait_for_permission_to_continue ++no_need_to_unsync: ++ testl $DTRACESYS2,DFLAGS(%ebx) ++ jne second_tracesys # skipping system-call ++ orl $DTRACESYS2,DFLAGS(%ebx) # next time we skip the system-call ++ movl $-ENOSYS,EAX(%esp) ++ movl ORIG_EAX(%esp),%eax ++ cmpl $(NR_syscalls),%eax ++ jae second_tracesys # prevent system-call out of range trick ++ jmp adjusted_trace # now do the system-call ++second_tracesys: # note: "syscall_trace" clears the flags ++#else + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value + tracesys_exit: ++#endif /* CONFIG_MOSIX */ + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call + badsys: +@@ -251,7 +322,11 @@ + movl EFLAGS(%esp),%eax # mix EFLAGS and CS + movb CS(%esp),%al + testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
++#ifdef CONFIG_MOSIX ++ jne ret_check_reschedule ++#else + jne ret_from_sys_call ++#endif /* CONFIG_MOSIX */ + jmp restore_all + + ALIGN +@@ -259,6 +334,73 @@ + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + ++#ifdef CONFIG_MOSIX ++/* ++ * call_with_regs(caddr_t routine, pt_regs *before, pt_regs *after) ++ * pushes the "before" regs on the stack and calls routine, ++ * then places the possibly-modified registers in "after" ++ * (which may possibly equal "before"). ++ * Also, set "current->altregs" to the pushed registers, then restores it. ++ */ ++ENTRY(call_with_regs) ++ pushl %ebx ++ GET_CURRENT(%ebx) ++ pushl ALTREGS(%ebx) ++ movl 16(%esp),%eax ++ pushl 56(%eax) ++ pushl 52(%eax) ++ pushl 48(%eax) ++ pushl 44(%eax) ++ pushl 40(%eax) ++ pushl 36(%eax) ++ pushl 32(%eax) ++ pushl 28(%eax) ++ pushl 24(%eax) ++ pushl 20(%eax) ++ pushl 16(%eax) ++ pushl 12(%eax) ++ pushl 8(%eax) ++ pushl 4(%eax) ++ pushl 0(%eax) ++ movl %esp,ALTREGS(%ebx) ++ movl 72(%esp),%eax ++ call *%eax ++ movl 80(%esp),%edx ++ popl %ecx ++ movl %ecx,0(%edx) ++ popl %ecx ++ movl %ecx,4(%edx) ++ popl %ecx ++ movl %ecx,8(%edx) ++ popl %ecx ++ movl %ecx,12(%edx) ++ popl %ecx ++ movl %ecx,16(%edx) ++ popl %ecx ++ movl %ecx,20(%edx) ++ popl %ecx ++ movl %ecx,24(%edx) ++ popl %ecx ++ movl %ecx,28(%edx) ++ popl %ecx ++ movl %ecx,32(%edx) ++ popl %ecx ++ movl %ecx,36(%edx) ++ popl %ecx ++ movl %ecx,40(%edx) ++ popl %ecx ++ movl %ecx,44(%edx) ++ popl %ecx ++ movl %ecx,48(%edx) ++ popl %ecx ++ movl %ecx,52(%edx) ++ popl %ecx ++ movl %ecx,56(%edx) ++ popl ALTREGS(%ebx) ++ popl %ebx ++ ret ++#endif /* CONFIG_MOSIX */ ++ + ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) +@@ -402,7 +544,11 @@ + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ ++#ifdef CONFIG_MOSIX_DFSA ++ .long SYMBOL_NAME(sys_close_syscall) ++#else + .long SYMBOL_NAME(sys_close) ++#endif /* CONFIG_MOSIX_DFSA */ + .long SYMBOL_NAME(sys_waitpid) + 
.long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/i387.c linux-openmosix/arch/i386/kernel/i387.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/i387.c Fri Feb 23 19:09:08 2001 ++++ linux-openmosix/arch/i386/kernel/i387.c Wed May 15 10:55:17 2002 +@@ -520,3 +520,66 @@ + + return fpvalid; + } ++ ++#ifdef CONFIG_MOSIX ++ ++int ++has_fxsr(void) ++{ ++ return(cpu_has_fxsr); ++} ++ ++void ++fsave_to_fxsave(union i387_union *from, union i387_union *to) ++{ ++ int i; ++ long *fcp, *tcp; ++ ++ to->fxsave.cwd = from->fsave.cwd; ++ to->fxsave.swd = from->fsave.swd; ++ to->fxsave.twd = twd_i387_to_fxsr(from->fsave.twd); ++ to->fxsave.fop = from->fxsave.padding[0]; ++ to->fxsave.fip = from->fsave.fip; ++ to->fxsave.fcs = from->fsave.fcs; ++ to->fxsave.foo = from->fsave.foo; ++ to->fxsave.mxcsr = from->fxsave.padding[1]; ++ to->fxsave.fos = from->fsave.fos; ++ for(fcp = from->fsave.st_space , tcp = to->fxsave.st_space , ++ i = 0 ; i < 8 ; i++) ++ { ++ *tcp++ = *fcp++; ++ *tcp++ = *fcp++; ++ *tcp = *((unsigned short *)fcp)++; ++ tcp += 2; ++ } ++ memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space, ++ sizeof(from->fxsave.xmm_space)); ++} ++ ++void ++fxsave_to_fsave(union i387_union *from, union i387_union *to) ++{ ++ int i; ++ long *fcp, *tcp; ++ ++ to->fsave.cwd = from->fxsave.cwd; ++ to->fsave.swd = from->fxsave.swd; ++ to->fsave.twd = twd_fxsr_to_i387(&from->fxsave); ++ to->fsave.fip = from->fxsave.fip; ++ to->fsave.fcs = from->fxsave.fcs; ++ to->fsave.foo = from->fxsave.foo; ++ to->fsave.fos = from->fxsave.fos; ++ to->fxsave.padding[0] = from->fxsave.fop; ++ to->fxsave.padding[1] = from->fxsave.mxcsr; ++ for(fcp = from->fxsave.st_space , tcp = to->fsave.st_space , ++ i = 0 ; i < 8 ; i++) ++ { ++ *tcp++ = *fcp++; ++ *tcp++ = *fcp++; ++ *((unsigned short *)tcp)++ = *fcp; ++ fcp += 2; ++ } ++ memcpy(to->fxsave.xmm_space, from->fxsave.xmm_space, ++ sizeof(to->fxsave.xmm_space)); ++} ++#endif /* 
CONFIG_MOSIX */ +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/ioport.c linux-openmosix/arch/i386/kernel/ioport.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/ioport.c Tue Jul 20 00:22:48 1999 ++++ linux-openmosix/arch/i386/kernel/ioport.c Wed May 15 10:55:17 2002 +@@ -15,6 +15,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ + static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value) + { +@@ -61,6 +65,10 @@ + return -EINVAL; + if (turn_on && !capable(CAP_SYS_RAWIO)) + return -EPERM; ++#ifdef CONFIG_MOSIX ++ if(turn_on && !mosix_go_home_for_reason(1, DSTAY_FOR_IOPL)) ++ return(-ENOMEM); ++#endif /* CONFIG_MOSIX */ + /* + * If it's the first ioperm() call in this thread's lifetime, set the + * IO bitmap up. ioperm() is much less timing critical than clone(), +@@ -111,6 +119,11 @@ + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + } ++#ifdef CONFIG_MOSIX ++ if(!mosix_go_home_for_reason(1, DSTAY_FOR_IOPL)) ++ return(-ENOMEM); ++ regs = mos_to_regs(¤t->mosix); ++#endif /* CONFIG_MOSIX */ + regs->eflags = (regs->eflags & 0xffffcfff) | (level << 12); + return 0; + } +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/irq.c linux-openmosix/arch/i386/kernel/irq.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/irq.c Thu Oct 25 22:53:46 2001 ++++ linux-openmosix/arch/i386/kernel/irq.c Wed May 15 10:55:17 2002 +@@ -279,6 +279,10 @@ + clear_bit(0,&global_irq_lock); + + for (;;) { ++#ifdef CONFIG_MOSIX_UDB ++ extern int nmi_debugger; ++ if(!nmi_debugger) ++#endif /* CONFIG_MOSIX_UDB */ + if (!--count) { + show("wait_on_irq"); + count = ~0; +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/nmi.c linux-openmosix/arch/i386/kernel/nmi.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/nmi.c Fri Sep 21 05:55:24 2001 ++++ linux-openmosix/arch/i386/kernel/nmi.c Wed May 15 
10:55:17 2002 +@@ -25,6 +25,9 @@ + #include + + unsigned int nmi_watchdog = NMI_NONE; ++#ifdef CONFIG_MOSIX_UDB ++#include ++#endif /* CONFIG_MOSIX_UDB */ + static unsigned int nmi_hz = HZ; + unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ + extern void show_registers(struct pt_regs *regs); +@@ -268,6 +271,10 @@ + */ + int sum, cpu = smp_processor_id(); + ++#ifdef CONFIG_MOSIX_UDB ++ if(nmi_debugger) ++ return; ++#endif /* CONFIG_MOSIX_UDB */ + sum = apic_timer_irqs[cpu]; + + if (last_irq_sums[cpu] == sum) { +@@ -285,6 +292,10 @@ + bust_spinlocks(1); + printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); + show_registers(regs); ++#ifdef CONFIG_MOSIX_UDB ++ mosix_debugger("Watchdog"); ++ return; ++#endif /* CONFIG_MOSIX_UDB */ + printk("console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/offset.c linux-openmosix/arch/i386/kernel/offset.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/offset.c Thu Jan 1 01:00:00 1970 ++++ linux-openmosix/arch/i386/kernel/offset.c Thu May 16 08:43:31 2002 +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (C) 2000, 2001, Amnon Barak (amnon@cs.huji.ac.il) ++ * Some sections copyright 2002 by Moshe Bar ++ * Permission to use this software is hereby granted under the terms of the ++ * GNU General Public License, as published by the Free Software Foundation. ++ * ++ * THIS SOFTWARE IS PROVIDED IN ITS "AS IS" CONDITION, WITH NO WARRANTY ++ * WHATSOEVER. NO LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING ++ * FROM THE USE OF THIS SOFTWARE WILL BE ACCEPTED. ++ */ ++/* ++ * Author(s): Amnon Shiloh, Moshe Bar ++ */ ++ ++/* ++ * Produce an include-file for "entry.S", with: ++ * 1. constant-offsets of some required MOSIX-members of "task_struct" ++ * 2. some bits to test "current->dflags" ++ * 3. a duplicate system-call table (remote_sys_call_table), with all ++ * system-calls names preceded by a "remote_". 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct file; ++#define FILE struct file ++ ++extern int printf(char *, ...); ++extern int fgets(char *, int, FILE *); ++extern int perror(char *); ++extern FILE *stdin; ++ ++char line[2048]; ++ ++int ++main(void) ++{ ++ register char *c; ++ register int i; ++ long t, time(long *); ++ char *ctime(long *); ++ ++ time(&t); ++ printf("/* Please do not edit -- this file is created automatically */\n"); ++ printf("/* %.24s */\n\n", ctime(&t)); ++ printf("ALTREGS\t\t= 0x%X\n", ++ (int)&(((struct task_struct *)0)->mosix.altregs)); ++ printf("DFLAGS\t\t= 0x%X\n", ++ (int)&(((struct task_struct *)0)->mosix.dflags)); ++ printf("DDEPUTY\t\t= 0x%X\n", DDEPUTY); ++ printf("DREMOTE\t\t= 0x%X\n", DREMOTE); ++ printf("DTRACESYS1\t\t= 0x%X\n", DTRACESYS1); ++ printf("DTRACESYS2\t\t= 0x%X\n", DTRACESYS2); ++ printf(".data\n"); ++ printf("\nENTRY(remote_sys_call_table)\n"); ++ while(fgets(line, 2048, stdin) && ++ strcmp(line, "ENTRY(sys_call_table)\n")) ++ ; ++ for(i = 0 ; i < NR_syscalls && fgets(line, 2048, stdin) ; i++) ++ if(!strncmp(line, "\t.long SYMBOL_NAME(", 19)) ++ { ++ for(c = &line[19] ; *c && *c != ')' ; c++); ++ *c = '\0'; ++ if(!strcmp(&line[19], "sys_ni_syscall")) ++ { ++ printf("\t.long SYMBOL_NAME(sys_ni_syscall)\n"); ++ continue; ++ } ++ printf("\t.long SYMBOL_NAME(remote_%s)\n", &line[19]); ++ } ++ else if(!strcmp(line, "\t.rept NR_syscalls-(.-sys_call_table)/4\n")) ++ printf("\t.rept NR_syscalls-(.-remote_sys_call_table)/4\n"); ++ else ++ { ++ printf("%s", line); ++ if(!strncmp(line, "\t.endr", 5)) ++ break; ++ } ++ printf(".text\n"); ++ return(0); ++} +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/process.c linux-openmosix/arch/i386/kernel/process.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/process.c Fri Oct 5 03:42:54 2001 ++++ linux-openmosix/arch/i386/kernel/process.c Wed May 15 10:55:17 2002 +@@ -49,6 +49,10 @@ + + #include + ++#ifdef CONFIG_MOSIX 
++#include ++#endif /* CONFIG_MOSIX */ ++ + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + + int hlt_counter; +@@ -160,6 +164,14 @@ + /* shamelessly grabbed from lib/vsprintf.c for readability */ + #define is_digit(c) ((c) >= '0' && (c) <= '9') + #endif ++ ++#ifdef CONFIG_MOSIX_UDB ++void set_fastest_reboot(void) ++{ ++ reboot_mode = 0x1234; ++} ++#endif /* CONFIG_MOSIX_UDB */ ++ + static int __init reboot_setup(char *str) + { + while(1) { +@@ -510,6 +522,43 @@ + return retval; + } + ++#ifdef CONFIG_MOSIX ++/* ++ * Create a thread that starts as kernel, but will eventually run in user-mode: ++ * The slight differences from "kernel_thread" are: ++ * 1) not using CLONE_VM. ++ * 2) adding SIGCHLD to the flags. ++ * 3) leaving space on the stack for the user-registers. ++ */ ++int user_thread(int (*fn)(void *), void * arg, unsigned long flags) ++{ ++ long retval, d0; ++ ++ __asm__ __volatile__( ++ "movl %%esp,%%esi\n\t" ++ "int $0x80\n\t" /* Linux/i386 system call */ ++ "cmpl %%esp,%%esi\n\t" /* child or parent? */ ++ "je 1f\n\t" /* parent - jump */ ++ "subl %7,%%esp\n\t" /* space for user-registers */ ++ /* Load the argument into eax, and push it. That way, it does ++ * not matter whether the called function is compiled with ++ * -mregparm or not. */ ++ "movl %4,%%eax\n\t" ++ "pushl %%eax\n\t" ++ "call *%5\n\t" /* call fn */ ++ "movl %3,%0\n\t" /* exit */ ++ "int $0x80\n" ++ "1:\t" ++ :"=&a" (retval), "=&S" (d0) ++ :"0" (__NR_clone), "i" (__NR_exit), ++ "r" (arg), "r" (fn), ++ "b" (flags | SIGCHLD), ++ "i" (sizeof(struct pt_regs)) ++ : "memory"); ++ return retval; ++} ++#endif /* CONFIG_MOSIX */ ++ + /* + * Free current thread data structures etc.. 
+ */ +@@ -583,6 +632,10 @@ + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + struct_cpy(childregs, regs); ++#ifdef CONFIG_MOSIX ++ p->mosix.altregs = (uint32_t *)childregs; ++ p->mosix.deputy_regs = current->mosix.deputy_regs; ++#endif /* CONFIG_MOSIX */ + childregs->eax = 0; + childregs->esp = esp; + +@@ -734,10 +787,32 @@ + */ + tss->bitmap = INVALID_IO_BITMAP_OFFSET; + } ++#ifdef CONFIG_MOSIX ++ if(test_bit(X86_FEATURE_TSC, &boot_cpu_data.x86_capability)) ++ { ++ /* REMOTE emulates the RDTSC instruction */ ++ if(next_p->mosix.dflags & DREMOTE) ++ { ++ if(!(prev_p->mosix.dflags & DREMOTE)) ++ __asm__ __volatile__("movl %%cr4,%%eax\n\t" \ ++ "orl $4,%%eax\n\t" \ ++ "movl %%eax,%%cr4\n" \ ++ : : :"ax"); ++ } ++ else if(prev_p->mosix.dflags & DREMOTE) ++ __asm__ __volatile__("movl %%cr4,%%eax\n\t" \ ++ "andl $0xfffffffb,%%eax\n\t" \ ++ "movl %%eax,%%cr4\n" \ ++ : : :"ax"); ++ } ++#endif /* CONFIG_MOSIX */ + } + + asmlinkage int sys_fork(struct pt_regs regs) + { ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++#endif /* CONFIG_MOSIX */ + return do_fork(SIGCHLD, regs.esp, ®s, 0); + } + +@@ -745,12 +820,27 @@ + { + unsigned long clone_flags; + unsigned long newsp; ++#ifdef CONFIG_MOSIX ++ int retval; ++#endif /* CONFIG_MOSIX */ + + clone_flags = regs.ebx; + newsp = regs.ecx; + if (!newsp) ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)) , ++#endif /* CONFIG_MOSIX */ + newsp = regs.esp; ++#ifdef CONFIG_MOSIX ++ if((clone_flags & CLONE_VM) && (retval = mosix_pre_clone())) ++ return(retval); ++ retval = do_fork(clone_flags, newsp, ®s, 0); ++ if(clone_flags & CLONE_VM) ++ mosix_post_clone(); ++ return(retval); ++#else + return do_fork(clone_flags, newsp, ®s, 0); ++#endif /* CONFIG_MOSIX */ + } + + /* +@@ -765,7 +855,18 @@ + */ + asmlinkage int sys_vfork(struct pt_regs regs) + { ++#ifdef CONFIG_MOSIX ++ int retval; ++ ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++ if((retval = mosix_pre_clone())) 
++ return(retval); ++ retval = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); ++ mosix_post_clone(); ++ return(retval); ++#else + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); ++#endif /* CONFIG_MOSIX */ + } + + /* +@@ -801,23 +902,50 @@ + unsigned long ebp, esp, eip; + unsigned long stack_page; + int count = 0; ++#ifdef CONFIG_MOSIX ++ unsigned long result = 0; ++ if (!p || p == current || LOGICAL_STATE(p) == TASK_RUNNING) ++#else + if (!p || p == current || p->state == TASK_RUNNING) ++#endif /* CONFIG_MOSIX */ + return 0; + stack_page = (unsigned long)p; + esp = p->thread.esp; + if (!stack_page || esp < stack_page || esp > 8188+stack_page) ++#ifdef CONFIG_MOSIX ++ return(result); ++#else + return 0; ++#endif /* CONFIG_MOSIX */ + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + ebp = *(unsigned long *) esp; + do { + if (ebp < stack_page || ebp > 8184+stack_page) ++#ifdef CONFIG_MOSIX ++ return(result); ++#else + return 0; ++#endif /* CONFIG_MOSIX */ + eip = *(unsigned long *) (ebp+4); + if (eip < first_sched || eip >= last_sched) ++#ifdef CONFIG_MOSIX ++ { ++ if(!result) ++ return eip; ++ } ++ else ++ result = 0; ++#else + return eip; ++#endif /* CONFIG_MOSIX */ + ebp = *(unsigned long *) ebp; ++#ifdef CONFIG_MOSIX ++ } while (count++ < 64); ++ return(result); ++#else + } while (count++ < 16); + return 0; ++#endif /* CONFIG_MOSIX */ + } + #undef last_sched + #undef first_sched +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/ptrace.c linux-openmosix/arch/i386/kernel/ptrace.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/ptrace.c Wed Nov 21 19:42:41 2001 ++++ linux-openmosix/arch/i386/kernel/ptrace.c Wed May 15 10:55:17 2002 +@@ -21,6 +21,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + /* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. 
+@@ -44,13 +48,47 @@ + * this routine assumes that all the privileged stacks are in our + * data space. + */ ++#ifdef CONFIG_MOSIX ++inline int get_stack_long(struct task_struct *task, int offset) ++#else + static inline int get_stack_long(struct task_struct *task, int offset) ++#endif /* CONFIG_MOSIX */ + { + unsigned char *stack; + ++#ifdef CONFIG_MOSIX ++ if(task != current) ++ lock_mosix(); ++ if(task->mosix.dflags & DDEPUTY) ++ { ++ if(task != current) ++ unlock_mosix(); ++ return(request_process(task, NULL, PR_PTRACE_GET_STACK_LONG, ++ offset)); ++ } ++ if(task == current) ++ mosix_obtain_registers(ALL_REGISTERS); ++ else ++ task_lock(task); ++ if(!task->thread.saved_esp0) /* unless in VM86 mode */ ++ stack = (char *)(mos_to_regs(&task->mosix) + 1); ++ else ++#endif /* CONFIG_MOSIX */ + stack = (unsigned char *)task->thread.esp0; + stack += offset; ++#ifdef CONFIG_MOSIX ++ { ++ int res = *((int *)stack); ++ if(task != current) ++ { ++ task_unlock(task); ++ unlock_mosix(); ++ } ++ return(res); ++ } ++#else + return (*((int *)stack)); ++#endif /* CONFIG_MOSIX */ + } + + /* +@@ -59,14 +97,43 @@ + * this routine assumes that all the privileged stacks are in our + * data space. 
+ */ ++#ifdef CONFIG_MOSIX ++inline int put_stack_long(struct task_struct *task, int offset, ++#else + static inline int put_stack_long(struct task_struct *task, int offset, ++#endif /* CONFIG_MOSIX */ + unsigned long data) + { + unsigned char * stack; + ++#ifdef CONFIG_MOSIX ++ if(task != current) ++ lock_mosix(); ++ if(task->mosix.dflags & DDEPUTY) ++ { ++ if(task != current) ++ unlock_mosix(); ++ return(request_process_arg2(task, NULL, ++ PR_PTRACE_PUT_STACK_LONG, offset, data)); ++ } ++ if(task == current) ++ mosix_obtain_registers(ALL_REGISTERS); ++ else ++ task_lock(task); ++ if(!task->thread.saved_esp0) /* unless in VM86 mode */ ++ stack = (char *)(mos_to_regs(&task->mosix) + 1); ++ else ++#endif /* CONFIG_MOSIX */ + stack = (unsigned char *) task->thread.esp0; + stack += offset; + *(unsigned long *) stack = data; ++#ifdef CONFIG_MOSIX ++ if(task != current) ++ { ++ task_unlock(task); ++ unlock_mosix(); ++ } ++#endif /* CONFIG_MOSIX */ + return 0; + } + +@@ -107,6 +174,17 @@ + return 0; + } + ++#ifdef CONFIG_MOSIX ++void ++ptrace_putregs(unsigned long *data) ++{ ++ register int i; ++ ++ for (i = 0; i < FRAME_SIZE ; i++) ++ putreg(current, i << 2, data[i]); ++} ++#endif /* CONFIG_MOSIX */ ++ + static unsigned long getreg(struct task_struct *child, + unsigned long regno) + { +@@ -147,6 +225,17 @@ + put_stack_long(child, EFL_OFFSET, tmp); + } + ++#ifdef CONFIG_MOSIX ++void ++ptrace_getregs(unsigned long *data) ++{ ++ register int i; ++ ++ for (i = 0; i < FRAME_SIZE ; i++) ++ data[i] = getreg(current, i << 2); ++} ++#endif /* CONFIG_MOSIX */ ++ + asmlinkage int sys_ptrace(long request, long pid, long addr, long data) + { + struct task_struct *child; +@@ -161,6 +250,9 @@ + goto out; + /* set the ptrace bit in the process flags. 
*/ + current->ptrace |= PT_PTRACED; ++#ifdef CONFIG_MOSIX_DFSA ++ tell_process(current, DREQ_NOTUPTODATE); ++#endif /* CONFIG_MOSIX_DFSA */ + ret = 0; + goto out; + } +@@ -210,6 +302,17 @@ + addr > sizeof(struct user) - 3) + break; + ++#ifdef CONFIG_MOSIX ++ lock_mosix(); ++ if(child->mosix.dflags & DDEPUTY) ++ { ++ unlock_mosix(); ++ if(request_process(child, &tmp, PR_PTRACE_PEEKUSER, addr)) ++ goto out; ++ } ++ else ++ { ++#endif /* CONFIG_MOSIX */ + tmp = 0; /* Default return condition */ + if(addr < FRAME_SIZE*sizeof(long)) + tmp = getreg(child, addr); +@@ -219,6 +322,10 @@ + addr = addr >> 2; + tmp = child->thread.debugreg[addr]; + } ++#ifdef CONFIG_MOSIX ++ unlock_mosix(); ++ } ++#endif /* CONFIG_MOSIX */ + ret = put_user(tmp,(unsigned long *) data); + break; + } +@@ -265,7 +372,23 @@ + + addr -= (long) &dummy->u_debugreg; + addr = addr >> 2; ++#ifdef CONFIG_MOSIX ++ lock_mosix(); ++ if(child->mosix.dflags & DDEPUTY) ++ { ++ unlock_mosix(); ++ if(request_process_arg2(child, NULL, ++ PR_PTRACE_POKEUSER, addr, data)) ++ goto out; ++ } ++ else ++ { ++#endif /* CONFIG_MOSIX */ + child->thread.debugreg[addr] = data; ++#ifdef CONFIG_MOSIX ++ unlock_mosix(); ++ } ++#endif /* CONFIG_MOSIX */ + ret = 0; + } + break; +@@ -282,9 +405,22 @@ + else + child->ptrace &= ~PT_TRACESYS; + child->exit_code = data; ++#ifdef CONFIG_MOSIX ++ if(child->mosix.dflags & DDEPUTY) ++ { ++ if(request_process(child, NULL, PR_PTRACE_CONT, ++ request)) ++ goto out; ++ } ++ else ++ { ++#endif /* CONFIG_MOSIX */ + /* make sure the single step bit is not set. 
*/ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET,tmp); ++#ifdef CONFIG_MOSIX ++ } ++#endif /* CONFIG_MOSIX */ + wake_up_process(child); + ret = 0; + break; +@@ -299,7 +435,11 @@ + long tmp; + + ret = 0; ++#ifdef CONFIG_MOSIX ++ if (LOGICAL_STATE(child) == TASK_ZOMBIE) /* already dead */ ++#else + if (child->state == TASK_ZOMBIE) /* already dead */ ++#endif /* CONFIG_MOSIX */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ +@@ -320,8 +460,21 @@ + /* Spurious delayed TF traps may occur */ + child->ptrace |= PT_DTRACE; + } ++#ifdef CONFIG_MOSIX ++ if(child->mosix.dflags & DDEPUTY) ++ { ++ if(request_process(child, NULL, PR_PTRACE_SINGLE_STEP, ++ 0)) ++ goto out; ++ } ++ else ++ { ++#endif /* CONFIG_MOSIX */ + tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); ++#ifdef CONFIG_MOSIX ++ } ++#endif /* CONFIG_MOSIX */ + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); +@@ -339,26 +492,57 @@ + ret = -EIO; + break; + } ++#ifdef CONFIG_MOSIX ++ /* NOTE: even when child does not look like a DEPUTY, ++ * it could easily become one while we wait for a page ++ * on __put_user ... therefore, we do this ALWAYS: ++ */ ++ { ++ unsigned long t[FRAME_SIZE]; ++ ++ if(request_process(child, &t, PR_PTRACE_GETREGS, 0)) ++ { ret = -EIO; break; } /* leave the case now: the shared "ret = 0" below would otherwise mask this failure */ ++ else ++ copy_to_user((void *)data, t, sizeof(t)); ++ } ++#else + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __put_user(getreg(child, i),(unsigned long *) data); + data += sizeof(long); + } ++#endif /* CONFIG_MOSIX */ + ret = 0; + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child.
*/ ++#ifndef CONFIG_MOSIX + unsigned long tmp; ++#endif /* CONFIG_MOSIX */ + if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) { + ret = -EIO; + break; + } ++#ifdef CONFIG_MOSIX ++ /* NOTE: even when child does not look like a DEPUTY, ++ * it could easily become one while we wait for a page ++ * on __get_user ... therefore, we do this ALWAYS: ++ */ ++ { ++ unsigned long t[FRAME_SIZE]; ++ ++ copy_from_user(t, (void *)data, sizeof(t)); ++ ret = request_process(child, &t, PR_PTRACE_SETREGS, 0) ++ ? -EIO : 0; ++ } ++#else + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __get_user(tmp, (unsigned long *) data); + putreg(child, i, tmp); + data += sizeof(long); + } + ret = 0; ++#endif /* CONFIG_MOSIX */ + break; + } + +@@ -369,6 +553,23 @@ + break; + } + ret = 0; ++#ifdef CONFIG_MOSIX ++ /* DEPUTY or NOT - the child-process may become DEPUTY ++ * while we page-fault on "__copy_to_user", ++ * so we always do it the long way: ++ */ ++ { ++ struct user_i387_struct f; ++ ++ if(request_process(child, &f, PR_PTRACE_GETFPREGS, 0)) ++ { ++ ret = -EIO; ++ goto out; ++ } ++ __copy_to_user((void *)data, &f, ++ sizeof(struct user_i387_struct)); ++ } ++#else + if ( !child->used_math ) { + /* Simulate an empty FPU. */ + set_fpu_cwd(child, 0x037f); +@@ -376,6 +577,7 @@ + set_fpu_twd(child, 0xffff); + } + get_fpregs((struct user_i387_struct *)data, child); ++#endif /* CONFIG_MOSIX */ + break; + } + +@@ -385,9 +587,23 @@ + ret = -EIO; + break; + } ++#ifdef CONFIG_MOSIX ++ /* DEPUTY or NOT - the child-process may become DEPUTY ++ * while we page-fault on "__copy_from_user", ++ * so we always do it the long way: ++ */ ++ { ++ struct user_i387_struct f; ++ ++ __copy_from_user(&f, (void *)data, sizeof(f)); ++ ret = request_process(child, &f, PR_PTRACE_SETFPREGS, 0) ++ ? 
-EIO : 0; ++ } ++#else + child->used_math = 1; + set_fpregs(child, (struct user_i387_struct *)data); + ret = 0; ++#endif /* CONFIG_MOSIX */ + break; + } + +@@ -397,6 +613,24 @@ + ret = -EIO; + break; + } ++#ifdef CONFIG_MOSIX ++ ret = 0; ++ /* DEPUTY or NOT - the child-process may become DEPUTY ++ * while we page-fault on "__copy_to_user", ++ * so we always do it the long way: ++ */ ++ { ++ struct user_fxsr_struct f; ++ ++ if(request_process(child, &f, PR_PTRACE_GETFPXREGS, 0)) ++ { ++ ret = -EIO; ++ goto out; ++ } ++ __copy_to_user((void *)data, &f, ++ sizeof(struct user_fxsr_struct)); ++ } ++#else + if ( !child->used_math ) { + /* Simulate an empty FPU. */ + set_fpu_cwd(child, 0x037f); +@@ -405,6 +639,7 @@ + set_fpu_mxcsr(child, 0x1f80); + } + ret = get_fpxregs((struct user_fxsr_struct *)data, child); ++#endif /* CONFIG_MOSIX */ + break; + } + +@@ -414,8 +649,22 @@ + ret = -EIO; + break; + } ++#ifdef CONFIG_MOSIX ++ /* DEPUTY or NOT - the child-process may become DEPUTY ++ * while we page-fault on "__copy_from_user", ++ * so we always do it the long way: ++ */ ++ { ++ struct user_fxsr_struct f; ++ ++ __copy_from_user(&f, (void *)data, sizeof(f)); ++ ret = request_process(child, &f, PR_PTRACE_SETFPXREGS, ++ 0) ? -EIO : 0; ++ } ++#else + child->used_math = 1; + ret = set_fpxregs(child, (struct user_fxsr_struct *)data); ++#endif /* CONFIG_MOSIX */ + break; + } + +@@ -439,7 +688,174 @@ + return ret; + } + ++#ifdef CONFIG_MOSIX ++/* ++ * The following routines are parts of sys_ptrace performed by the ++ * child process on itself. ++ * Should the relevant code in "sys_ptrace" change, the following routines ++ * must also be modified accordingly. 
++ */ ++unsigned long ++ptrace_peekuser(long addr) ++{ ++ struct user * dummy = NULL; ++ unsigned long tmp; ++ ++ tmp = 0; /* Default return condition */ ++ if(addr < FRAME_SIZE*sizeof(long)) ++ tmp = getreg(current, addr); ++ if(addr >= (long) &dummy->u_debugreg[0] && ++ addr <= (long) &dummy->u_debugreg[7]){ ++ addr -= (long) &dummy->u_debugreg[0]; ++ addr = addr >> 2; ++ tmp = current->thread.debugreg[addr]; ++ }; ++ return(tmp); ++} ++ ++void ++ptrace_pokeuser(long addr, long data) ++{ ++ current->thread.debugreg[addr] = data; ++#define loaddebug(tsk,register) \ ++ __asm__("movl %0,%%db" #register \ ++ : /* no output */ \ ++ :"r" (tsk->thread.debugreg[register])) ++ /* unlike the original ptrace code, we are doing this for ourselves, ++ * and there may be no "switch_to" before we go to user-mode again, ++ * so here we go: ++ */ ++ if(current->thread.debugreg[7]) ++ { ++ loaddebug(current, 0); ++ loaddebug(current, 1); ++ loaddebug(current, 2); ++ loaddebug(current, 3); ++ loaddebug(current, 6); ++ loaddebug(current, 7); ++ } ++} ++ ++void ++ptrace_cont(int request) ++{ ++ unsigned long tmp; ++ struct task_struct *tsk = current; ++ ++ tmp = get_stack_long(tsk, EFL_OFFSET) & ~TRAP_FLAG; ++ put_stack_long(tsk, EFL_OFFSET, tmp); ++ if (request == PTRACE_SYSCALL) ++ tsk->ptrace |= PT_TRACESYS; ++ else ++ tsk->ptrace &= ~PT_TRACESYS; ++ if (request == PTRACE_DETACH) ++ tsk->ptrace &= ~PT_PTRACED; ++} ++ ++void ++ptrace_single_step(void) ++{ ++ unsigned long tmp; ++ ++ tmp = get_stack_long(current, EFL_OFFSET) | TRAP_FLAG; ++ put_stack_long(current, EFL_OFFSET, tmp); ++ current->ptrace |= PT_PTRACED | PT_DTRACE; ++#ifdef CONFIG_MOSIX_DFSA ++ tell_process(current, DREQ_NOTUPTODATE); ++#endif /* CONFIG_MOSIX_DFSA */ ++} ++ ++void ++ptrace_getfpregs(struct user_i387_struct *to) ++{ ++ struct task_struct *p = current; ++ ++ unlazy_fpu(p); ++ if (!p->used_math) ++ { ++ /* Simulate an empty FPU. 
*/ ++ set_fpu_cwd(p, 0x037f); ++ set_fpu_swd(p, 0x0000); ++ set_fpu_twd(p, 0xffff); ++ } ++ get_fpregs(to, p); ++} ++ ++void ++ptrace_getfpxregs(struct user_fxsr_struct *to) ++{ ++ struct task_struct *p = current; ++ ++ unlazy_fpu(p); ++ if (!p->used_math) ++ { ++ /* Simulate an empty FPU. */ ++ set_fpu_cwd(p, 0x037f); ++ set_fpu_swd(p, 0x0000); ++ set_fpu_twd(p, 0xffff); ++ set_fpu_mxcsr(p, 0x1f80); ++ } ++ get_fpxregs(to, p); ++} ++ ++void ++ptrace_setfpregs(struct user_i387_struct *from) ++{ ++ struct task_struct *p = current; ++ ++ clear_fpu(p); ++ p->used_math = 1; ++ set_fpregs(p, from); ++} ++ ++void ++ptrace_setfpxregs(struct user_fxsr_struct *from) ++{ ++ struct task_struct *p = current; ++ ++ clear_fpu(p); ++ p->used_math = 1; ++ set_fpxregs(p, from); ++} ++#endif /* CONFIG_MOSIX */ ++ ++#ifdef CONFIG_MOSIX ++/* ++ * When a process starts a system call in PT_TRACESYS mode, it must run ++ * the full sequence: A=syscall_trace, B=the_system_call, C=syscall_trace. ++ * Since our process can migrate at any of those stages, we maintain 2 flags ++ * that tell us where to resume in the sequence. ++ * In the particular case of DTRACESYS1, the trace was already complete ++ * before the migration. In the case of DTRACESYS2, the system-call was already ++ * complete before the migration and the code in "entry.S" prevents it from ++ * re-running. 
++ */ ++asmlinkage void ++syscall_trace(void) ++{ ++ extern asmlinkage void do_syscall_trace(void); ++ ++ switch(current->mosix.dflags & (DTRACESYS1|DTRACESYS2)) ++ { ++ case 0: ++ current->mosix.dflags |= DTRACESYS1; ++ break; ++ case DTRACESYS1: ++ return; ++ case DTRACESYS2: ++ case DTRACESYS1|DTRACESYS2: ++ current->mosix.dflags &= ~(DTRACESYS1|DTRACESYS2); ++ } ++ if(current->mosix.dflags & DREMOTE) ++ mosix_remote_syscall_trace(); ++ else ++ do_syscall_trace(); ++} ++ ++asmlinkage void do_syscall_trace(void) ++#else + asmlinkage void syscall_trace(void) ++#endif /* CONFIG_MOSIX */ + { + if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) != + (PT_PTRACED|PT_TRACESYS)) +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/signal.c linux-openmosix/arch/i386/kernel/signal.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/signal.c Fri Sep 14 23:15:40 2001 ++++ linux-openmosix/arch/i386/kernel/signal.c Wed May 15 10:55:17 2002 +@@ -24,6 +24,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + #define DEBUG_SIG 0 + + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +@@ -37,6 +41,29 @@ + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { ++#ifdef CONFIG_MOSIX ++ /* it is unreasonable to send a separate request per word, ++ * so find the limit and send them all together. ++ */ ++ int sz = offsetof(struct siginfo, _sifields); ++ ++ switch(from->si_code >> 16) ++ { ++ case __SI_FAULT >> 16: ++ sz += sizeof(to->_sifields._sigfault); ++ break; ++ case __SI_CHLD >> 16: ++ sz += sizeof(to->_sifields._sigchld); ++ break; ++ case __SI_MIGRATION >> 16: ++ sz += sizeof(to->_sifields._sigmig); ++ break; ++ default: ++ sz += sizeof(to->_sifields._kill); ++ break; ++ } ++ return(__copy_to_user(to, from, sz)); ++#else + int err; + + /* If you change siginfo_t structure, please be sure +@@ -62,6 +89,7 @@ + /* case __SI_RT: This is not generated by the kernel as of now. 
*/ + } + return err; ++#endif /* CONFIG_MOSIX */ + } + } + +@@ -81,6 +109,10 @@ + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers( ++ BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); ++#endif /* CONFIG_MOSIX */ + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; +@@ -110,6 +142,10 @@ + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers( ++ BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); ++#endif /* CONFIG_MOSIX */ + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; +@@ -155,6 +191,10 @@ + sys_sigaltstack(const stack_t *uss, stack_t *uoss) + { + struct pt_regs *regs = (struct pt_regs *) &uss; ++ ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++#endif /* CONFIG_MOSIX */ + return do_sigaltstack(uss, uoss, regs->esp); + } + +@@ -185,11 +225,19 @@ + char retcode[8]; + }; + ++#ifdef CONFIG_MOSIX ++int ++#else + static int ++#endif /* CONFIG_MOSIX */ + restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) + { + unsigned int err = 0; + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(ALL_REGISTERS); ++#endif /* CONFIG_MOSIX */ ++ + #define COPY(x) err |= __get_user(regs->x, &sc->x) + + #define COPY_SEG(seg) \ +@@ -249,10 +297,18 @@ + asmlinkage int sys_sigreturn(unsigned long __unused) + { + struct pt_regs *regs = (struct pt_regs *) &__unused; ++#ifdef CONFIG_MOSIX ++ struct sigframe *frame; ++#else + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); ++#endif /* CONFIG_MOSIX */ + sigset_t set; + int eax; + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++ frame = (struct sigframe *)(regs->esp - 8); ++#endif /* CONFIG_MOSIX */ + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) +@@ -267,6 +323,14 @@ +
recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DDEPUTY) ++ { ++ if (mosix_deputy_restore_sigcontext(&frame->sc, &eax)) ++ goto badframe; ++ } ++ else ++#endif /* CONFIG_MOSIX */ + if (restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; +@@ -279,11 +343,19 @@ + asmlinkage int sys_rt_sigreturn(unsigned long __unused) + { + struct pt_regs *regs = (struct pt_regs *) &__unused; ++#ifdef CONFIG_MOSIX ++ struct rt_sigframe *frame; ++#else + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); ++#endif /* CONFIG_MOSIX */ + sigset_t set; + stack_t st; + int eax; + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++ frame = (struct rt_sigframe *)(regs->esp - 4); ++#endif /* CONFIG_MOSIX */ + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) +@@ -295,6 +367,14 @@ + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DDEPUTY) ++ { ++ if(mosix_deputy_restore_sigcontext(&frame->uc.uc_mcontext,&eax)) ++ goto badframe; ++ } ++ else ++#endif /* CONFIG_MOSIX */ + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + +@@ -302,6 +382,9 @@ + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors.
*/ ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(BIT_OF_REGISTER(esp)); ++#endif /* CONFIG_MOSIX */ + do_sigaltstack(&st, NULL, regs->esp); + + return eax; +@@ -321,6 +404,9 @@ + { + int tmp, err = 0; + ++#ifdef CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID ++ mosix_obtain_registers(ALL_REGISTERS); ++#endif /* CONFIG_MOSIX_NO_NEED__ALL_CALLERS_ALREADY_DID */ + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->gs); +@@ -385,12 +471,19 @@ + return (void *)((esp - frame_size) & -8ul); + } + ++#ifdef CONFIG_MOSIX ++void setup_frame(int sig, struct k_sigaction *ka, ++#else + static void setup_frame(int sig, struct k_sigaction *ka, ++#endif /* CONFIG_MOSIX */ + sigset_t *set, struct pt_regs * regs) + { + struct sigframe *frame; + int err = 0; + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(ALL_REGISTERS); ++#endif /* CONFIG_MOSIX */ + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) +@@ -455,12 +548,19 @@ + force_sig(SIGSEGV, current); + } + ++#ifdef CONFIG_MOSIX ++void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, ++#else + static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, ++#endif /* CONFIG_MOSIX */ + sigset_t *set, struct pt_regs * regs) + { + struct rt_sigframe *frame; + int err = 0; + ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers(ALL_REGISTERS); ++#endif /* CONFIG_MOSIX */ + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) +@@ -538,6 +638,10 @@ + handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) + { ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers( ++ BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); ++#endif /* CONFIG_MOSIX */ + /* Are we from a system call? */ + if (regs->orig_eax >= 0) { + /* If so, check system call restarting.. 
*/ +@@ -559,6 +663,11 @@ + } + + /* Set up the stack frame */ ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DDEPUTY) ++ mosix_deputy_setup_frame(sig, ka, *info, oldset); ++ else ++#endif /* CONFIG_MOSIX */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else +@@ -586,6 +695,11 @@ + siginfo_t info; + struct k_sigaction *ka; + ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DREMOTE) ++ return(0); ++#endif /* CONFIG_MOSIX */ ++ + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from +@@ -611,6 +725,10 @@ + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DDEPUTY) ++ mosix_deputy_rusage(0); ++#endif /* CONFIG_MOSIX */ + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); +@@ -668,6 +786,10 @@ + + case SIGSTOP: { + struct signal_struct *sig; ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DDEPUTY) ++ mosix_deputy_rusage(0); ++#endif /* CONFIG_MOSIX */ + current->state = TASK_STOPPED; + current->exit_code = signr; + sig = current->p_pptr->sig; +@@ -706,6 +828,10 @@ + } + + /* Did we come from a system call? 
*/ ++#ifdef CONFIG_MOSIX ++ mosix_obtain_registers( ++ BIT_OF_REGISTER(orig_eax)|BIT_OF_REGISTER(eax)|BIT_OF_REGISTER(eip)); ++#endif /* CONFIG_MOSIX */ + if (regs->orig_eax >= 0) { + /* Restart the system call - no handlers present */ + if (regs->eax == -ERESTARTNOHAND || +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/sys_i386.c linux-openmosix/arch/i386/kernel/sys_i386.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/sys_i386.c Mon Mar 19 21:35:09 2001 ++++ linux-openmosix/arch/i386/kernel/sys_i386.c Wed May 15 10:55:17 2002 +@@ -22,6 +22,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX_DFSA ++#include ++#endif /* CONFIG_MOSIX_DFSA */ ++ + /* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. +@@ -55,12 +59,20 @@ + goto out; + } + ++#ifdef CONFIG_MOSIX ++ error = do_mmap_pgoff_down(file, addr, len, prot, flags, pgoff); ++#else + down_write(&current->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); ++#endif /* CONFIG_MOSIX */ + + if (file) + fput(file); ++#ifdef CONFIG_MOSIX_DFSA ++ if(file && file_count(file) > 1) ++ dfsa_touch_file(fd); ++#endif /* CONFIG_MOSIX_DFSA */ + out: + return error; + } +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/traps.c linux-openmosix/arch/i386/kernel/traps.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/traps.c Sun Sep 30 21:26:08 2001 ++++ linux-openmosix/arch/i386/kernel/traps.c Wed May 15 10:55:17 2002 +@@ -50,6 +50,11 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#include ++#endif /* CONFIG_MOSIX */ ++ + asmlinkage int system_call(void); + asmlinkage void lcall7(void); + asmlinkage void lcall27(void); +@@ -248,6 +253,12 @@ + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); ++#ifdef CONFIG_MOSIX_UDB ++ { ++ extern void mosix_debugger(char *); ++ mosix_debugger("die"); ++ } ++#endif /* CONFIG_MOSIX_UDB */ +
do_exit(SIGSEGV); + } + +@@ -336,10 +347,16 @@ + } + + DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) ++#ifndef CONFIG_MOSIX_UDB + DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) ++#endif /* CONFIG_MOSIX_UDB */ + DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) + DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) ++#ifdef CONFIG_MOSIX ++DO_ERROR_INFO( 6, SIGILL, "invalid operand", real_invalid_op, ILL_ILLOPN, regs->eip) ++#else + DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) ++#endif /* CONFIG_MOSIX */ + DO_VM86_ERROR( 7, SIGSEGV, "device not available", device_not_available) + DO_ERROR( 8, SIGSEGV, "double fault", double_fault) + DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) +@@ -356,6 +373,24 @@ + if (!(regs->xcs & 3)) + goto gp_in_kernel; + ++#ifdef CONFIG_MOSIX ++ if(error_code == 0 && (current->mosix.dflags & DREMOTE)) ++ { ++ short code; ++ u64 clk; ++ ++ if(!get_user(code, (short *)regs->eip) && ++ code == 0x310F) /* RDTSC */ ++ { ++ clk = mosix_remote_tsc(); ++ regs->eax = clk & 0xffffffff; ++ regs->edx = clk >> 32; ++ regs->eip += 2; ++ return; ++ } ++ mosix_go_home(0); ++ } ++#endif /* CONFIG_MOSIX */ + current->thread.error_code = error_code; + current->thread.trap_no = 13; + force_sig(SIGSEGV, current); +@@ -452,6 +487,41 @@ + inb(0x71); /* dummy */ + } + ++#ifdef CONFIG_MOSIX_UDB ++asmlinkage void do_int3(struct pt_regs * regs, long error_code) ++{ ++ extern int udb_breakpoint(struct pt_regs *regs); ++ ++ if (!(regs->eflags & VM_MASK) && (regs->xcs & 3) != 3 && ++ udb_breakpoint(regs)) ++ return; ++ do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); ++} ++#endif /* CONFIG_MOSIX_UDB */ ++ ++#ifdef CONFIG_MOSIX ++static __u32 user_features[NCAPINTS] = USER_MODE_FEATURES; ++ ++asmlinkage void ++do_invalid_op(struct pt_regs * regs, long error_code) ++{ ++ if((current->mosix.dflags & DREMOTE) && !(regs->eflags & VM_MASK) && ++ (regs->xcs & 3) == 3) ++ { ++ int 
i; ++ ++ for(i = 0 ; i < NCAPINTS ; i++) ++ if(current->mosix.features[i] & user_features[i] & ++ ~boot_cpu_data.x86_capability[i]) ++ { ++ mosix_go_home(0); /* no return if successful */ ++ break; ++ } ++ } ++ do_real_invalid_op(regs, error_code); ++} ++#endif /* CONFIG_MOSIX */ ++ + /* + * Our handling of the processor debug registers is non-trivial. + * We do not clear them on entry and exit from the kernel. Therefore +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/kernel/vm86.c linux-openmosix/arch/i386/kernel/vm86.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/kernel/vm86.c Sat Jul 7 02:05:07 2001 ++++ linux-openmosix/arch/i386/kernel/vm86.c Wed May 15 10:55:17 2002 +@@ -17,6 +17,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + /* + * Known problems: + * +@@ -69,6 +73,10 @@ + struct pt_regs *ret; + unsigned long tmp; + ++#ifdef CONFIG_MOSIX ++ if(current->mosix.dflags & DREMOTE) ++ panic("remote save_v86"); ++#endif /* CONFIG_MOSIX */ + if (!current->thread.vm86_info) { + printk("no vm86_info: BAD\n"); + do_exit(SIGSEGV); +@@ -83,9 +91,18 @@ + do_exit(SIGSEGV); + } + tss = init_tss + smp_processor_id(); ++#ifdef CONFIG_MOSIX ++ lock_mosix(); /* ptrace checks saved_esp0 under the mosix-lock */ ++#endif /* CONFIG_MOSIX */ + tss->esp0 = current->thread.esp0 = current->thread.saved_esp0; + current->thread.saved_esp0 = 0; + ret = KVM86->regs32; ++#ifdef CONFIG_MOSIX ++ unlock_mosix(); ++ task_lock(current); ++ current->mosix.stay &= ~DSTAY_FOR_86; ++ task_unlock(current); ++#endif /* CONFIG_MOSIX */ + return ret; + } + +@@ -136,6 +153,13 @@ + struct task_struct *tsk; + int tmp, ret = -EPERM; + ++#ifdef CONFIG_MOSIX ++ if(!mosix_go_home_for_reason(1, DSTAY_FOR_86)) ++ { ++ ret = -ENOMEM; ++ goto out; ++ } ++#endif /* CONFIG_MOSIX */ + tsk = current; + if (tsk->thread.saved_esp0) + goto out; +@@ -193,6 +217,13 @@ + ret = -EFAULT; + if (tmp) + goto out; ++#ifdef CONFIG_MOSIX ++ if(!mosix_go_home_for_reason(1, 
DSTAY_FOR_86)) ++ { ++ ret = -ENOMEM; ++ goto out; ++ } ++#endif /* CONFIG_MOSIX */ + info.regs32 = (struct pt_regs *) &subfunction; + info.vm86plus.is_vm86pus = 1; + tsk->thread.vm86_info = (struct vm86_struct *)v86; +@@ -245,9 +276,15 @@ + * Save old state, set default return value (%eax) to 0 + */ + info->regs32->eax = 0; ++#ifdef CONFIG_MOSIX ++ lock_mosix(); ++#endif /* CONFIG_MOSIX */ + tsk->thread.saved_esp0 = tsk->thread.esp0; + tss = init_tss + smp_processor_id(); + tss->esp0 = tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; ++#ifdef CONFIG_MOSIX ++ unlock_mosix(); ++#endif /* CONFIG_MOSIX */ + + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) +@@ -267,6 +304,11 @@ + + regs32 = save_v86_state(regs16); + regs32->eax = retval; ++#ifdef CONFIG_MOSIX ++ task_lock(current); ++ current->mosix.stay &= ~DSTAY_FOR_86; ++ task_unlock(current); ++#endif /* CONFIG_MOSIX */ + __asm__ __volatile__("movl %0,%%esp\n\t" + "jmp ret_from_sys_call" + : : "r" (regs32), "b" (current)); +@@ -602,7 +644,11 @@ + int ret = 0; + + read_lock(&tasklist_lock); ++#ifdef CONFIG_MOSIX ++ for_each_local_task(p) { ++#else + for_each_task(p) { ++#endif /* CONFIG_MOSIX */ + if ((p == tsk) && (p->sig)) { + ret = 1; + break; +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/lib/usercopy.c linux-openmosix/arch/i386/lib/usercopy.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/lib/usercopy.c Fri Nov 9 22:58:02 2001 ++++ linux-openmosix/arch/i386/lib/usercopy.c Wed May 15 10:55:17 2002 +@@ -99,6 +99,11 @@ + __strncpy_from_user(char *dst, const char *src, long count) + { + long res; ++ ++#ifdef CONFIG_MOSIX ++ if(USER_IS_REMOTE) ++ return(deputy_strncpy_from_user(dst, (char *)src, count, 0)); ++#endif /* CONFIG_MOSIX */ + __do_strncpy_from_user(dst, src, count, res); + return res; + } +@@ -107,6 +112,11 @@ + strncpy_from_user(char *dst, const char *src, long count) + { + long res = -EFAULT; ++ ++#ifdef CONFIG_MOSIX ++ if(USER_IS_REMOTE) ++ 
return(deputy_strncpy_from_user(dst, (char *)src, count, 1)); ++#endif /* CONFIG_MOSIX */ + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +@@ -141,6 +151,10 @@ + unsigned long + clear_user(void *to, unsigned long n) + { ++#ifdef CONFIG_MOSIX ++ if (USER_IS_REMOTE) ++ return(deputy_clear_user(to, n, 1)); ++#endif /* CONFIG_MOSIX */ + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +@@ -149,6 +163,10 @@ + unsigned long + __clear_user(void *to, unsigned long n) + { ++#ifdef CONFIG_MOSIX ++ if (USER_IS_REMOTE) ++ return(deputy_clear_user(to, n, 0)); ++#endif /* CONFIG_MOSIX */ + __do_clear_user(to, n); + return n; + } +@@ -164,6 +182,10 @@ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + ++#ifdef CONFIG_MOSIX ++ if(USER_IS_REMOTE) ++ return(deputy_strnlen_user((char *)s, n)); ++#endif /* CONFIG_MOSIX */ + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/arch/i386/mm/fault.c linux-openmosix/arch/i386/mm/fault.c +--- /tmp/openmosix/linux-2.4.17/arch/i386/mm/fault.c Wed Oct 10 00:13:03 2001 ++++ linux-openmosix/arch/i386/mm/fault.c Wed May 15 10:55:17 2002 +@@ -25,6 +25,10 @@ + #include + #include + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + extern void die(const char *,struct pt_regs *,long); + + extern int console_loglevel; +@@ -40,6 +44,10 @@ + if (!size) + return 1; + ++#ifdef CONFIG_MOSIX ++ if(USER_IS_REMOTE) ++ return(deputy_verify_write((void *)addr, size)); ++#endif /* CONFIG_MOSIX */ + vma = find_vma(current->mm, start); + if (!vma) + goto bad_area; +@@ -136,6 +144,10 @@ + asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); + extern unsigned long idt; + ++#ifdef CONFIG_MOSIX_UDB ++int debug_fixup = 0; ++#endif /* CONFIG_MOSIX_UDB */ ++ + /* + * This routine handles page faults. 
It determines the address, + * and the problem, and then passes it off to one of the appropriate +@@ -263,6 +275,10 @@ + */ + if (regs->eflags & VM_MASK) { + unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; ++#ifdef CONFIG_MOSIX_DIAG ++ if(!(current->mosix.stay & DSTAY_FOR_86)) ++ panic("VM_MASK without STAY"); ++#endif /* CONFIG_MOSIX_DIAG */ + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; + } +@@ -307,6 +323,10 @@ + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->eip)) != 0) { + regs->eip = fixup; ++#ifdef CONFIG_MOSIX_UDB ++ if(debug_fixup) ++ mosix_debugger("fixup"); ++#endif /* CONFIG_MOSIX_UDB */ + return; + } + +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/drivers/char/Makefile linux-openmosix/drivers/char/Makefile +--- /tmp/openmosix/linux-2.4.17/drivers/char/Makefile Sun Nov 11 19:09:32 2001 ++++ linux-openmosix/drivers/char/Makefile Wed May 15 10:55:17 2002 +@@ -137,7 +137,12 @@ + obj-$(CONFIG_PCI) += keyboard.o $(KEYMAP) + endif + ++ifdef CONFIG_MOSIX_UDB ++obj-y += sysrq.o ++else + obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o ++endif ++ + obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o + obj-$(CONFIG_ROCKETPORT) += rocket.o + obj-$(CONFIG_MOXA_SMARTIO) += mxser.o +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/drivers/char/console.c linux-openmosix/drivers/char/console.c +--- /tmp/openmosix/linux-2.4.17/drivers/char/console.c Fri Dec 21 18:41:53 2001 ++++ linux-openmosix/drivers/char/console.c Wed May 15 10:55:17 2002 +@@ -2170,6 +2170,32 @@ + clear_bit(0, &printing); + } + ++#ifdef CONFIG_MOSIX_UDB ++int ++vt_console_read(struct console *co, const char *into, unsigned cnt) ++{ ++ extern int keyboard_has_any(void); ++ extern void keyboard_manual_mode(int); ++ extern char keyboard_readch(void); ++ int n = 0; ++ ++ keyboard_manual_mode(1); ++ while(!keyboard_has_any()); ++ while(cnt--) ++ { ++ ((char *)into)[n++] = keyboard_readch(); ++ unblank_screen(); ++ if(!keyboard_has_any()) ++ { ++ 
keyboard_manual_mode(0); ++ return(n); ++ } ++ } ++ keyboard_manual_mode(0); ++ return(n); ++} ++#endif /* CONFIG_MOSIX_UDB */ ++ + static kdev_t vt_console_device(struct console *c) + { + return MKDEV(TTY_MAJOR, c->index ? c->index : fg_console + 1); +@@ -2178,6 +2204,9 @@ + struct console vt_console_driver = { + name: "tty", + write: vt_console_print, ++#ifdef CONFIG_MOSIX_UDB ++ read: vt_console_read, ++#endif /* CONFIG_MOSIX_UDB */ + device: vt_console_device, + wait_key: keyboard_wait_for_keypress, + unblank: unblank_screen, +diff -ruN --exclude=CVS /tmp/openmosix/linux-2.4.17/drivers/char/drm/i810_dma.c linux-openmosix/drivers/char/drm/i810_dma.c +--- /tmp/openmosix/linux-2.4.17/drivers/char/drm/i810_dma.c Wed Aug 8 18:42:15 2001 ++++ linux-openmosix/drivers/char/drm/i810_dma.c Wed May 15 10:55:17 2002 +@@ -36,6 +36,10 @@ + #include "i810_drv.h" + #include /* For task queue support */ + ++#ifdef CONFIG_MOSIX ++#include ++#endif /* CONFIG_MOSIX */ ++ + /* in case we don't have a 2.3.99-pre6 kernel or later: */ + #ifndef VM_DONTCOPY + #define VM_DONTCOPY 0 +@@ -181,6 +185,13 @@ + + if(buf_priv->currently_mapped == I810_BUF_MAPPED) return -EINVAL; + ++#ifdef CONFIG_MOSIX ++ if(!mosix_go_home(1)) ++ { ++ printk("i810_map_buffer: Cannot map while away!\n"); ++ return(-ENOMEM); ++ } ++#endif /* CONFIG_MOSIX */ + if(VM_DONTCOPY != 0) { + #if LINUX_VERSION_CODE <= 0x020402 + down( &current->mm->mmap_sem ); +@@ -222,6 +233,13 @@ + if(VM_DONTCOPY != 0) { + if(buf_priv->currently_mapped != I810_BUF_MAPPED) + return -EINVAL; ++#ifdef CONFIG_MOSIX ++ { ++ extern asmlinkage long sys_munmap(unsigned long,size_t); ++ retcode = sys_munmap((unsigned long)buf_priv->virtual, ++ (size_t) buf->total); ++ } ++#else + #if LINUX_VERSION_CODE <= 0x020402 + down( &current->mm->mmap_sem ); + #else +@@ -240,6 +258,7 @@ + #else + up_write( &current->mm->mmap_sem ); + #endif ++#endif /* CONFIG_MOSIX */ + } + buf_priv->currently_mapped = I810_BUF_UNMAPPED; + buf_priv->virtual = 0; +diff -ruN --exclude=CVS
/tmp/openmosix/linux-2.4.17/drivers/char/keyboard.c linux-openmosix/drivers/char/keyboard.c +--- /tmp/openmosix/linux-2.4.17/drivers/char/keyboard.c Tue Sep 18 22:39:51 2001 ++++ linux-openmosix/drivers/char/keyboard.c Wed May 15 10:55:17 2002 +@@ -65,6 +65,61 @@ + EXPORT_SYMBOL(handle_scancode); + EXPORT_SYMBOL(kbd_ledfunc); + ++#ifdef CONFIG_MOSIX_UDB ++#define MANQ 8 ++char manq[MANQ]; ++static int man_in, man_out; ++static int manual_mode; ++ ++void ++keyboard_manual_mode(int x) ++{ ++ if(x) ++ manual_mode++; ++ else ++ { ++ if(manual_mode) ++ manual_mode--; ++ man_in = man_out = 0; ++ } ++} ++ ++int ++in_manual_mode(void) ++{ ++ return(manual_mode); ++} ++ ++int ++keyboard_has_any(void) ++{ ++ extern void keyboard_like_interrupt(void); ++ ++ keyboard_like_interrupt(); ++ return(man_in != man_out); ++} ++ ++char ++keyboard_readch(void) ++{ ++ char c; ++ ++ if(man_in == man_out) ++ return('\0'); ++ c = manq[man_out]; ++ man_out = (man_out + 1) % MANQ; ++ return(c); ++} ++ ++void ++keyboard_putch(char ch) ++{ ++ manq[man_in++] = ch; ++ man_in %= MANQ; ++} ++ ++#endif /* CONFIG_MOSIX_UDB */ ++ + extern void ctrl_alt_del(void); + + DECLARE_WAIT_QUEUE_HEAD(keypress_wait); +@@ -129,12 +184,19 @@ + num, hold, scroll_forw, scroll_back, boot_it, caps_on, compose, + SAK, decr_console, incr_console, spawn_console, bare_num; + ++#ifdef CONFIG_MOSIX_UDB ++static void_fn call_udb; ++#endif /* CONFIG_MOSIX_UDB */ ++ + static void_fnp spec_fn_table[] = { + do_null, enter, show_ptregs, show_mem, + show_state, send_intr, lastcons, caps_toggle, + num, hold, scroll_forw, scroll_back, + boot_it, caps_on, compose, SAK, + decr_console, incr_console, spawn_console, bare_num ++#ifdef CONFIG_MOSIX_UDB ++ , call_udb ++#endif /* CONFIG_MOSIX_UDB */ + }; + + #define SPECIALS_ALLOWED_IN_RAW_MODE (1 << KVAL(K_SAK)) +@@ -279,6 +341,9 @@ + */ + if (!rep || + (vc_kbd_mode(kbd,VC_REPEAT) && tty && ++#ifdef CONFIG_MOSIX_UDB ++ !manual_mode && ++#endif /* CONFIG_MOSIX_UDB */ + (L_ECHO(tty) || 
(tty->driver.chars_in_buffer(tty) == 0)))) { + u_short keysym; + u_char type; +@@ -334,6 +399,13 @@ + + void put_queue(int ch) + { ++#ifdef CONFIG_MOSIX_UDB ++ if(manual_mode) ++ { ++ keyboard_putch((char)ch); ++ return; ++ } ++#endif /* CONFIG_MOSIX_UDB */ + wake_up(&keypress_wait); + if (tty) { + tty_insert_flip_char(tty, ch, 0); +@@ -814,6 +886,14 @@ + } + } + ++#ifdef CONFIG_MOSIX_UDB ++static void call_udb(void) ++{ ++ extern void mosix_debugger(char *); ++ mosix_debugger("Keyboard"); ++} ++#endif /* CONFIG_MOSIX_UDB */ ++ + /* + * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, + * or (ii) whatever pattern of lights people want to show using KDSETLED, +@@ -940,5 +1020,8 @@ + + pm_kbd = pm_register(PM_SYS_DEV, PM_SYS_KBC, pm_kbd_request_override); + ++#ifdef CONFIG_MOSIX_UDB ++ key_maps[2][69] = key_maps[8][69] = K_DEBUGGER|0xf00