summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS9
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci70
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ext481
-rw-r--r--Documentation/DocBook/.gitignore4
-rw-r--r--Documentation/DocBook/kernel-api.tmpl1
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt814
-rw-r--r--Documentation/PCI/pci-iov-howto.txt99
-rw-r--r--Documentation/cgroups/cgroups.txt26
-rw-r--r--Documentation/cgroups/memcg_test.txt20
-rw-r--r--Documentation/fb/00-INDEX2
-rw-r--r--Documentation/fb/cyblafb/bugs13
-rw-r--r--Documentation/fb/cyblafb/credits7
-rw-r--r--Documentation/fb/cyblafb/documentation17
-rw-r--r--Documentation/fb/cyblafb/fb.modes154
-rw-r--r--Documentation/fb/cyblafb/performance79
-rw-r--r--Documentation/fb/cyblafb/todo31
-rw-r--r--Documentation/fb/cyblafb/usage217
-rw-r--r--Documentation/fb/cyblafb/whatsnew29
-rw-r--r--Documentation/fb/cyblafb/whycyblafb85
-rw-r--r--Documentation/feature-removal-schedule.txt54
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/ext4.txt30
-rw-r--r--Documentation/filesystems/proc.txt1118
-rw-r--r--Documentation/filesystems/sysfs-pci.txt10
-rw-r--r--Documentation/gpio.txt23
-rw-r--r--Documentation/hwmon/lis3lv02d20
-rw-r--r--Documentation/hwmon/ltc421550
-rw-r--r--Documentation/kernel-parameters.txt11
-rw-r--r--Documentation/misc-devices/isl2900362
-rw-r--r--Documentation/networking/vxge.txt100
-rw-r--r--Documentation/powerpc/dts-bindings/mmc-spi-slot.txt23
-rw-r--r--Documentation/sysctl/00-INDEX2
-rw-r--r--Documentation/sysctl/fs.txt74
-rw-r--r--Documentation/sysctl/kernel.txt53
-rw-r--r--Documentation/sysctl/net.txt175
-rw-r--r--Documentation/sysrq.txt5
-rw-r--r--MAINTAINERS29
-rw-r--r--arch/alpha/include/asm/machvec.h2
-rw-r--r--arch/alpha/include/asm/pci.h14
-rw-r--r--arch/alpha/include/asm/spinlock.h3
-rw-r--r--arch/alpha/include/asm/system.h547
-rw-r--r--arch/alpha/include/asm/types.h5
-rw-r--r--arch/alpha/include/asm/uaccess.h12
-rw-r--r--arch/alpha/include/asm/xchg.h258
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/err_ev6.c4
-rw-r--r--arch/alpha/kernel/err_ev7.c6
-rw-r--r--arch/alpha/kernel/err_marvel.c40
-rw-r--r--arch/alpha/kernel/err_titan.c28
-rw-r--r--arch/alpha/kernel/pci-sysfs.c366
-rw-r--r--arch/alpha/kernel/pci.c2
-rw-r--r--arch/alpha/kernel/pci_iommu.c34
-rw-r--r--arch/alpha/kernel/process.c2
-rw-r--r--arch/alpha/kernel/proto.h16
-rw-r--r--arch/alpha/kernel/setup.c2
-rw-r--r--arch/alpha/kernel/smc37c669.c4
-rw-r--r--arch/alpha/kernel/sys_jensen.c3
-rw-r--r--arch/alpha/kernel/sys_sable.c4
-rw-r--r--arch/alpha/kernel/traps.c2
-rw-r--r--arch/arm/include/asm/spinlock.h3
-rw-r--r--arch/arm/kernel/process.c2
-rw-r--r--arch/avr32/kernel/process.c2
-rw-r--r--arch/avr32/mm/fault.c18
-rw-r--r--arch/blackfin/kernel/process.c2
-rw-r--r--arch/cris/arch-v10/kernel/process.c2
-rw-r--r--arch/cris/arch-v32/kernel/process.c2
-rw-r--r--arch/cris/include/arch-v32/arch/spinlock.h2
-rw-r--r--arch/frv/kernel/process.c2
-rw-r--r--arch/h8300/kernel/process.c2
-rw-r--r--arch/ia64/configs/generic_defconfig3
-rw-r--r--arch/ia64/hp/sim/simserial.c49
-rw-r--r--arch/ia64/include/asm/intrinsics.h6
-rw-r--r--arch/ia64/include/asm/mmu_context.h6
-rw-r--r--arch/ia64/include/asm/module.h6
-rw-r--r--arch/ia64/include/asm/native/inst.h13
-rw-r--r--arch/ia64/include/asm/native/patchlist.h38
-rw-r--r--arch/ia64/include/asm/native/pvchk_inst.h8
-rw-r--r--arch/ia64/include/asm/paravirt.h65
-rw-r--r--arch/ia64/include/asm/paravirt_patch.h143
-rw-r--r--arch/ia64/include/asm/paravirt_privop.h365
-rw-r--r--arch/ia64/include/asm/smp.h3
-rw-r--r--arch/ia64/include/asm/spinlock.h77
-rw-r--r--arch/ia64/include/asm/timex.h1
-rw-r--r--arch/ia64/include/asm/topology.h5
-rw-r--r--arch/ia64/include/asm/uv/uv_hub.h6
-rw-r--r--arch/ia64/include/asm/uv/uv_mmrs.h158
-rw-r--r--arch/ia64/include/asm/xen/hypervisor.h39
-rw-r--r--arch/ia64/include/asm/xen/inst.h28
-rw-r--r--arch/ia64/include/asm/xen/interface.h9
-rw-r--r--arch/ia64/include/asm/xen/minstate.h11
-rw-r--r--arch/ia64/include/asm/xen/patchlist.h38
-rw-r--r--arch/ia64/include/asm/xen/privop.h8
-rw-r--r--arch/ia64/kernel/Makefile39
-rw-r--r--arch/ia64/kernel/Makefile.gate27
-rw-r--r--arch/ia64/kernel/acpi.c8
-rw-r--r--arch/ia64/kernel/asm-offsets.c2
-rw-r--r--arch/ia64/kernel/efi.c1
-rw-r--r--arch/ia64/kernel/entry.S4
-rw-r--r--arch/ia64/kernel/fsys.S35
-rw-r--r--arch/ia64/kernel/gate.S171
-rw-r--r--arch/ia64/kernel/gate.lds.S17
-rw-r--r--arch/ia64/kernel/head.S10
-rw-r--r--arch/ia64/kernel/ivt.S2
-rw-r--r--arch/ia64/kernel/mca.c6
-rw-r--r--arch/ia64/kernel/module.c35
-rw-r--r--arch/ia64/kernel/paravirt.c539
-rw-r--r--arch/ia64/kernel/paravirt_patch.c514
-rw-r--r--arch/ia64/kernel/paravirt_patchlist.c79
-rw-r--r--arch/ia64/kernel/paravirt_patchlist.h28
-rw-r--r--arch/ia64/kernel/paravirtentry.S99
-rw-r--r--arch/ia64/kernel/patch.c40
-rw-r--r--arch/ia64/kernel/perfmon.c4
-rw-r--r--arch/ia64/kernel/process.c2
-rw-r--r--arch/ia64/kernel/salinfo.c6
-rw-r--r--arch/ia64/kernel/setup.c9
-rw-r--r--arch/ia64/kernel/smp.c6
-rw-r--r--arch/ia64/kernel/smpboot.c17
-rw-r--r--arch/ia64/kernel/time.c25
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S30
-rw-r--r--arch/ia64/kvm/kvm-ia64.c2
-rw-r--r--arch/ia64/kvm/vcpu.c2
-rw-r--r--arch/ia64/kvm/vtlb.c2
-rw-r--r--arch/ia64/mm/init.c12
-rw-r--r--arch/ia64/mm/tlb.c2
-rw-r--r--arch/ia64/scripts/pvcheck.sed1
-rw-r--r--arch/ia64/sn/kernel/io_common.c15
-rw-r--r--arch/ia64/sn/kernel/io_init.c12
-rw-r--r--arch/ia64/sn/kernel/setup.c5
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c12
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c8
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_dma.c4
-rw-r--r--arch/ia64/xen/Makefile19
-rw-r--r--arch/ia64/xen/gate-data.S3
-rw-r--r--arch/ia64/xen/hypercall.S2
-rw-r--r--arch/ia64/xen/time.c48
-rw-r--r--arch/ia64/xen/xen_pv_ops.c800
-rw-r--r--arch/m32r/kernel/process.c2
-rw-r--r--arch/m68k/kernel/process.c2
-rw-r--r--arch/m68knommu/kernel/process.c2
-rw-r--r--arch/mips/include/asm/mach-bcm47xx/gpio.h20
-rw-r--r--arch/mips/include/asm/spinlock.h2
-rw-r--r--arch/mips/include/asm/unistd.h18
-rw-r--r--arch/mips/kernel/process.c2
-rw-r--r--arch/mips/kernel/scall32-o32.S2
-rw-r--r--arch/mips/kernel/scall64-64.S2
-rw-r--r--arch/mips/kernel/scall64-n32.S2
-rw-r--r--arch/mips/kernel/scall64-o32.S2
-rw-r--r--arch/mips/mm/highmem.c2
-rw-r--r--arch/mn10300/kernel/process.c2
-rw-r--r--arch/parisc/include/asm/spinlock.h3
-rw-r--r--arch/parisc/kernel/process.c2
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/powerpc/Kconfig3
-rw-r--r--arch/powerpc/Kconfig.debug9
-rw-r--r--arch/powerpc/boot/dts/mpc832x_rdb.dts24
-rw-r--r--arch/powerpc/include/asm/highmem.h2
-rw-r--r--arch/powerpc/include/asm/pci.h4
-rw-r--r--arch/powerpc/include/asm/spinlock.h3
-rw-r--r--arch/powerpc/include/asm/suspend.h3
-rw-r--r--arch/powerpc/kernel/msi.c5
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/vio.c2
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c123
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c109
-rw-r--r--arch/powerpc/sysdev/fsl_soc.h7
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/Kconfig.debug8
-rw-r--r--arch/s390/hypfs/hypfs_diag.c2
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/spinlock.h3
-rw-r--r--arch/s390/kernel/process.c2
-rw-r--r--arch/sh/include/asm/spinlock.h3
-rw-r--r--arch/sh/kernel/process_32.c2
-rw-r--r--arch/sh/kernel/process_64.c2
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/Kconfig.debug8
-rw-r--r--arch/sparc/include/asm/spinlock_32.h2
-rw-r--r--arch/sparc/include/asm/spinlock_64.h2
-rw-r--r--arch/sparc/kernel/process_32.c2
-rw-r--r--arch/sparc/kernel/process_64.c2
-rw-r--r--arch/sparc/mm/highmem.c1
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/pcap_user.h10
-rw-r--r--arch/um/drivers/port.h10
-rw-r--r--arch/um/drivers/ssl.h10
-rw-r--r--arch/um/drivers/stdio_console.h10
-rw-r--r--arch/um/drivers/ubd_kern.c17
-rw-r--r--arch/um/drivers/xterm.h10
-rw-r--r--arch/um/include/asm/irq_vectors.h10
-rw-r--r--arch/um/include/asm/mmu.h10
-rw-r--r--arch/um/include/asm/pda.h10
-rw-r--r--arch/um/include/asm/pgalloc.h10
-rw-r--r--arch/um/include/asm/pgtable-3level.h10
-rw-r--r--arch/um/include/shared/frame_kern.h10
-rw-r--r--arch/um/include/shared/initrd.h10
-rw-r--r--arch/um/include/shared/irq_kern.h10
-rw-r--r--arch/um/include/shared/mem_kern.h10
-rw-r--r--arch/um/include/shared/ubd_user.h10
-rw-r--r--arch/um/kernel/Makefile6
-rw-r--r--arch/um/kernel/config.c.in18
-rw-r--r--arch/um/kernel/process.c2
-rw-r--r--arch/um/kernel/syscall.c3
-rw-r--r--arch/um/os-Linux/start_up.c8
-rw-r--r--arch/um/sys-i386/asm/archparam.h10
-rw-r--r--arch/um/sys-i386/shared/sysdep/checksum.h10
-rw-r--r--arch/um/sys-i386/sys_call_table.S11
-rw-r--r--arch/um/sys-ia64/sysdep/ptrace.h10
-rw-r--r--arch/um/sys-ia64/sysdep/sigcontext.h10
-rw-r--r--arch/um/sys-ia64/sysdep/syscalls.h10
-rw-r--r--arch/um/sys-ppc/miscthings.c11
-rw-r--r--arch/um/sys-ppc/ptrace.c10
-rw-r--r--arch/um/sys-ppc/ptrace_user.c10
-rw-r--r--arch/um/sys-ppc/shared/sysdep/ptrace.h10
-rw-r--r--arch/um/sys-ppc/shared/sysdep/sigcontext.h10
-rw-r--r--arch/um/sys-ppc/shared/sysdep/syscalls.h10
-rw-r--r--arch/um/sys-ppc/sigcontext.c10
-rw-r--r--arch/um/sys-x86_64/asm/archparam.h10
-rw-r--r--arch/um/sys-x86_64/asm/module.h10
-rw-r--r--arch/um/sys-x86_64/mem.c9
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.debug8
-rw-r--r--arch/x86/boot/memory.c39
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/spinlock.h3
-rw-r--r--arch/x86/include/asm/suspend_32.h24
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h14
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h153
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c9
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/pci-dma.c3
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/mm/highmem_32.c46
-rw-r--r--arch/x86/mm/iomap_32.c2
-rw-r--r--arch/x86/pci/early.c19
-rw-r--r--arch/x86/pci/fixup.c20
-rw-r--r--arch/x86/pci/legacy.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c227
-rw-r--r--arch/x86/pci/mmconfig_64.c17
-rw-r--r--arch/x86/power/cpu_32.c1
-rw-r--r--arch/x86/power/cpu_64.c1
-rw-r--r--arch/x86/power/hibernate_64.c1
-rw-r--r--arch/xtensa/kernel/process.c2
-rw-r--r--arch/xtensa/platforms/iss/console.c29
-rw-r--r--drivers/acpi/pci_root.c180
-rw-r--r--drivers/auxdisplay/Kconfig3
-rw-r--r--drivers/block/floppy.c8
-rw-r--r--drivers/block/loop.c30
-rw-r--r--drivers/block/nbd.c112
-rw-r--r--drivers/char/amiserial.c62
-rw-r--r--drivers/char/cyclades.c54
-rw-r--r--drivers/char/hpet.c22
-rw-r--r--drivers/char/ip2/ip2main.c74
-rw-r--r--drivers/char/istallion.c121
-rw-r--r--drivers/char/pcmcia/synclink_cs.c73
-rw-r--r--drivers/char/random.c3
-rw-r--r--drivers/char/stallion.c126
-rw-r--r--drivers/char/synclink.c98
-rw-r--r--drivers/char/synclink_gt.c132
-rw-r--r--drivers/char/synclinkmp.c74
-rw-r--r--drivers/char/sysrq.c21
-rw-r--r--drivers/char/tty_io.c26
-rw-r--r--drivers/crypto/hifn_795x.c2
-rw-r--r--drivers/edac/Kconfig36
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/amd8111_edac.c595
-rw-r--r--drivers/edac/amd8111_edac.h130
-rw-r--r--drivers/edac/amd8131_edac.c379
-rw-r--r--drivers/edac/amd8131_edac.h119
-rw-r--r--drivers/edac/edac_core.h16
-rw-r--r--drivers/edac/edac_pci.c14
-rw-r--r--drivers/edac/ppc4xx_edac.c1448
-rw-r--r--drivers/edac/ppc4xx_edac.h172
-rw-r--r--drivers/gpio/gpiolib.c19
-rw-r--r--drivers/gpu/drm/drm_crtc_helper.c31
-rw-r--r--drivers/gpu/drm/drm_edid.c8
-rw-r--r--drivers/gpu/drm/drm_gem.c7
-rw-r--r--drivers/gpu/drm/drm_sysfs.c1
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c11
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h5
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c38
-rw-r--r--drivers/gpu/drm/i915/i915_gem_debug.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/i915_gem_tiling.c16
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c67
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h11
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c4
-rw-r--r--drivers/gpu/drm/i915/intel_display.c22
-rw-r--r--drivers/gpu/drm/i915/intel_modes.c1
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo.c193
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo_regs.h3
-rw-r--r--drivers/gpu/drm/i915/intel_tv.c30
-rw-r--r--drivers/gpu/drm/radeon/r600_cp.c8
-rw-r--r--drivers/hwmon/Kconfig36
-rw-r--r--drivers/hwmon/Makefile3
-rw-r--r--drivers/hwmon/hp_accel.c124
-rw-r--r--drivers/hwmon/lis3lv02d.c288
-rw-r--r--drivers/hwmon/lis3lv02d.h20
-rw-r--r--drivers/hwmon/lis3lv02d_spi.c114
-rw-r--r--drivers/hwmon/lm95241.c527
-rw-r--r--drivers/hwmon/ltc4215.c364
-rw-r--r--drivers/ide/Kconfig33
-rw-r--r--drivers/ide/Makefile2
-rw-r--r--drivers/ide/alim15x3.c10
-rw-r--r--drivers/ide/at91_ide.c39
-rw-r--r--drivers/ide/au1xxx-ide.c26
-rw-r--r--drivers/ide/cmd64x.c6
-rw-r--r--drivers/ide/cs5530.c3
-rw-r--r--drivers/ide/cs5536.c1
-rw-r--r--drivers/ide/falconide.c4
-rw-r--r--drivers/ide/gayle.c12
-rw-r--r--drivers/ide/hpt366.c10
-rw-r--r--drivers/ide/ht6560b.c20
-rw-r--r--drivers/ide/icside.c8
-rw-r--r--drivers/ide/ide-atapi.c168
-rw-r--r--drivers/ide/ide-cd.c525
-rw-r--r--drivers/ide/ide-disk.c4
-rw-r--r--drivers/ide/ide-dma-sff.c17
-rw-r--r--drivers/ide/ide-dma.c118
-rw-r--r--drivers/ide/ide-eh.c14
-rw-r--r--drivers/ide/ide-floppy.c29
-rw-r--r--drivers/ide/ide-floppy_ioctl.c5
-rw-r--r--drivers/ide/ide-generic.c86
-rw-r--r--drivers/ide/ide-h8300.c34
-rw-r--r--drivers/ide/ide-io-std.c130
-rw-r--r--drivers/ide/ide-io.c27
-rw-r--r--drivers/ide/ide-iops.c41
-rw-r--r--drivers/ide/ide-pm.c5
-rw-r--r--drivers/ide/ide-probe.c31
-rw-r--r--drivers/ide/ide-tape.c36
-rw-r--r--drivers/ide/ide-taskfile.c96
-rw-r--r--drivers/ide/ide-timings.c12
-rw-r--r--drivers/ide/ide-xfer-mode.c15
-rw-r--r--drivers/ide/ide_arm.c53
-rw-r--r--drivers/ide/it821x.c3
-rw-r--r--drivers/ide/ns87415.c85
-rw-r--r--drivers/ide/pdc202xx_old.c10
-rw-r--r--drivers/ide/pmac.c101
-rw-r--r--drivers/ide/q40ide.c4
-rw-r--r--drivers/ide/qd65xx.c21
-rw-r--r--drivers/ide/sc1200.c7
-rw-r--r--drivers/ide/scc_pata.c56
-rw-r--r--drivers/ide/sgiioc4.c24
-rw-r--r--drivers/ide/siimage.c1
-rw-r--r--drivers/ide/sl82c105.c10
-rw-r--r--drivers/ide/tc86c001.c1
-rw-r--r--drivers/ide/trm290.c57
-rw-r--r--drivers/ide/tx4938ide.c41
-rw-r--r--drivers/ide/tx4939ide.c62
-rw-r--r--drivers/input/mouse/hgpk.c2
-rw-r--r--drivers/isdn/capi/capi.c7
-rw-r--r--drivers/misc/Kconfig14
-rw-r--r--drivers/misc/Makefile1
-rw-r--r--drivers/misc/eeprom/at24.c67
-rw-r--r--drivers/misc/eeprom/at25.c58
-rw-r--r--drivers/misc/hpilo.c6
-rw-r--r--drivers/misc/hpilo.h6
-rw-r--r--drivers/misc/isl29003.c470
-rw-r--r--drivers/misc/sgi-gru/Makefile2
-rw-r--r--drivers/misc/sgi-gru/gru_instructions.h22
-rw-r--r--drivers/misc/sgi-gru/grufault.c130
-rw-r--r--drivers/misc/sgi-gru/grufile.c36
-rw-r--r--drivers/misc/sgi-gru/gruhandles.c183
-rw-r--r--drivers/misc/sgi-gru/gruhandles.h178
-rw-r--r--drivers/misc/sgi-gru/grukservices.c131
-rw-r--r--drivers/misc/sgi-gru/grukservices.h33
-rw-r--r--drivers/misc/sgi-gru/grumain.c84
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c45
-rw-r--r--drivers/misc/sgi-gru/grutables.h41
-rw-r--r--drivers/misc/sgi-gru/grutlbpurge.c7
-rw-r--r--drivers/misc/sgi-xp/xpc.h33
-rw-r--r--drivers/misc/sgi-xp/xpc_channel.c8
-rw-r--r--drivers/misc/sgi-xp/xpc_main.c6
-rw-r--r--drivers/misc/sgi-xp/xpc_sn2.c20
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c229
-rw-r--r--drivers/mmc/card/sdio_uart.c62
-rw-r--r--drivers/net/Kconfig19
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/dm9000.c2
-rw-r--r--drivers/net/dnet.c1
-rw-r--r--drivers/net/fec_mpc52xx.c77
-rw-r--r--drivers/net/fsl_pq_mdio.c3
-rw-r--r--drivers/net/gianfar.h1
-rw-r--r--drivers/net/hamradio/yam.c64
-rw-r--r--drivers/net/hamradio/yam1200.h343
-rw-r--r--drivers/net/hamradio/yam9600.h343
-rw-r--r--drivers/net/igb/e1000_phy.c7
-rw-r--r--drivers/net/igb/igb_ethtool.c14
-rw-r--r--drivers/net/igb/igb_main.c54
-rw-r--r--drivers/net/ixgbe/ixgbe_82598.c3
-rw-r--r--drivers/net/ixgbe/ixgbe_common.c3
-rw-r--r--drivers/net/ixgbe/ixgbe_common.h9
-rw-r--r--drivers/net/ixgbe/ixgbe_dcb_nl.c6
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c129
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c110
-rw-r--r--drivers/net/ixgbe/ixgbe_type.h1
-rw-r--r--drivers/net/mlx4/en_netdev.c2
-rw-r--r--drivers/net/mlx4/en_rx.c2
-rw-r--r--drivers/net/mlx4/sense.c2
-rw-r--r--drivers/net/pcmcia/ositech.h358
-rw-r--r--drivers/net/pcmcia/smc91c92_cs.c44
-rw-r--r--drivers/net/phy/phy.c3
-rw-r--r--drivers/net/qlge/qlge_ethtool.c1
-rw-r--r--drivers/net/r8169.c5
-rw-r--r--drivers/net/sfc/efx.c7
-rw-r--r--drivers/net/tc35815.c2
-rw-r--r--drivers/net/tokenring/3c359.c63
-rw-r--r--drivers/net/tokenring/3c359.h3
-rw-r--r--drivers/net/tokenring/3c359_microcode.h1581
-rw-r--r--drivers/net/ucc_geth.c22
-rw-r--r--drivers/net/ucc_geth.h4
-rw-r--r--drivers/net/ucc_geth_ethtool.c1
-rw-r--r--drivers/net/usb/hso.c40
-rw-r--r--drivers/net/usb/kaweth.c7
-rw-r--r--drivers/net/vxge/Makefile7
-rw-r--r--drivers/net/vxge/vxge-config.c5264
-rw-r--r--drivers/net/vxge/vxge-config.h2259
-rw-r--r--drivers/net/vxge/vxge-ethtool.c1148
-rw-r--r--drivers/net/vxge/vxge-ethtool.h67
-rw-r--r--drivers/net/vxge/vxge-main.c4502
-rw-r--r--drivers/net/vxge/vxge-main.h557
-rw-r--r--drivers/net/vxge/vxge-reg.h4608
-rw-r--r--drivers/net/vxge/vxge-traffic.c2528
-rw-r--r--drivers/net/vxge/vxge-traffic.h2409
-rw-r--r--drivers/net/vxge/vxge-version.h23
-rw-r--r--drivers/of/base.c1
-rw-r--r--drivers/parport/parport_serial.c30
-rw-r--r--drivers/pci/Kconfig10
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/bus.c8
-rw-r--r--drivers/pci/hotplug/acpi_pcihp.c58
-rw-r--r--drivers/pci/hotplug/fakephp.c444
-rw-r--r--drivers/pci/hotplug/pciehp.h13
-rw-r--r--drivers/pci/hotplug/pciehp_acpi.c21
-rw-r--r--drivers/pci/hotplug/pciehp_core.c18
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c34
-rw-r--r--drivers/pci/hotplug/shpchp.h10
-rw-r--r--drivers/pci/hotplug/shpchp_pci.c2
-rw-r--r--drivers/pci/intel-iommu.c2
-rw-r--r--drivers/pci/iov.c680
-rw-r--r--drivers/pci/msi.c426
-rw-r--r--drivers/pci/msi.h6
-rw-r--r--drivers/pci/pci-acpi.c215
-rw-r--r--drivers/pci/pci-driver.c81
-rw-r--r--drivers/pci/pci-sysfs.c124
-rw-r--r--drivers/pci/pci.c193
-rw-r--r--drivers/pci/pci.h65
-rw-r--r--drivers/pci/pcie/aer/aerdrv.c28
-rw-r--r--drivers/pci/pcie/aer/aerdrv_acpi.c2
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c10
-rw-r--r--drivers/pci/pcie/portdrv.h14
-rw-r--r--drivers/pci/pcie/portdrv_bus.c18
-rw-r--r--drivers/pci/pcie/portdrv_core.c379
-rw-r--r--drivers/pci/pcie/portdrv_pci.c50
-rw-r--r--drivers/pci/probe.c210
-rw-r--r--drivers/pci/quirks.c221
-rw-r--r--drivers/pci/remove.c4
-rw-r--r--drivers/pci/search.c2
-rw-r--r--drivers/pci/setup-bus.c7
-rw-r--r--drivers/pci/setup-res.c15
-rw-r--r--drivers/pci/slot.c18
-rw-r--r--drivers/pnp/pnpbios/core.c16
-rw-r--r--drivers/rtc/Kconfig21
-rw-r--r--drivers/rtc/Makefile1
-rw-r--r--drivers/rtc/rtc-ds1307.c189
-rw-r--r--drivers/rtc/rtc-ds1374.c6
-rw-r--r--drivers/rtc/rtc-efi.c235
-rw-r--r--drivers/rtc/rtc-lib.c7
-rw-r--r--drivers/rtc/rtc-m41t80.c18
-rw-r--r--drivers/rtc/rtc-parisc.c56
-rw-r--r--drivers/rtc/rtc-v3020.c230
-rw-r--r--drivers/rtc/rtc-wm8350.c43
-rw-r--r--drivers/s390/block/dasd.c1
-rw-r--r--drivers/s390/cio/device.c43
-rw-r--r--drivers/s390/cio/device.h1
-rw-r--r--drivers/s390/cio/device_fsm.c31
-rw-r--r--drivers/s390/net/qeth_core_offl.c0
-rw-r--r--drivers/s390/net/qeth_core_offl.h0
-rw-r--r--drivers/s390/scsi/zfcp_ccw.c5
-rw-r--r--drivers/s390/scsi/zfcp_fc.c2
-rw-r--r--drivers/serial/serial_core.c76
-rw-r--r--drivers/spi/spi_gpio.c21
-rw-r--r--drivers/spi/spi_mpc83xx.c366
-rw-r--r--drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c8
-rw-r--r--drivers/staging/rtl8187se/r8180_core.c8
-rw-r--r--drivers/usb/serial/usb-serial.c58
-rw-r--r--drivers/usb/wusbcore/devconnect.c2
-rw-r--r--drivers/video/68328fb.c5
-rw-r--r--drivers/video/Kconfig61
-rw-r--r--drivers/video/Makefile2
-rw-r--r--drivers/video/amba-clcd.c8
-rw-r--r--drivers/video/amifb.c7
-rw-r--r--drivers/video/arkfb.c4
-rw-r--r--drivers/video/asiliantfb.c26
-rw-r--r--drivers/video/aty/mach64_accel.c3
-rw-r--r--drivers/video/aty/mach64_cursor.c15
-rw-r--r--drivers/video/aty/radeon_pm.c3
-rw-r--r--drivers/video/backlight/backlight.c3
-rw-r--r--drivers/video/backlight/lcd.c3
-rw-r--r--drivers/video/cirrusfb.c1529
-rw-r--r--drivers/video/console/fbcon.c73
-rw-r--r--drivers/video/cyblafb.c1683
-rw-r--r--drivers/video/efifb.c5
-rw-r--r--drivers/video/fb_defio.c3
-rw-r--r--drivers/video/fbmem.c22
-rw-r--r--drivers/video/nvidia/nv_setup.c1
-rw-r--r--drivers/video/nvidia/nv_type.h2
-rw-r--r--drivers/video/nvidia/nvidia.c7
-rw-r--r--drivers/video/omap/hwa742.c4
-rw-r--r--drivers/video/omap/omapfb_main.c8
-rw-r--r--drivers/video/s1d13xxxfb.c48
-rw-r--r--drivers/video/s3c-fb.c1036
-rw-r--r--drivers/video/sgivwfb.c2
-rw-r--r--drivers/video/skeletonfb.c9
-rw-r--r--drivers/video/sm501fb.c5
-rw-r--r--drivers/video/sstfb.c10
-rw-r--r--drivers/video/stifb.c18
-rw-r--r--drivers/video/sunxvr500.c6
-rw-r--r--drivers/video/tdfxfb.c1
-rw-r--r--drivers/video/tgafb.c4
-rw-r--r--drivers/video/tridentfb.c19
-rw-r--r--drivers/video/uvesafb.c17
-rw-r--r--drivers/video/valkyriefb.c15
-rw-r--r--drivers/video/vesafb.c2
-rw-r--r--drivers/video/vfb.c1
-rw-r--r--drivers/video/via/accel.c8
-rw-r--r--drivers/w1/w1_io.c16
-rw-r--r--firmware/3com/3C359.bin.ihex1573
-rw-r--r--firmware/Makefile3
-rw-r--r--firmware/WHENCE50
-rw-r--r--firmware/ositech/Xilinx7OD.bin.ihex177
-rw-r--r--firmware/yam/1200.bin.ihex342
-rw-r--r--firmware/yam/9600.bin.ihex342
-rw-r--r--fs/adfs/super.c16
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/dev-ioctl.c29
-rw-r--r--fs/autofs4/expire.c27
-rw-r--r--fs/autofs4/root.c41
-rw-r--r--fs/befs/linuxvfs.c3
-rw-r--r--fs/binfmt_elf_fdpic.c25
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/btrfs_inode.h31
-rw-r--r--fs/btrfs/ctree.c588
-rw-r--r--fs/btrfs/ctree.h71
-rw-r--r--fs/btrfs/delayed-ref.c669
-rw-r--r--fs/btrfs/delayed-ref.h193
-rw-r--r--fs/btrfs/dir-item.c3
-rw-r--r--fs/btrfs/disk-io.c81
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c1674
-rw-r--r--fs/btrfs/extent_io.c51
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c50
-rw-r--r--fs/btrfs/inode-item.c3
-rw-r--r--fs/btrfs/inode.c206
-rw-r--r--fs/btrfs/locking.c21
-rw-r--r--fs/btrfs/ordered-data.c118
-rw-r--r--fs/btrfs/ordered-data.h4
-rw-r--r--fs/btrfs/transaction.c151
-rw-r--r--fs/btrfs/transaction.h8
-rw-r--r--fs/btrfs/tree-defrag.c2
-rw-r--r--fs/btrfs/tree-log.c444
-rw-r--r--fs/btrfs/tree-log.h17
-rw-r--r--fs/buffer.c62
-rw-r--r--fs/compat.c89
-rw-r--r--fs/cramfs/inode.c39
-rw-r--r--fs/cramfs/uncompress.c2
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/keystore.c3
-rw-r--r--fs/ecryptfs/messaging.c3
-rw-r--r--fs/efs/super.c20
-rw-r--r--fs/eventfd.c26
-rw-r--r--fs/eventpoll.c614
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/inode.c139
-rw-r--r--fs/ext3/ioctl.c59
-rw-r--r--fs/ext3/namei.c29
-rw-r--r--fs/ext4/balloc.c14
-rw-r--r--fs/ext4/dir.c16
-rw-r--r--fs/ext4/ext4.h93
-rw-r--r--fs/ext4/ext4_extents.h1
-rw-r--r--fs/ext4/ext4_i.h6
-rw-r--r--fs/ext4/ext4_sb.h14
-rw-r--r--fs/ext4/extents.c127
-rw-r--r--fs/ext4/file.c7
-rw-r--r--fs/ext4/ialloc.c273
-rw-r--r--fs/ext4/inode.c429
-rw-r--r--fs/ext4/ioctl.c17
-rw-r--r--fs/ext4/mballoc.c158
-rw-r--r--fs/ext4/mballoc.h8
-rw-r--r--fs/ext4/namei.c164
-rw-r--r--fs/ext4/resize.c8
-rw-r--r--fs/ext4/super.c327
-rw-r--r--fs/fat/inode.c6
-rw-r--r--fs/fs-writeback.c29
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/gfs2/ops_file.c5
-rw-r--r--fs/hfs/super.c3
-rw-r--r--fs/hfsplus/super.c3
-rw-r--r--fs/hpfs/super.c3
-rw-r--r--fs/hppfs/hppfs.c7
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jbd/journal.c34
-rw-r--r--fs/jbd2/commit.c5
-rw-r--r--fs/jbd2/revoke.c24
-rw-r--r--fs/jbd2/transaction.c2
-rw-r--r--fs/lockd/clntlock.c51
-rw-r--r--fs/lockd/mon.c8
-rw-r--r--fs/lockd/svc.c42
-rw-r--r--fs/minix/inode.c11
-rw-r--r--fs/nfs/callback.c31
-rw-r--r--fs/nfs/callback.h1
-rw-r--r--fs/nfs/client.c116
-rw-r--r--fs/nfs/dir.c9
-rw-r--r--fs/nfs/file.c37
-rw-r--r--fs/nfs/getroot.c4
-rw-r--r--fs/nfs/inode.c309
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/nfs2xdr.c9
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs3xdr.c37
-rw-r--r--fs/nfs/nfs4proc.c47
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/nfs4xdr.c213
-rw-r--r--fs/nfs/pagelist.c11
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfs/write.c53
-rw-r--r--fs/nfsd/nfsctl.c6
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/ntfs/dir.c4
-rw-r--r--fs/ntfs/inode.c3
-rw-r--r--fs/ntfs/layout.h329
-rw-r--r--fs/ntfs/logfile.h6
-rw-r--r--fs/ntfs/mft.c2
-rw-r--r--fs/ntfs/super.c50
-rw-r--r--fs/ntfs/usnjrnl.h48
-rw-r--r--fs/ocfs2/mmap.c6
-rw-r--r--fs/omfs/inode.c5
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/proc_tty.c12
-rw-r--r--fs/proc/task_nommu.c4
-rw-r--r--fs/qnx4/inode.c3
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ramfs/file-nommu.c15
-rw-r--r--fs/ramfs/inode.c94
-rw-r--r--fs/read_write.c50
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/squashfs/super.c3
-rw-r--r--fs/sysfs/bin.c8
-rw-r--r--fs/sysv/inode.c3
-rw-r--r--fs/ubifs/file.c9
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--include/asm-frv/highmem.h2
-rw-r--r--include/asm-generic/dma-mapping.h308
-rw-r--r--include/asm-generic/gpio.h5
-rw-r--r--include/asm-m32r/spinlock.h3
-rw-r--r--include/asm-mn10300/highmem.h2
-rw-r--r--include/drm/drm_crtc_helper.h3
-rw-r--r--include/drm/drm_os_linux.h4
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/acpi.h34
-rw-r--r--include/linux/auto_dev-ioctl.h7
-rw-r--r--include/linux/auto_fs.h6
-rw-r--r--include/linux/binfmts.h3
-rw-r--r--include/linux/bootmem.h6
-rw-r--r--include/linux/buffer_head.h2
-rw-r--r--include/linux/cgroup.h137
-rw-r--r--include/linux/compat.h6
-rw-r--r--include/linux/cpu.h16
-rw-r--r--include/linux/cpuset.h33
-rw-r--r--include/linux/eventfd.h12
-rw-r--r--include/linux/ext3_fs.h5
-rw-r--r--include/linux/fb.h11
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/fsl_devices.h7
-rw-r--r--include/linux/highmem.h12
-rw-r--r--include/linux/i2c/at24.h4
-rw-r--r--include/linux/ide.h56
-rw-r--r--include/linux/idr.h1
-rw-r--r--include/linux/jbd2.h6
-rw-r--r--include/linux/kernel.h6
-rw-r--r--include/linux/lockdep.h17
-rw-r--r--include/linux/loop.h1
-rw-r--r--include/linux/memcontrol.h20
-rw-r--r--include/linux/memory.h11
-rw-r--r--include/linux/mm.h6
-rw-r--r--include/linux/mm_types.h4
-rw-r--r--include/linux/mmzone.h8
-rw-r--r--include/linux/msi.h13
-rw-r--r--include/linux/nfs_fs.h4
-rw-r--r--include/linux/nfs_fs_sb.h5
-rw-r--r--include/linux/nfs_xdr.h59
-rw-r--r--include/linux/page-debug-flags.h30
-rw-r--r--include/linux/page-flags.h20
-rw-r--r--include/linux/page_cgroup.h13
-rw-r--r--include/linux/pagemap.h12
-rw-r--r--include/linux/pagevec.h1
-rw-r--r--include/linux/pci-acpi.h67
-rw-r--r--include/linux/pci.h61
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/pci_regs.h37
-rw-r--r--include/linux/pcieport_if.h36
-rw-r--r--include/linux/poison.h3
-rw-r--r--include/linux/ptrace.h1
-rw-r--r--include/linux/rtc-v3020.h6
-rw-r--r--include/linux/rtc.h6
-rw-r--r--include/linux/sched.h84
-rw-r--r--include/linux/spi/eeprom.h6
-rw-r--r--include/linux/spi/spi_gpio.h6
-rw-r--r--include/linux/spinlock.h6
-rw-r--r--include/linux/string.h1
-rw-r--r--include/linux/sunrpc/svc.h9
-rw-r--r--include/linux/sunrpc/svc_xprt.h52
-rw-r--r--include/linux/sunrpc/xprt.h2
-rw-r--r--include/linux/suspend.h3
-rw-r--r--include/linux/swap.h7
-rw-r--r--include/linux/synclink.h1
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/linux/tracehook.h15
-rw-r--r--include/linux/tty_driver.h3
-rw-r--r--include/linux/wait.h29
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--include/linux/writeback.h4
-rw-r--r--include/net/tcp.h15
-rw-r--r--include/video/aty128.h2
-rw-r--r--include/video/cirrus.h2
-rw-r--r--include/video/newport.h4
-rw-r--r--include/video/radeon.h564
-rw-r--r--include/video/s1d13xxxfb.h16
-rw-r--r--init/Kconfig4
-rw-r--r--init/initramfs.c7
-rw-r--r--init/main.c1
-rw-r--r--ipc/ipc_sysctl.c2
-rw-r--r--ipc/shm.c6
-rw-r--r--kernel/cgroup.c430
-rw-r--r--kernel/cgroup_debug.c2
-rw-r--r--kernel/cpuset.c254
-rw-r--r--kernel/exit.c213
-rw-r--r--kernel/fork.c7
-rw-r--r--kernel/kexec.c3
-rw-r--r--kernel/ns_cgroup.c14
-rw-r--r--kernel/pid.c33
-rw-r--r--kernel/pid_namespace.c15
-rw-r--r--kernel/power/disk.c1
-rw-r--r--kernel/power/snapshot.c9
-rw-r--r--kernel/power/swsusp.c18
-rw-r--r--kernel/printk.c19
-rw-r--r--kernel/ptrace.c101
-rw-r--r--kernel/relay.c8
-rw-r--r--kernel/sched.c23
-rw-r--r--kernel/signal.c63
-rw-r--r--kernel/spinlock.c18
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sysctl.c17
-rw-r--r--kernel/utsname_sysctl.c2
-rw-r--r--kernel/workqueue.c41
-rw-r--r--lib/Kconfig.debug1
-rw-r--r--lib/cpumask.c4
-rw-r--r--lib/idr.c46
-rw-r--r--lib/rbtree.c14
-rw-r--r--mm/Kconfig9
-rw-r--r--mm/Kconfig.debug26
-rw-r--r--mm/Makefile1
-rw-r--r--mm/debug-pagealloc.c129
-rw-r--r--mm/filemap_xip.c4
-rw-r--r--mm/highmem.c45
-rw-r--r--mm/hugetlb.c6
-rw-r--r--mm/internal.h8
-rw-r--r--mm/memcontrol.c687
-rw-r--r--mm/memory.c33
-rw-r--r--mm/mmap.c3
-rw-r--r--mm/nommu.c52
-rw-r--r--mm/oom_kill.c13
-rw-r--r--mm/page-writeback.c42
-rw-r--r--mm/page_alloc.c29
-rw-r--r--mm/page_cgroup.c37
-rw-r--r--mm/shmem.c3
-rw-r--r--mm/slab.c3
-rw-r--r--mm/sparse.c4
-rw-r--r--mm/swap.c23
-rw-r--r--mm/util.c30
-rw-r--r--mm/vmalloc.c19
-rw-r--r--mm/vmscan.c101
-rw-r--r--mm/vmstat.c16
-rw-r--r--net/bluetooth/rfcomm/tty.c6
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/ethtool.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_output.c73
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/irda/ircomm/ircomm_tty.c256
-rw-r--r--net/rds/ib.c5
-rw-r--r--net/rds/ib.h28
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_rdma.c43
-rw-r--r--net/rds/ib_recv.c37
-rw-r--r--net/rds/iw.c5
-rw-r--r--net/rds/iw.h28
-rw-r--r--net/rds/iw_cm.c44
-rw-r--r--net/rds/iw_rdma.c44
-rw-r--r--net/rds/iw_recv.c37
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c6
-rw-r--r--net/sunrpc/Kconfig22
-rw-r--r--net/sunrpc/clnt.c48
-rw-r--r--net/sunrpc/rpcb_clnt.c103
-rw-r--r--net/sunrpc/svc.c158
-rw-r--r--net/sunrpc/svc_xprt.c31
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/xprt.c89
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c8
-rw-r--r--net/sunrpc/xprtsock.c363
-rw-r--r--security/device_cgroup.c21
831 files changed, 54781 insertions, 19684 deletions
diff --git a/CREDITS b/CREDITS
index e8b7d36611e5..9a93e3e26d70 100644
--- a/CREDITS
+++ b/CREDITS
@@ -495,6 +495,11 @@ S: Kopmansg 2
S: 411 13 Goteborg
S: Sweden
+N: Paul Bristow
+E: paul@paulbristow.net
+W: http://paulbristow.net/linux/idefloppy.html
+D: Maintainer of IDE/ATAPI floppy driver
+
N: Dominik Brodowski
E: linux@brodo.de
W: http://www.brodo.de/
@@ -2642,6 +2647,10 @@ S: C/ Mieses 20, 9-B
S: Valladolid 47009
S: Spain
+N: Gadi Oxman
+E: gadio@netvision.net.il
+D: Original author and maintainer of IDE/ATAPI floppy/tape drivers
+
N: Greg Page
E: gpage@sovereign.org
D: IPX development and support
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index e638e15a8895..97ad190e13af 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -41,6 +41,49 @@ Description:
for the device and attempt to bind to it. For example:
# echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
+What: /sys/bus/pci/drivers/.../remove_id
+Date: February 2009
+Contact: Chris Wright <chrisw@sous-sol.org>
+Description:
+ Writing a device ID to this file will remove an ID
+ that was dynamically added via the new_id sysfs entry.
+ The format for the device ID is:
+ VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device
+ ID, Subsystem Vendor ID, Subsystem Device ID, Class,
+ and Class Mask. The Vendor ID and Device ID fields are
+ required, the rest are optional. After successfully
+ removing an ID, the driver will no longer support the
+ device. This is useful to ensure auto probing won't
+ match the driver to the device. For example:
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
+
+What: /sys/bus/pci/rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of all PCI buses in the system, and
+ re-discover previously removed devices.
+ Depends on CONFIG_HOTPLUG.
+
+What: /sys/bus/pci/devices/.../remove
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ hot-remove the PCI device and any of its children.
+ Depends on CONFIG_HOTPLUG.
+
+What: /sys/bus/pci/devices/.../rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of the device's parent bus and all
+ child buses, and re-discover devices removed earlier
+ from this part of the device tree.
+ Depends on CONFIG_HOTPLUG.
+
What: /sys/bus/pci/devices/.../vpd
Date: February 2008
Contact: Ben Hutchings <bhutchings@solarflare.com>
@@ -52,3 +95,30 @@ Description:
that some devices may have malformatted data. If the
underlying VPD has a writable section then the
corresponding section of this file will be writable.
+
+What: /sys/bus/pci/devices/.../virtfnN
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it.
+ The symbolic link points to the PCI device sysfs entry of the
+ Virtual Function whose index is N (0...MaxVFs-1).
+
+What: /sys/bus/pci/devices/.../dep_link
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it,
+ and this device has vendor specific dependencies with others.
+ The symbolic link points to the PCI device sysfs entry of
+ Physical Function this device depends on.
+
+What: /sys/bus/pci/devices/.../physfn
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when a device is a Virtual Function.
+ The symbolic link points to the PCI device sysfs entry of the
+ Physical Function this device associates with.
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
new file mode 100644
index 000000000000..4e79074de282
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -0,0 +1,81 @@
+What: /sys/fs/ext4/<disk>/mb_stats
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Controls whether the multiblock allocator should
+ collect statistics, which are shown during the unmount.
+ 1 means to collect statistics, 0 means not to collect
+ statistics
+
+What: /sys/fs/ext4/<disk>/mb_group_prealloc
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The multiblock allocator will round up allocation
+ requests to a multiple of this tuning parameter if the
+ stripe size is not set in the ext4 superblock
+
+What: /sys/fs/ext4/<disk>/mb_max_to_scan
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The maximum number of extents the multiblock allocator
+ will search to find the best extent
+
+What: /sys/fs/ext4/<disk>/mb_min_to_scan
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The minimum number of extents the multiblock allocator
+ will search to find the best extent
+
+What: /sys/fs/ext4/<disk>/mb_order2_req
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Tuning parameter which controls the minimum size for
+ requests (as a power of 2) where the buddy cache is
+ used
+
+What: /sys/fs/ext4/<disk>/mb_stream_req
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Files which have fewer blocks than this tunable
+ parameter will have their blocks allocated out of a
+ block group specific preallocation pool, so that small
+ files are packed closely together. Each large file
+ will have its blocks allocated out of its own unique
+ preallocation pool.
+
+What: /sys/fs/ext4/<disk>/inode_readahead
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Tuning parameter which controls the maximum number of
+ inode table blocks that ext4's inode table readahead
+ algorithm will pre-read into the buffer cache
+
+What: /sys/fs/ext4/<disk>/delayed_allocation_blocks
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of blocks
+ that are dirty in the page cache, but which do not
+ have their location in the filesystem allocated yet.
+
+What: /sys/fs/ext4/<disk>/lifetime_write_kbytes
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of kilobytes
+ of data that have been written to this filesystem since it was
+ created.
+
+What: /sys/fs/ext4/<disk>/session_write_kbytes
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was mounted.
diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore
index c102c02ecf89..c6def352fe39 100644
--- a/Documentation/DocBook/.gitignore
+++ b/Documentation/DocBook/.gitignore
@@ -4,3 +4,7 @@
*.html
*.9.gz
*.9
+*.aux
+*.dvi
+*.log
+*.out
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index bc962cda6504..58c194572c76 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -199,6 +199,7 @@ X!Edrivers/pci/hotplug.c
-->
!Edrivers/pci/probe.c
!Edrivers/pci/rom.c
+!Edrivers/pci/iov.c
</sect1>
<sect1><title>PCI Hotplug Support Library</title>
!Edrivers/pci/hotplug/pci_hotplug_core.c
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 256defd7e174..dcf7acc720e1 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -4,506 +4,356 @@
Revised Feb 12, 2004 by Martine Silbermann
email: Martine.Silbermann@hp.com
Revised Jun 25, 2004 by Tom L Nguyen
+ Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
+ Copyright 2003, 2008 Intel Corporation
1. About this guide
-This guide describes the basics of Message Signaled Interrupts (MSI),
-the advantages of using MSI over traditional interrupt mechanisms,
-and how to enable your driver to use MSI or MSI-X. Also included is
-a Frequently Asked Questions (FAQ) section.
-
-1.1 Terminology
-
-PCI devices can be single-function or multi-function. In either case,
-when this text talks about enabling or disabling MSI on a "device
-function," it is referring to one specific PCI device and function and
-not to all functions on a PCI device (unless the PCI device has only
-one function).
-
-2. Copyright 2003 Intel Corporation
-
-3. What is MSI/MSI-X?
-
-Message Signaled Interrupt (MSI), as described in the PCI Local Bus
-Specification Revision 2.3 or later, is an optional feature, and a
-required feature for PCI Express devices. MSI enables a device function
-to request service by sending an Inbound Memory Write on its PCI bus to
-the FSB as a Message Signal Interrupt transaction. Because MSI is
-generated in the form of a Memory Write, all transaction conditions,
-such as a Retry, Master-Abort, Target-Abort or normal completion, are
-supported.
-
-A PCI device that supports MSI must also support pin IRQ assertion
-interrupt mechanism to provide backward compatibility for systems that
-do not support MSI. In systems which support MSI, the bus driver is
-responsible for initializing the message address and message data of
-the device function's MSI/MSI-X capability structure during device
-initial configuration.
-
-An MSI capable device function indicates MSI support by implementing
-the MSI/MSI-X capability structure in its PCI capability list. The
-device function may implement both the MSI capability structure and
-the MSI-X capability structure; however, the bus driver should not
-enable both.
-
-The MSI capability structure contains Message Control register,
-Message Address register and Message Data register. These registers
-provide the bus driver control over MSI. The Message Control register
-indicates the MSI capability supported by the device. The Message
-Address register specifies the target address and the Message Data
-register specifies the characteristics of the message. To request
-service, the device function writes the content of the Message Data
-register to the target address. The device and its software driver
-are prohibited from writing to these registers.
-
-The MSI-X capability structure is an optional extension to MSI. It
-uses an independent and separate capability structure. There are
-some key advantages to implementing the MSI-X capability structure
-over the MSI capability structure as described below.
-
- - Support a larger maximum number of vectors per function.
-
- - Provide the ability for system software to configure
- each vector with an independent message address and message
- data, specified by a table that resides in Memory Space.
-
- - MSI and MSI-X both support per-vector masking. Per-vector
- masking is an optional extension of MSI but a required
- feature for MSI-X. Per-vector masking provides the kernel the
- ability to mask/unmask a single MSI while running its
- interrupt service routine. If per-vector masking is
- not supported, then the device driver should provide the
- hardware/software synchronization to ensure that the device
- generates MSI when the driver wants it to do so.
-
-4. Why use MSI?
-
-As a benefit to the simplification of board design, MSI allows board
-designers to remove out-of-band interrupt routing. MSI is another
-step towards a legacy-free environment.
-
-Due to increasing pressure on chipset and processor packages to
-reduce pin count, the need for interrupt pins is expected to
-diminish over time. Devices, due to pin constraints, may implement
-messages to increase performance.
-
-PCI Express endpoints uses INTx emulation (in-band messages) instead
-of IRQ pin assertion. Using INTx emulation requires interrupt
-sharing among devices connected to the same node (PCI bridge) while
-MSI is unique (non-shared) and does not require BIOS configuration
-support. As a result, the PCI Express technology requires MSI
-support for better interrupt performance.
-
-Using MSI enables the device functions to support two or more
-vectors, which can be configured to target different CPUs to
-increase scalability.
-
-5. Configuring a driver to use MSI/MSI-X
-
-By default, the kernel will not enable MSI/MSI-X on all devices that
-support this capability. The CONFIG_PCI_MSI kernel option
-must be selected to enable MSI/MSI-X support.
-
-5.1 Including MSI/MSI-X support into the kernel
-
-To allow MSI/MSI-X capable device drivers to selectively enable
-MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described
-below), the VECTOR based scheme needs to be enabled by setting
-CONFIG_PCI_MSI during kernel config.
-
-Since the target of the inbound message is the local APIC, providing
-CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI.
-
-5.2 Configuring for MSI support
-
-Due to the non-contiguous fashion in vector assignment of the
-existing Linux kernel, this version does not support multiple
-messages regardless of a device function is capable of supporting
-more than one vector. To enable MSI on a device function's MSI
-capability structure requires a device driver to call the function
-pci_enable_msi() explicitly.
-
-5.2.1 API pci_enable_msi
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
-int pci_enable_msi(struct pci_dev *dev)
-With this new API, a device driver that wants to have MSI
-enabled on its device function must call this API to enable MSI.
-A successful call will initialize the MSI capability structure
-with ONE vector, regardless of whether a device function is
-capable of supporting multiple messages. This vector replaces the
-pre-assigned dev->irq with a new MSI vector. To avoid a conflict
-of the new assigned vector with existing pre-assigned vector requires
-a device driver to call this API before calling request_irq().
+2. What are MSIs?
-5.2.2 API pci_disable_msi
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
-void pci_disable_msi(struct pci_dev *dev)
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
+capability was also introduced with PCI 3.0. It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
-This API should always be used to undo the effect of pci_enable_msi()
-when a device driver is unloading. This API restores dev->irq with
-the pre-assigned IOAPIC vector and switches a device's interrupt
-mode to PCI pin-irq assertion/INTx emulation mode.
-
-Note that a device driver should always call free_irq() on the MSI vector
-that it has done request_irq() on before calling this API. Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector.
-
-5.2.3 MSI mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-capable device function between MSI mode and
-PIN-IRQ assertion mode.
-
- ------------ pci_enable_msi ------------------------
- | | <=============== | |
- | MSI MODE | | PIN-IRQ ASSERTION MODE |
- | | ===============> | |
- ------------ pci_disable_msi ------------------------
-
-
-Figure 1. MSI Mode vs. Legacy Mode
-
-In Figure 1, a device operates by default in legacy mode. Legacy
-in this context means PCI pin-irq assertion or PCI-Express INTx
-emulation. A successful MSI request (using pci_enable_msi()) switches
-a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem and a new
-assigned MSI vector will replace dev->irq.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
-device driver should always call free_irq() on the MSI vector it has
-done request_irq() on before calling pci_disable_msi(). Failure to do
-so results in a BUG_ON() and a device will be left with MSI enabled and
-leaks its vector. Otherwise, the PCI subsystem restores a device's
-dev->irq with a pre-assigned IOAPIC vector and marks the released
-MSI vector as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve this MSI vector for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
-
-For the case where the PCI subsystem re-assigns this MSI vector to
-another driver, a request to switch back to MSI mode may result
-in being assigned a different MSI vector or a failure if no more
-vectors are available.
-
-5.3 Configuring for MSI-X support
-
-Due to the ability of the system software to configure each vector of
-the MSI-X capability structure with an independent message address
-and message data, the non-contiguous fashion in vector assignment of
-the existing Linux kernel has no impact on supporting multiple
-messages on an MSI-X capable device functions. To enable MSI-X on
-a device function's MSI-X capability structure requires its device
-driver to call the function pci_enable_msix() explicitly.
-
-The function pci_enable_msix(), once invoked, enables either
-all or nothing, depending on the current availability of PCI vector
-resources. If the PCI vector resources are available for the number
-of vectors requested by a device driver, this function will configure
-the MSI-X table of the MSI-X capability structure of a device with
-requested messages. To emphasize this reason, for example, a device
-may be capable for supporting the maximum of 32 vectors while its
-software driver usually may request 4 vectors. It is recommended
-that the device driver should call this function once during the
-initialization phase of the device driver.
-
-Unlike the function pci_enable_msi(), the function pci_enable_msix()
-does not replace the pre-assigned IOAPIC dev->irq with a new MSI
-vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
-into the field vector of each element contained in a second argument.
-Note that the pre-assigned IOAPIC dev->irq is valid only if the device
-operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
-using dev->irq by the device driver to request for interrupt service
-may result in unpredictable behavior.
-
-For each MSI-X vector granted, a device driver is responsible for calling
-other functions like request_irq(), enable_irq(), etc. to enable
-this vector with its corresponding interrupt service handler. It is
-a device driver's choice to assign all vectors with the same
-interrupt service handler or each vector with a unique interrupt
-service handler.
-
-5.3.1 Handling MMIO address space of MSI-X Table
-
-The PCI 3.0 specification has implementation notes that MMIO address
-space for a device's MSI-X structure should be isolated so that the
-software system can set different pages for controlling accesses to the
-MSI-X structure. The implementation of MSI support requires the PCI
-subsystem, not a device driver, to maintain full control of the MSI-X
-table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
-table/MSI-X PBA. A device driver should not access the MMIO address
-space of the MSI-X table/MSI-X PBA.
-
-5.3.2 API pci_enable_msix
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
-This API enables a device driver to request the PCI subsystem
-to enable MSI-X messages on its hardware device. Depending on
-the availability of PCI vectors resources, the PCI subsystem enables
-either all or none of the requested vectors.
+3. Why use MSIs?
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole. MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges). In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt. PCI transaction ordering rules require that all the data
+arrives in memory before the value can be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case. With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose. One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+4. How to use MSIs
+
+PCI devices are initialised to use pin-based interrupts. The device
+driver has to set up the device to use MSI or MSI-X. Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+4.1 Include kernel support for MSIs
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled. This option is only available on some architectures,
+and it may depend on some other options also being set. For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+4.2 Using MSI
+
+Most of the hard work is done for the driver in the PCI layer. It simply
+has to request that the PCI layer set up the MSI capability for this
+device.
+
+4.2.1 pci_enable_msi
+
+int pci_enable_msi(struct pci_dev *dev)
+
+A successful call will allocate ONE interrupt to the device, regardless
+of how many MSIs the device supports. The device will be switched from
+pin-based interrupt mode to MSI mode. The dev->irq number is changed
+to a new number which represents the message signaled interrupt.
+This function should be called before the driver calls request_irq()
+since enabling MSIs disables the pin-based IRQ and the driver will not
+receive interrupts on the old interrupt.
+
+4.2.2 pci_enable_msi_block
+
+int pci_enable_msi_block(struct pci_dev *dev, int count)
+
+This variation on the above call allows a device driver to request multiple
+MSIs. The MSI specification only allows interrupts to be allocated in
+powers of two, up to a maximum of 2^5 (32).
+
+If this function returns 0, it has succeeded in allocating at least as many
+interrupts as the driver requested (it may have allocated more in order
+to satisfy the power-of-two requirement). In this case, the function
+enables MSI on this device and updates dev->irq to be the lowest of
+the new interrupts assigned to it. The other interrupts assigned to
+the device are in the range dev->irq to dev->irq + count - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device. If this function returns a positive number, it will be
+less than 'count' and indicate the number of interrupts that could have
+been allocated. In neither case will the irq value have been
+updated, nor will the device have been switched into MSI mode.
+
+The device driver must decide what action to take if
+pci_enable_msi_block() returns a value less than the number asked for.
+Some devices can make use of fewer interrupts than the maximum they
+request; in this case the driver should call pci_enable_msi_block()
+again. Note that it is not guaranteed to succeed, even when the
+'count' has been reduced to the value returned from a previous call to
+pci_enable_msi_block(). This is because there are multiple constraints
+on the number of vectors that can be allocated; pci_enable_msi_block()
+will return as soon as it finds any constraint that doesn't allow the
+call to succeed.
+
+4.2.3 pci_disable_msi
+
+void pci_disable_msi(struct pci_dev *dev)
-Argument 'dev' points to the device (pci_dev) structure.
+This function should be used to undo the effect of pci_enable_msi() or
+pci_enable_msi_block(). Calling it restores dev->irq to the pin-based
+interrupt number and frees the previously allocated message signaled
+interrupt(s). The interrupt may subsequently be assigned to another
+device, so drivers should not cache the value of dev->irq.
-Argument 'entries' is a pointer to an array of msix_entry structs.
-The number of entries is indicated in argument 'nvec'.
-struct msix_entry is defined in /driver/pci/msi.h:
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3 Using MSI-X
+
+The MSI-X capability is much more flexible than the MSI capability.
+It supports up to 2048 interrupts, each of which can be controlled
+independently. To support this flexibility, drivers must use an array of
+`struct msix_entry':
struct msix_entry {
u16 vector; /* kernel uses to write alloc vector */
u16 entry; /* driver uses to specify entry */
};
-A device driver is responsible for initializing the field 'entry' of
-each element with a unique entry supported by MSI-X table. Otherwise,
--EINVAL will be returned as a result. A successful return of zero
-indicates the PCI subsystem completed initializing each of the requested
-entries of the MSI-X table with message address and message data.
-Last but not least, the PCI subsystem will write the 1:1
-vector-to-entry mapping into the field 'vector' of each element. A
-device driver is responsible for keeping track of allocated MSI-X
-vectors in its internal data structure.
-
-A return of zero indicates that the number of MSI-X vectors was
-successfully allocated. A return of greater than zero indicates
-MSI-X vector shortage. Or a return of less than zero indicates
-a failure. This failure may be a result of duplicate entries
-specified in second argument, or a result of no available vector,
-or a result of failing to initialize MSI-X table entries.
-
-5.3.3 API pci_disable_msix
+This allows for the device to use these interrupts in a sparse fashion;
+for example it could use interrupts 3 and 1027 and allocate only a
+two-element array. The driver is expected to fill in the 'entry' value
+in each element of the array to indicate which entries it wants the kernel
+to assign interrupts for. It is invalid to fill in two entries with the
+same number.
+
+4.3.1 pci_enable_msix
+
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+
+Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
+The 'entries' argument is a pointer to an array of msix_entry structs
+which should be at least 'nvec' entries in size. On success, the
+function will return 0 and the device will have been switched into
+MSI-X interrupt mode. The 'vector' elements in each entry will have
+been filled in with the interrupt number. The driver should then call
+request_irq() for each 'vector' that it decides to use.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to allocate any more MSI-X interrupts for
+this device. If it returns a positive number, it indicates the maximum
+number of interrupt vectors that could have been allocated. See example
+below.
+
+This function, in contrast with pci_enable_msi(), does not adjust
+dev->irq. The device will not generate interrupts for this interrupt
+number once MSI-X is enabled. The device driver is responsible for
+keeping track of the interrupts assigned to the MSI-X vectors so it can
+free them again later.
+
+Device drivers should normally call this function once per device
+during the initialization phase.
+
+It is ideal if drivers can cope with a variable number of MSI-X interrupts,
+there are many reasons why the platform may not be able to provide the
+exact number a driver asks for.
+
+A request loop to achieve that might look like:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+ while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
+ rc = pci_enable_msix(adapter->pdev,
+ adapter->msix_entries, nvec);
+ if (rc > 0)
+ nvec = rc;
+ else
+ return rc;
+ }
+
+ return -ENOSPC;
+}
+
+4.3.2 pci_disable_msix
void pci_disable_msix(struct pci_dev *dev)
-This API should always be used to undo the effect of pci_enable_msix()
-when a device driver is unloading. Note that a device driver should
-always call free_irq() on all MSI-X vectors it has done request_irq()
-on before calling this API. Failure to do so results in a BUG_ON() and
-a device will be left with MSI-X enabled and leaks its vectors.
-
-5.3.4 MSI-X mode vs. legacy mode diagram
-
-The below diagram shows the events which switch the interrupt
-mode on the MSI-X capable device function between MSI-X mode and
-PIN-IRQ assertion mode (legacy).
-
- ------------ pci_enable_msix(,,n) ------------------------
- | | <=============== | |
- | MSI-X MODE | | PIN-IRQ ASSERTION MODE |
- | | ===============> | |
- ------------ pci_disable_msix ------------------------
-
-Figure 2. MSI-X Mode vs. Legacy Mode
-
-In Figure 2, a device operates by default in legacy mode. A
-successful MSI-X request (using pci_enable_msix()) switches a
-device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
-stored in dev->irq will be saved by the PCI subsystem; however,
-unlike MSI mode, the PCI subsystem will not replace dev->irq with
-assigned MSI-X vector because the PCI subsystem already writes the 1:1
-vector-to-entry mapping into the field 'vector' of each element
-specified in second argument.
-
-To return back to its default mode, a device driver should always call
-pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
-a device driver should always call free_irq() on all MSI-X vectors it
-has done request_irq() on before calling pci_disable_msix(). Failure
-to do so results in a BUG_ON() and a device will be left with MSI-X
-enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
-device function's interrupt mode from MSI-X mode to legacy mode and
-marks all allocated MSI-X vectors as unused.
-
-Once being marked as unused, there is no guarantee that the PCI
-subsystem will reserve these MSI-X vectors for a device. Depending on
-the availability of current PCI vector resources and the number of
-MSI/MSI-X requests from other drivers, these MSI-X vectors may be
-re-assigned.
-
-For the case where the PCI subsystem re-assigned these MSI-X vectors
-to other drivers, a request to switch back to MSI-X mode may result
-being assigned with another set of MSI-X vectors or a failure if no
-more vectors are available.
-
-5.4 Handling function implementing both MSI and MSI-X capabilities
-
-For the case where a function implements both MSI and MSI-X
-capabilities, the PCI subsystem enables a device to run either in MSI
-mode or MSI-X mode but not both. A device driver determines whether it
-wants MSI or MSI-X enabled on its hardware device. Once a device
-driver requests for MSI, for example, it is prohibited from requesting
-MSI-X; in other words, a device driver is not permitted to ping-pong
-between MSI mod MSI-X mode during a run-time.
-
-5.5 Hardware requirements for MSI/MSI-X support
-
-MSI/MSI-X support requires support from both system hardware and
-individual hardware device functions.
-
-5.5.1 Required x86 hardware support
-
-Since the target of MSI address is the local APIC CPU, enabling
-MSI/MSI-X support in the Linux kernel is dependent on whether existing
-system hardware supports local APIC. Users should verify that their
-system supports local APIC operation by testing that it runs when
-CONFIG_X86_LOCAL_APIC=y.
-
-In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
-however, in UP environment, users must manually set
-CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
-CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
-MSI-capable device drivers to selectively enable MSI/MSI-X.
-
-Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
-vector is allocated new during runtime and MSI/MSI-X support does not
-depend on BIOS support. This key independency enables MSI/MSI-X
-support on future IOxAPIC free platforms.
-
-5.5.2 Device hardware support
-
-The hardware device function supports MSI by indicating the
-MSI/MSI-X capability structure on its PCI capability list. By
-default, this capability structure will not be initialized by
-the kernel to enable MSI during the system boot. In other words,
-the device function is running on its default pin assertion mode.
-Note that in many cases the hardware supporting MSI have bugs,
-which may result in system hangs. The software driver of specific
-MSI-capable hardware is responsible for deciding whether to call
-pci_enable_msi or not. A return of zero indicates the kernel
-successfully initialized the MSI/MSI-X capability structure of the
-device function. The device function is now running on MSI/MSI-X mode.
-
-5.6 How to tell whether MSI/MSI-X is enabled on device function
-
-At the driver level, a return of zero from the function call of
-pci_enable_msi()/pci_enable_msix() indicates to a device driver that
-its device function is initialized successfully and ready to run in
-MSI/MSI-X mode.
-
-At the user level, users can use the command 'cat /proc/interrupts'
-to display the vectors allocated for devices and their interrupt
-MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
-enabled on a SCSI Adaptec 39320D Ultra320 controller.
-
- CPU0 CPU1
- 0: 324639 0 IO-APIC-edge timer
- 1: 1186 0 IO-APIC-edge i8042
- 2: 0 0 XT-PIC cascade
- 12: 2797 0 IO-APIC-edge i8042
- 14: 6543 0 IO-APIC-edge ide0
- 15: 1 0 IO-APIC-edge ide1
-169: 0 0 IO-APIC-level uhci-hcd
-185: 0 0 IO-APIC-level uhci-hcd
-193: 138 10 PCI-MSI aic79xx
-201: 30 0 PCI-MSI aic79xx
-225: 30 0 IO-APIC-level aic7xxx
-233: 30 0 IO-APIC-level aic7xxx
-NMI: 0 0
-LOC: 324553 325068
-ERR: 0
-MIS: 0
-
-6. MSI quirks
-
-Several PCI chipsets or devices are known to not support MSI.
-The PCI stack provides 3 possible levels of MSI disabling:
-* on a single device
-* on all devices behind a specific bridge
-* globally
-
-6.1. Disabling MSI on a single device
-
-Under some circumstances it might be required to disable MSI on a
-single device. This may be achieved by either not calling pci_enable_msi()
-or all, or setting the pci_dev->no_msi flag before (most of the time
-in a quirk).
-
-6.2. Disabling MSI below a bridge
-
-The vast majority of MSI quirks are required by PCI bridges not
-being able to route MSI between busses. In this case, MSI have to be
-disabled on all devices behind this bridge. It is achieves by setting
-the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge
-subordinate bus. There is no need to set the same flag on bridges that
-are below the broken bridge. When pci_enable_msi() is called to enable
-MSI on a device, pci_msi_supported() takes care of checking the NO_MSI
-flag in all parent busses of the device.
-
-Some bridges actually support dynamic MSI support enabling/disabling
-by changing some bits in their PCI configuration space (especially
-the Hypertransport chipsets such as the nVidia nForce and Serverworks
-HT2000). It may then be required to update the NO_MSI flag on the
-corresponding devices in the sysfs hierarchy. To enable MSI support
-on device "0000:00:0e", do:
-
- echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus
-
-To disable MSI support, echo 0 instead of 1. Note that it should be
-used with caution since changing this value might break interrupts.
-
-6.3. Disabling MSI globally
-
-Some extreme cases may require to disable MSI globally on the system.
-For now, the only known case is a Serverworks PCI-X chipsets (MSI are
-not supported on several busses that are not all connected to the
-chipset in the Linux PCI hierarchy). In the vast majority of other
-cases, disabling only behind a specific bridge is enough.
-
-For debugging purpose, the user may also pass pci=nomsi on the kernel
-command-line to explicitly disable MSI globally. But, once the appro-
-priate quirks are added to the kernel, this option should not be
-required anymore.
-
-6.4. Finding why MSI cannot be enabled on a device
-
-Assuming that MSI are not enabled on a device, you should look at
-dmesg to find messages that quirks may output when disabling MSI
-on some devices, some bridges or even globally.
-Then, lspci -t gives the list of bridges above a device. Reading
-/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI
-are enabled (1) or disabled (0). In 0 is found in a single bridge
-msi_bus file above the device, MSI cannot be enabled.
-
-7. FAQ
-
-Q1. Are there any limitations on using the MSI?
-
-A1. If the PCI device supports MSI and conforms to the
-specification and the platform supports the APIC local bus,
-then using MSI should work.
-
-Q2. Will it work on all the Pentium processors (P3, P4, Xeon,
-AMD processors)? In P3 IPI's are transmitted on the APIC local
-bus and in P4 and Xeon they are transmitted on the system
-bus. Are there any implications with this?
-
-A2. MSI support enables a PCI device sending an inbound
-memory write (0xfeexxxxx as target address) on its PCI bus
-directly to the FSB. Since the message address has a
-redirection hint bit cleared, it should work.
-
-Q3. The target address 0xfeexxxxx will be translated by the
-Host Bridge into an interrupt message. Are there any
-limitations on the chipsets such as Intel 8xx, Intel e7xxx,
-or VIA?
-
-A3. If these chipsets support an inbound memory write with
-target address set as 0xfeexxxxx, as conformed to PCI
-specification 2.3 or latest, then it should work.
-
-Q4. From the driver point of view, if the MSI is lost because
-of errors occurring during inbound memory write, then it may
-wait forever. Is there a mechanism for it to recover?
-
-A4. Since the target of the transaction is an inbound memory
-write, all transaction termination conditions (Retry,
-Master-Abort, Target-Abort, or normal completion) are
-supported. A device sending an MSI must abide by all the PCI
-rules and conditions regarding that inbound memory write. So,
-if a retry is signaled it must retry, etc... We believe that
-the recommendation for Abort is also a retry (refer to PCI
-specification 2.3 or latest).
+This API should be used to undo the effect of pci_enable_msix(). It frees
+the previously allocated message signaled interrupts. The interrupts may
+subsequently be assigned to another device, so drivers should not cache
+the value of the 'vector' elements over a call to pci_disable_msix().
+
+A device driver must always call free_irq() on the interrupt(s)
+for which it has called request_irq() before calling this function.
+Failure to do so will result in a BUG_ON(), the device will be left with
+MSI enabled and will leak its vector.
+
+4.3.3 The MSI-X Table
+
+The MSI-X capability specifies a BAR and offset within that BAR for the
+MSI-X Table. This address is mapped by the PCI subsystem, and should not
+be accessed directly by the device driver. If the driver wishes to
+mask or unmask an interrupt, it should call disable_irq() / enable_irq().
+
+4.4 Handling devices implementing both MSI and MSI-X capabilities
+
+If a device implements both MSI and MSI-X capabilities, it can
+run in either MSI mode or MSI-X mode but not both simultaneously.
+This is a requirement of the PCI spec, and it is enforced by the
+PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or
+pci_enable_msix() when MSI is already enabled will result in an error.
+If a device driver wishes to switch between MSI and MSI-X at runtime,
+it must first quiesce the device, then switch it back to pin-interrupt
+mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
+operation. This is not expected to be a common operation but may be
+useful for debugging or testing during development.
+
+4.5 Considerations when using MSIs
+
+4.5.1 Choosing between MSI-X and MSI
+
+If your device supports both MSI-X and MSI capabilities, you should use
+the MSI-X facilities in preference to the MSI facilities. As mentioned
+above, MSI-X supports any number of interrupts between 1 and 2048.
+In constrast, MSI is restricted to a maximum of 32 interrupts (and
+must be a power of two). In addition, the MSI interrupt vectors must
+be allocated consecutively, so the system may not be able to allocate
+as many vectors for MSI as it could for MSI-X. On some platforms, MSI
+interrupts must all be targetted at the same set of CPUs whereas MSI-X
+interrupts can all be targetted at different CPUs.
+
+4.5.2 Spinlocks
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler. With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered). If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held. If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock.
+
+There are two solutions. The first is to take the lock with
+spin_lock_irqsave() or spin_lock_irq() (see
+Documentation/DocBook/kernel-locking). The second is to specify
+IRQF_DISABLED to request_irq() so that the kernel runs the entire
+interrupt routine with interrupts disabled.
+
+If your MSI interrupt routine does not hold the lock for the whole time
+it is running, the first solution may be best. The second solution is
+normally preferred as it avoids making two transitions from interrupt
+disabled to enabled and back again.
+
+4.6 How to tell whether MSI/MSI-X is enabled on a device
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
+has an 'Enable' flag which will be followed with either "+" (enabled)
+or "-" (disabled).
+
+
+5. MSI quirks
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+5.1. Disabling MSIs globally
+
+Some host chipsets simply don't support MSIs properly. If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table. In this case, Linux will automatically disable MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves. The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices. It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+5.2. Disabling MSIs below a bridge
+
+Some PCI bridges are not able to route MSIs between busses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000). As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge which Linux doesn't yet know about, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing:
+
+ echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1. Changing this value should be
+done with caution as it can break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+5.3. Disabling MSIs on a single device
+
+Some devices are known to have faulty MSI implementations. Usually this
+is handled in the individual device driver but occasionally it's necessary
+to handle this with a quirk. Some drivers have an option to disable use
+of MSI. While this is a convenient workaround for the driver author,
+it is not good practise, and should not be emulated.
+
+5.4. Finding why MSIs are disabled on a device
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device. Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine. You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device. Reading
+/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1)
+or disabled (0). If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or
+pci_enable_msi_block().
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
new file mode 100644
index 000000000000..fc73ef5d65b8
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.txt
@@ -0,0 +1,99 @@
+ PCI Express I/O Virtualization Howto
+ Copyright (C) 2009 Intel Corporation
+ Yu Zhao <yu.zhao@intel.com>
+
+
+1. Overview
+
+1.1 What is SR-IOV
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+2. User Guide
+
+2.1 How can I enable SR-IOV capability
+
+The device driver (PF driver) will control the enabling and disabling
+of the capability via API provided by SR-IOV core. If the hardware
+has SR-IOV capability, loading its PF driver would enable it and all
+VFs associated with the PF.
+
+2.2 How can I use the Virtual Functions
+
+The VF is treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. The VF
+requires device driver that is same as a normal PCI device's.
+
+3. Developer Guide
+
+3.1 SR-IOV API
+
+To enable SR-IOV capability:
+ int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+ 'nr_virtfn' is number of VFs to be enabled.
+
+To disable SR-IOV capability:
+ void pci_disable_sriov(struct pci_dev *dev);
+
+To notify SR-IOV core of Virtual Function Migration:
+ irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+
+3.2 Usage example
+
+Following piece of code illustrates the usage of the SR-IOV API.
+
+static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ pci_enable_sriov(dev, NR_VIRTFN);
+
+ ...
+
+ return 0;
+}
+
+static void __devexit dev_remove(struct pci_dev *dev)
+{
+ pci_disable_sriov(dev);
+
+ ...
+}
+
+static int dev_suspend(struct pci_dev *dev, pm_message_t state)
+{
+ ...
+
+ return 0;
+}
+
+static int dev_resume(struct pci_dev *dev)
+{
+ ...
+
+ return 0;
+}
+
+static void dev_shutdown(struct pci_dev *dev)
+{
+ ...
+}
+
+static struct pci_driver dev_driver = {
+ .name = "SR-IOV Physical Function driver",
+ .id_table = dev_id_table,
+ .probe = dev_probe,
+ .remove = __devexit_p(dev_remove),
+ .suspend = dev_suspend,
+ .resume = dev_resume,
+ .shutdown = dev_shutdown,
+};
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 93feb8444489..4ea852345a47 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -333,12 +333,23 @@ The "xxx" is not interpreted by the cgroup code, but will appear in
To mount a cgroup hierarchy with just the cpuset and numtasks
subsystems, type:
-# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
+# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
To change the set of subsystems bound to a mounted hierarchy, just
remount with different options:
+# mount -o remount,cpuset,ns hier1 /dev/cgroup
-# mount -o remount,cpuset,ns /dev/cgroup
+Now memory is removed from the hierarchy and ns is added.
+
+Note this will add ns to the hierarchy but won't remove memory or
+cpuset, because the new options are appended to the old ones:
+# mount -o remount,ns /dev/cgroup
+
+To Specify a hierarchy's release_agent:
+# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+ xxx /dev/cgroup
+
+Note that specifying 'release_agent' more than once will return failure.
Note that changing the set of subsystems is currently only supported
when the hierarchy consists of a single (root) cgroup. Supporting
@@ -349,6 +360,11 @@ Then under /dev/cgroup you can find a tree that corresponds to the
tree of the cgroups in the system. For instance, /dev/cgroup
is the cgroup that holds the whole system.
+If you want to change the value of release_agent:
+# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
+
+It can also be changed via remount.
+
If you want to create a new cgroup under /dev/cgroup:
# cd /dev/cgroup
# mkdir my_cgroup
@@ -476,11 +492,13 @@ cgroup->parent is still valid. (Note - can also be called for a
newly-created cgroup if an error occurs after this subsystem's
create() method has been called for the new cgroup).
-void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
Called before checking the reference count on each subsystem. This may
be useful for subsystems which have some extra references even if
-there are not tasks in the cgroup.
+there are not tasks in the cgroup. If pre_destroy() returns error code,
+rmdir() will fail with it. From this behavior, pre_destroy() can be
+called multiple times against a cgroup.
int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
struct task_struct *task)
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 523a9c16c400..8a11caf417a0 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -1,5 +1,5 @@
Memory Resource Controller(Memcg) Implementation Memo.
-Last Updated: 2009/1/19
+Last Updated: 2009/1/20
Base Kernel Version: based on 2.6.29-rc2.
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
@@ -360,3 +360,21 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
# kill malloc task.
Of course, tmpfs v.s. swapoff test should be tested, too.
+
+ 9.8 OOM-Killer
+ Out-of-memory caused by memcg's limit will kill tasks under
+ the memcg. When hierarchy is used, a task under hierarchy
+ will be killed by the kernel.
+ In this case, panic_on_oom shouldn't be invoked and tasks
+ in other groups shouldn't be killed.
+
+ It's not difficult to cause OOM under memcg as following.
+ Case A) when you can swapoff
+ #swapoff -a
+ #echo 50M > /memory.limit_in_bytes
+ run 51M of malloc
+
+ Case B) when you use mem+swap limitation.
+ #echo 50M > memory.limit_in_bytes
+ #echo 50M > memory.memsw.limit_in_bytes
+ run 51M of malloc
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
index caabbd395e61..a618fd99c9f0 100644
--- a/Documentation/fb/00-INDEX
+++ b/Documentation/fb/00-INDEX
@@ -11,8 +11,6 @@ aty128fb.txt
- info on the ATI Rage128 frame buffer driver.
cirrusfb.txt
- info on the driver for Cirrus Logic chipsets.
-cyblafb/
- - directory with documentation files related to the cyblafb driver.
deferred_io.txt
- an introduction to deferred IO.
fbcon.txt
diff --git a/Documentation/fb/cyblafb/bugs b/Documentation/fb/cyblafb/bugs
deleted file mode 100644
index 9443a6d72cdd..000000000000
--- a/Documentation/fb/cyblafb/bugs
+++ /dev/null
@@ -1,13 +0,0 @@
-Bugs
-====
-
-I currently don't know of any bug. Please do send reports to:
- - linux-fbdev-devel@lists.sourceforge.net
- - Knut_Petersen@t-online.de.
-
-
-Untested features
-=================
-
-All LCD stuff is untested. If it worked in tridentfb, it should work in
-cyblafb. Please test and report the results to Knut_Petersen@t-online.de.
diff --git a/Documentation/fb/cyblafb/credits b/Documentation/fb/cyblafb/credits
deleted file mode 100644
index 0eb3b443dc2b..000000000000
--- a/Documentation/fb/cyblafb/credits
+++ /dev/null
@@ -1,7 +0,0 @@
-Thanks to
-=========
- * Alan Hourihane, for writing the X trident driver
- * Jani Monoses, for writing the tridentfb driver
- * Antonino A. Daplas, for review of the first published
- version of cyblafb and some code
- * Jochen Hein, for testing and a helpfull bug report
diff --git a/Documentation/fb/cyblafb/documentation b/Documentation/fb/cyblafb/documentation
deleted file mode 100644
index bb1aac048425..000000000000
--- a/Documentation/fb/cyblafb/documentation
+++ /dev/null
@@ -1,17 +0,0 @@
-Available Documentation
-=======================
-
-Apollo PLE 133 Chipset VT8601A North Bridge Datasheet, Rev. 1.82, October 22,
-2001, available from VIA:
-
- http://www.viavpsd.com/product/6/15/DS8601A182.pdf
-
-The datasheet is incomplete, some registers that need to be programmed are not
-explained at all and important bits are listed as "reserved". But you really
-need the datasheet to understand the code. "p. xxx" comments refer to page
-numbers of this document.
-
-XFree/XOrg drivers are available and of good quality, looking at the code
-there is a good idea if the datasheet does not provide enough information
-or if the datasheet seems to be wrong.
-
diff --git a/Documentation/fb/cyblafb/fb.modes b/Documentation/fb/cyblafb/fb.modes
deleted file mode 100644
index fe0e5223ba86..000000000000
--- a/Documentation/fb/cyblafb/fb.modes
+++ /dev/null
@@ -1,154 +0,0 @@
-#
-# Sample fb.modes file
-#
-# Provides an incomplete list of working modes for
-# the cyberblade/i1 graphics core.
-#
-# The value 4294967256 is used instead of -40. Of course, -40 is not
-# a really reasonable value, but chip design does not always follow
-# logic. Believe me, it's ok, and it's the way the BIOS does it.
-#
-# fbset requires 4294967256 in fb.modes and -40 as an argument to
-# the -t parameter. That's also not too reasonable, and it might change
-# in the future or might even be differt for your current version.
-#
-
-mode "640x480-50"
- geometry 640 480 2048 4096 8
- timings 47619 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-60"
- geometry 640 480 2048 4096 8
- timings 39682 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-70"
- geometry 640 480 2048 4096 8
- timings 34013 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-72"
- geometry 640 480 2048 4096 8
- timings 33068 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-75"
- geometry 640 480 2048 4096 8
- timings 31746 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-80"
- geometry 640 480 2048 4096 8
- timings 29761 4294967256 24 17 0 216 3
-endmode
-
-mode "640x480-85"
- geometry 640 480 2048 4096 8
- timings 28011 4294967256 24 17 0 216 3
-endmode
-
-mode "800x600-50"
- geometry 800 600 2048 4096 8
- timings 30303 96 24 14 0 136 11
-endmode
-
-mode "800x600-60"
- geometry 800 600 2048 4096 8
- timings 25252 96 24 14 0 136 11
-endmode
-
-mode "800x600-70"
- geometry 800 600 2048 4096 8
- timings 21645 96 24 14 0 136 11
-endmode
-
-mode "800x600-72"
- geometry 800 600 2048 4096 8
- timings 21043 96 24 14 0 136 11
-endmode
-
-mode "800x600-75"
- geometry 800 600 2048 4096 8
- timings 20202 96 24 14 0 136 11
-endmode
-
-mode "800x600-80"
- geometry 800 600 2048 4096 8
- timings 18939 96 24 14 0 136 11
-endmode
-
-mode "800x600-85"
- geometry 800 600 2048 4096 8
- timings 17825 96 24 14 0 136 11
-endmode
-
-mode "1024x768-50"
- geometry 1024 768 2048 4096 8
- timings 19054 144 24 29 0 120 3
-endmode
-
-mode "1024x768-60"
- geometry 1024 768 2048 4096 8
- timings 15880 144 24 29 0 120 3
-endmode
-
-mode "1024x768-70"
- geometry 1024 768 2048 4096 8
- timings 13610 144 24 29 0 120 3
-endmode
-
-mode "1024x768-72"
- geometry 1024 768 2048 4096 8
- timings 13232 144 24 29 0 120 3
-endmode
-
-mode "1024x768-75"
- geometry 1024 768 2048 4096 8
- timings 12703 144 24 29 0 120 3
-endmode
-
-mode "1024x768-80"
- geometry 1024 768 2048 4096 8
- timings 11910 144 24 29 0 120 3
-endmode
-
-mode "1024x768-85"
- geometry 1024 768 2048 4096 8
- timings 11209 144 24 29 0 120 3
-endmode
-
-mode "1280x1024-50"
- geometry 1280 1024 2048 4096 8
- timings 11114 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-60"
- geometry 1280 1024 2048 4096 8
- timings 9262 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-70"
- geometry 1280 1024 2048 4096 8
- timings 7939 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-72"
- geometry 1280 1024 2048 4096 8
- timings 7719 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-75"
- geometry 1280 1024 2048 4096 8
- timings 7410 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-80"
- geometry 1280 1024 2048 4096 8
- timings 6946 232 16 39 0 160 3
-endmode
-
-mode "1280x1024-85"
- geometry 1280 1024 2048 4096 8
- timings 6538 232 16 39 0 160 3
-endmode
diff --git a/Documentation/fb/cyblafb/performance b/Documentation/fb/cyblafb/performance
deleted file mode 100644
index 8d15d5dfc6b3..000000000000
--- a/Documentation/fb/cyblafb/performance
+++ /dev/null
@@ -1,79 +0,0 @@
-Speed
-=====
-
-CyBlaFB is much faster than tridentfb and vesafb. Compare the performance data
-for mode 1280x1024-[8,16,32]@61 Hz.
-
-Test 1: Cat a file with 2000 lines of 0 characters.
-Test 2: Cat a file with 2000 lines of 80 characters.
-Test 3: Cat a file with 2000 lines of 160 characters.
-
-All values show system time use in seconds, kernel 2.6.12 was used for
-the measurements. 2.6.13 is a bit slower, 2.6.14 hopefully will include a
-patch that speeds up kernel bitblitting a lot ( > 20%).
-
-+-----------+-----------------------------------------------------+
-| | not accelerated |
-| TRIDENTFB +-----------------+-----------------+-----------------+
-| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
-| | noypan | ypan | noypan | ypan | noypan | ypan |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Test 1 | 4.31 | 4.33 | 6.05 | 12.81 | ---- | ---- |
-| Test 2 | 67.94 | 5.44 | 123.16 | 14.79 | ---- | ---- |
-| Test 3 | 131.36 | 6.55 | 240.12 | 16.76 | ---- | ---- |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Comments | | | completely bro- |
-| | | | ken, monitor |
-| | | | switches off |
-+-----------+-----------------+-----------------+-----------------+
-
-
-+-----------+-----------------------------------------------------+
-| | accelerated |
-| TRIDENTFB +-----------------+-----------------+-----------------+
-| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
-| | noypan | ypan | noypan | ypan | noypan | ypan |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Test 1 | ---- | ---- | 20.62 | 1.22 | ---- | ---- |
-| Test 2 | ---- | ---- | 22.61 | 3.19 | ---- | ---- |
-| Test 3 | ---- | ---- | 24.59 | 5.16 | ---- | ---- |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Comments | broken, writing | broken, ok only | completely bro- |
-| | to wrong places | if bgcolor is | ken, monitor |
-| | on screen + bug | black, bug in | switches off |
-| | in fillrect() | fillrect() | |
-+-----------+-----------------+-----------------+-----------------+
-
-
-+-----------+-----------------------------------------------------+
-| | not accelerated |
-| VESAFB +-----------------+-----------------+-----------------+
-| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
-| | noypan | ypan | noypan | ypan | noypan | ypan |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Test 1 | 4.26 | 3.76 | 5.99 | 7.23 | ---- | ---- |
-| Test 2 | 65.65 | 4.89 | 120.88 | 9.08 | ---- | ---- |
-| Test 3 | 126.91 | 5.94 | 235.77 | 11.03 | ---- | ---- |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Comments | vga=0x307 | vga=0x31a | vga=0x31b not |
-| | fh=80kHz | fh=80kHz | supported by |
-| | fv=75kHz | fv=75kHz | video BIOS and |
-| | | | hardware |
-+-----------+-----------------+-----------------+-----------------+
-
-
-+-----------+-----------------------------------------------------+
-| | accelerated |
-| CYBLAFB +-----------------+-----------------+-----------------+
-| | 8 bpp | 16 bpp | 32 bpp |
-| | noypan | ypan | noypan | ypan | noypan | ypan |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Test 1 | 8.02 | 0.23 | 19.04 | 0.61 | 57.12 | 2.74 |
-| Test 2 | 8.38 | 0.55 | 19.39 | 0.92 | 57.54 | 3.13 |
-| Test 3 | 8.73 | 0.86 | 19.74 | 1.24 | 57.95 | 3.51 |
-+-----------+--------+--------+--------+--------+--------+--------+
-| Comments | | | |
-| | | | |
-| | | | |
-| | | | |
-+-----------+-----------------+-----------------+-----------------+
diff --git a/Documentation/fb/cyblafb/todo b/Documentation/fb/cyblafb/todo
deleted file mode 100644
index c5f6d0eae545..000000000000
--- a/Documentation/fb/cyblafb/todo
+++ /dev/null
@@ -1,31 +0,0 @@
-TODO / Missing features
-=======================
-
-Verify LCD stuff "stretch" and "center" options are
- completely untested ... this code needs to be
- verified. As I don't have access to such
- hardware, please contact me if you are
- willing run some tests.
-
-Interlaced video modes The reason that interleaved
- modes are disabled is that I do not know
- the meaning of the vertical interlace
- parameter. Also the datasheet mentions a
- bit d8 of a horizontal interlace parameter,
- but nowhere the lower 8 bits. Please help
- if you can.
-
-low-res double scan modes Who needs it?
-
-accelerated color blitting Who needs it? The console driver does use color
- blitting for nothing but drawing the penguine,
- everything else is done using color expanding
- blitting of 1bpp character bitmaps.
-
-ioctls Who needs it?
-
-TV-out Will be done later. Use "vga= " at boot time
- to set a suitable video mode.
-
-??? Feel free to contact me if you have any
- feature requests
diff --git a/Documentation/fb/cyblafb/usage b/Documentation/fb/cyblafb/usage
deleted file mode 100644
index a39bb3d402a2..000000000000
--- a/Documentation/fb/cyblafb/usage
+++ /dev/null
@@ -1,217 +0,0 @@
-CyBlaFB is a framebuffer driver for the Cyberblade/i1 graphics core integrated
-into the VIA Apollo PLE133 (aka vt8601) south bridge. It is developed and
-tested using a VIA EPIA 5000 board.
-
-Cyblafb - compiled into the kernel or as a module?
-==================================================
-
-You might compile cyblafb either as a module or compile it permanently into the
-kernel.
-
-Unless you have a real reason to do so you should not compile both vesafb and
-cyblafb permanently into the kernel. It's possible and it helps during the
-developement cycle, but it's useless and will at least block some otherwise
-usefull memory for ordinary users.
-
-Selecting Modes
-===============
-
- Startup Mode
- ============
-
- First of all, you might use the "vga=???" boot parameter as it is
- documented in vesafb.txt and svga.txt. Cyblafb will detect the video
- mode selected and will use the geometry and timings found by
- inspecting the hardware registers.
-
- video=cyblafb vga=0x317
-
- Alternatively you might use a combination of the mode, ref and bpp
- parameters. If you compiled the driver into the kernel, add something
- like this to the kernel command line:
-
- video=cyblafb:1280x1024,bpp=16,ref=50 ...
-
- If you compiled the driver as a module, the same mode would be
- selected by the following command:
-
- modprobe cyblafb mode=1280x1024 bpp=16 ref=50 ...
-
- None of the modes possible to select as startup modes are affected by
- the problems described at the end of the next subsection.
-
- For all startup modes cyblafb chooses a virtual x resolution of 2048,
- the only exception is mode 1280x1024 in combination with 32 bpp. This
- allows ywrap scrolling for all those modes if rotation is 0 or 2, and
- also fast scrolling if rotation is 1 or 3. The default virtual y reso-
- lution is 4096 for bpp == 8, 2048 for bpp==16 and 1024 for bpp == 32,
- again with the only exception of 1280x1024 at 32 bpp.
-
- Please do set your video memory size to 8 Mb in the Bios setup. Other
- values will work, but performace is decreased for a lot of modes.
-
- Mode changes using fbset
- ========================
-
- You might use fbset to change the video mode, see "man fbset". Cyblafb
- generally does assume that you know what you are doing. But it does
- some checks, especially those that are needed to prevent you from
- damaging your hardware.
-
- - only 8, 16, 24 and 32 bpp video modes are accepted
- - interlaced video modes are not accepted
- - double scan video modes are not accepted
- - if a flat panel is found, cyblafb does not allow you
- to program a resolution higher than the physical
- resolution of the flat panel monitor
- - cyblafb does not allow vclk to exceed 230 MHz. As 32 bpp
- and (currently) 24 bit modes use a doubled vclk internally,
- the dotclock limit as seen by fbset is 115 MHz for those
- modes and 230 MHz for 8 and 16 bpp modes.
- - cyblafb will allow you to select very high resolutions as
- long as the hardware can be programmed to these modes. The
- documented limit 1600x1200 is not enforced, but don't expect
- perfect signal quality.
-
- Any request that violates the rules given above will be either changed
- to something the hardware supports or an error value will be returned.
-
- If you program a virtual y resolution higher than the hardware limit,
- cyblafb will silently decrease that value to the highest possible
- value. The same is true for a virtual x resolution that is not
- supported by the hardware. Cyblafb tries to adapt vyres first because
- vxres decides if ywrap scrolling is possible or not.
-
- Attempts to disable acceleration are ignored, I believe that this is
- safe.
-
- Some video modes that should work do not work as expected. If you use
- the standard fb.modes, fbset 640x480-60 will program that mode, but
- you will see a vertical area, about two characters wide, with only
- much darker characters than the other characters on the screen.
- Cyblafb does allow that mode to be set, as it does not violate the
- official specifications. It would need a lot of code to reliably sort
- out all invalid modes, playing around with the margin values will
- give a valid mode quickly. And if cyblafb would detect such an invalid
- mode, should it silently alter the requested values or should it
- report an error? Both options have some pros and cons. As stated
- above, none of the startup modes are affected, and if you set
- verbosity to 1 or higher, cyblafb will print the fbset command that
- would be needed to program that mode using fbset.
-
-
-Other Parameters
-================
-
-
-crt don't autodetect, assume monitor connected to
- standard VGA connector
-
-fp don't autodetect, assume flat panel display
- connected to flat panel monitor interface
-
-nativex inform driver about native x resolution of
- flat panel monitor connected to special
- interface (should be autodetected)
-
-stretch stretch image to adapt low resolution modes to
- higer resolutions of flat panel monitors
- connected to special interface
-
-center center image to adapt low resolution modes to
- higer resolutions of flat panel monitors
- connected to special interface
-
-memsize use if autodetected memsize is wrong ...
- should never be necessary
-
-nopcirr disable PCI read retry
-nopciwr disable PCI write retry
-nopcirb disable PCI read bursts
-nopciwb disable PCI write bursts
-
-bpp bpp for specified modes
- valid values: 8 || 16 || 24 || 32
-
-ref refresh rate for specified mode
- valid values: 50 <= ref <= 85
-
-mode 640x480 or 800x600 or 1024x768 or 1280x1024
- if not specified, the startup mode will be detected
- and used, so you might also use the vga=??? parameter
- described in vesafb.txt. If you do not specify a mode,
- bpp and ref parameters are ignored.
-
-verbosity 0 is the default, increase to at least 2 for every
- bug report!
-
-Development hints
-=================
-
-It's much faster do compile a module and to load the new version after
-unloading the old module than to compile a new kernel and to reboot. So if you
-try to work on cyblafb, it might be a good idea to use cyblafb as a module.
-In real life, fast often means dangerous, and that's also the case here. If
-you introduce a serious bug when cyblafb is compiled into the kernel, the
-kernel will lock or oops with a high probability before the file system is
-mounted, and the danger for your data is low. If you load a broken own version
-of cyblafb on a running system, the danger for the integrity of the file
-system is much higher as you might need a hard reset afterwards. Decide
-yourself.
-
-Module unloading, the vfb method
-================================
-
-If you want to unload/reload cyblafb using the virtual framebuffer, you need
-to enable vfb support in the kernel first. After that, load the modules as
-shown below:
-
- modprobe vfb vfb_enable=1
- modprobe fbcon
- modprobe cyblafb
- fbset -fb /dev/fb1 1280x1024-60 -vyres 2662
- con2fb /dev/fb1 /dev/tty1
- ...
-
-If you now made some changes to cyblafb and want to reload it, you might do it
-as show below:
-
- con2fb /dev/fb0 /dev/tty1
- ...
- rmmod cyblafb
- modprobe cyblafb
- con2fb /dev/fb1 /dev/tty1
- ...
-
-Of course, you might choose another mode, and most certainly you also want to
-map some other /dev/tty* to the real framebuffer device. You might also choose
-to compile fbcon as a kernel module or place it permanently in the kernel.
-
-I do not know of any way to unload fbcon, and fbcon will prevent the
-framebuffer device loaded first from unloading. [If there is a way, then
-please add a description here!]
-
-Module unloading, the vesafb method
-===================================
-
-Configure the kernel:
-
- <*> Support for frame buffer devices
- [*] VESA VGA graphics support
- <M> Cyberblade/i1 support
-
-Add e.g. "video=vesafb:ypan vga=0x307" to the kernel parameters. The ypan
-parameter is important, choose any vga parameter you like as long as it is
-a graphics mode.
-
-After booting, load cyblafb without any mode and bpp parameter and assign
-cyblafb to individual ttys using con2fb, e.g.:
-
- modprobe cyblafb
- con2fb /dev/fb1 /dev/tty1
-
-Unloading cyblafb works without problems after you assign vesafb to all
-ttys again, e.g.:
-
- con2fb /dev/fb0 /dev/tty1
- rmmod cyblafb
diff --git a/Documentation/fb/cyblafb/whatsnew b/Documentation/fb/cyblafb/whatsnew
deleted file mode 100644
index 76c07a26e044..000000000000
--- a/Documentation/fb/cyblafb/whatsnew
+++ /dev/null
@@ -1,29 +0,0 @@
-0.62
-====
-
- - the vesafb parameter has been removed as I decided to allow the
- feature without any special parameter.
-
- - Cyblafb does not use the vga style of panning any longer, now the
- "right view" register in the graphics engine IO space is used. Without
- that change it was impossible to use all available memory, and without
- access to all available memory it is impossible to ywrap.
-
- - The imageblit function now uses hardware acceleration for all font
- widths. Hardware blitting across pixel column 2048 is broken in the
- cyberblade/i1 graphics core, but we work around that hardware bug.
-
- - modes with vxres != xres are supported now.
-
- - ywrap scrolling is supported now and the default. This is a big
- performance gain.
-
- - default video modes use vyres > yres and vxres > xres to allow
- almost optimal scrolling speed for normal and rotated screens
-
- - some features mainly usefull for debugging the upper layers of the
- framebuffer system have been added, have a look at the code
-
- - fixed: Oops after unloading cyblafb when reading /proc/io*
-
- - we work around some bugs of the higher framebuffer layers.
diff --git a/Documentation/fb/cyblafb/whycyblafb b/Documentation/fb/cyblafb/whycyblafb
deleted file mode 100644
index a123bc11e698..000000000000
--- a/Documentation/fb/cyblafb/whycyblafb
+++ /dev/null
@@ -1,85 +0,0 @@
-I tried the following framebuffer drivers:
-
- - TRIDENTFB is full of bugs. Acceleration is broken for Blade3D
- graphics cores like the cyberblade/i1. It claims to support a great
- number of devices, but documentation for most of these devices is
- unfortunately not available. There is _no_ reason to use tridentfb
- for cyberblade/i1 + CRT users. VESAFB is faster, and the one
- advantage, mode switching, is broken in tridentfb.
-
- - VESAFB is used by many distributions as a standard. Vesafb does
- not support mode switching. VESAFB is a bit faster than the working
- configurations of TRIDENTFB, but it is still too slow, even if you
- use ypan.
-
- - EPIAFB (you'll find it on sourceforge) supports the Cyberblade/i1
- graphics core, but it still has serious bugs and developement seems
- to have stopped. This is the one driver with TV-out support. If you
- do need this feature, try epiafb.
-
-None of these drivers was a real option for me.
-
-I believe that is unreasonable to change code that announces to support 20
-devices if I only have more or less sufficient documentation for exactly one
-of these. The risk of breaking device foo while fixing device bar is too high.
-
-So I decided to start CyBlaFB as a stripped down tridentfb.
-
-All code specific to other Trident chips has been removed. After that there
-were a lot of cosmetic changes to increase the readability of the code. All
-register names were changed to those mnemonics used in the datasheet. Function
-and macro names were changed if they hindered easy understanding of the code.
-
-After that I debugged the code and implemented some new features. I'll try to
-give a little summary of the main changes:
-
- - calculation of vertical and horizontal timings was fixed
-
- - video signal quality has been improved dramatically
-
- - acceleration:
-
- - fillrect and copyarea were fixed and reenabled
-
- - color expanding imageblit was newly implemented, color
- imageblit (only used to draw the penguine) still uses the
- generic code.
-
- - init of the acceleration engine was improved and moved to a
- place where it really works ...
-
- - sync function has a timeout now and tries to reset and
- reinit the accel engine if necessary
-
- - fewer slow copyarea calls when doing ypan scrolling by using
- undocumented bit d21 of screen start address stored in
- CR2B[5]. BIOS does use it also, so this should be safe.
-
- - cyblafb rejects any attempt to set modes that would cause vclk
- values above reasonable 230 MHz. 32bit modes use a clock
- multiplicator of 2, so fbset does show the correct values for
- pixclock but not for vclk in this case. The fbset limit is 115 MHz
- for 32 bpp modes.
-
- - cyblafb rejects modes known to be broken or unimplemented (all
- interlaced modes, all doublescan modes for now)
-
- - cyblafb now works independant of the video mode in effect at startup
- time (tridentfb does not init all needed registers to reasonable
- values)
-
- - switching between video modes does work reliably now
-
- - the first video mode now is the one selected on startup using the
- vga=???? mechanism or any of
- - 640x480, 800x600, 1024x768, 1280x1024
- - 8, 16, 24 or 32 bpp
- - refresh between 50 Hz and 85 Hz, 1 Hz steps (1280x1024-32
- is limited to 63Hz)
-
- - pci retry and pci burst mode are settable (try to disable if you
- experience latency problems)
-
- - built as a module cyblafb might be unloaded and reloaded using
- the vfb module and con2vt or might be used together with vesafb
-
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5e02b83ac12b..a23361e80c64 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -255,6 +255,16 @@ Who: Jan Engelhardt <jengelh@computergmbh.de>
---------------------------
+What: GPIO autorequest on gpio_direction_{input,output}() in gpiolib
+When: February 2010
+Why: All callers should use explicit gpio_request()/gpio_free().
+ The autorequest mechanism in gpiolib was provided mostly as a
+ migration aid for legacy GPIO interfaces (for SOC based GPIOs).
+ Those users have now largely migrated. Platforms implementing
+ the GPIO interfaces without using gpiolib will see no changes.
+Who: David Brownell <dbrownell@users.sourceforge.net>
+---------------------------
+
What: b43 support for firmware revision < 410
When: The schedule was July 2008, but it was decided that we are going to keep the
code as long as there are no major maintanance headaches.
@@ -311,6 +321,18 @@ Who: Vlad Yasevich <vladislav.yasevich@hp.com>
---------------------------
+What: Ability for non root users to shm_get hugetlb pages based on mlock
+ resource limits
+When: 2.6.31
+Why: Non root users need to be part of /proc/sys/vm/hugetlb_shm_group or
+ have CAP_IPC_LOCK to be able to allocate shm segments backed by
+ huge pages. The mlock based rlimit check to allow shm hugetlb is
+ inconsistent with mmap based allocations. Hence it is being
+ deprecated.
+Who: Ravikiran Thirumalai <kiran@scalex86.org>
+
+---------------------------
+
What: CONFIG_THERMAL_HWMON
When: January 2009
Why: This option was introduced just to allow older lm-sensors userspace
@@ -380,3 +402,35 @@ Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
have been kept around for migration reasons. After more than two years
it's time to remove them finally
Who: Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
+What: fakephp and associated sysfs files in /sys/bus/pci/slots/
+When: 2011
+Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
+ represent a machine's physical PCI slots. The change in semantics
+ had userspace implications, as the hotplug core no longer allowed
+ drivers to create multiple sysfs files per physical slot (required
+ for multi-function devices, e.g.). fakephp was seen as a developer's
+ tool only, and its interface changed. Too late, we learned that
+ there were some users of the fakephp interface.
+
+ In 2.6.30, the original fakephp interface was restored. At the same
+ time, the PCI core gained the ability that fakephp provided, namely
+ function-level hot-remove and hot-add.
+
+ Since the PCI core now provides the same functionality, exposed in:
+
+ /sys/bus/pci/rescan
+ /sys/bus/pci/devices/.../remove
+ /sys/bus/pci/devices/.../rescan
+
+ there is no functional reason to maintain fakephp as well.
+
+ We will keep the existing module so that 'modprobe fakephp' will
+ present the old /sys/bus/pci/slots/... interface for compatibility,
+ but users are urged to migrate their applications to the API above.
+
+ After a reasonable transition period, we will remove the legacy
+ fakephp interface.
+Who: Alex Chiang <achiang@hp.com>
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4e78ce677843..76efe5b71d7d 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -505,7 +505,7 @@ prototypes:
void (*open)(struct vm_area_struct*);
void (*close)(struct vm_area_struct*);
int (*fault)(struct vm_area_struct*, struct vm_fault *);
- int (*page_mkwrite)(struct vm_area_struct *, struct page *);
+ int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
locking rules:
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index cec829bc7291..97882df04865 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -85,7 +85,7 @@ Note: More extensive information for getting started with ext4 can be
* extent format more robust in face of on-disk corruption due to magics,
* internal redundancy in tree
* improved file allocation (multi-block alloc)
-* fix 32000 subdirectory limit
+* lift 32000 subdirectory limit imposed by i_links_count[1]
* nsec timestamps for mtime, atime, ctime, create time
* inode version field on disk (NFSv4, Lustre)
* reduced e2fsck time via uninit_bg feature
@@ -100,6 +100,9 @@ Note: More extensive information for getting started with ext4 can be
* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
the ordering)
+[1] Filesystems with a block size of 1k may see a limit imposed by the
+directory hash tree having a maximum depth of two.
+
2.2 Candidate features for future inclusion
* Online defrag (patches available but not well tested)
@@ -180,8 +183,8 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata
performance.
barrier=<0|1(*)> This enables/disables the use of write barriers in
- the jbd code. barrier=0 disables, barrier=1 enables.
- This also requires an IO stack which can support
+barrier(*) the jbd code. barrier=0 disables, barrier=1 enables.
+nobarrier This also requires an IO stack which can support
barriers, and if jbd gets an error on a barrier
write, it will disable again with a warning.
Write barriers enforce proper on-disk ordering
@@ -189,6 +192,9 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in
safe to use, at some performance penalty. If
your disks are battery-backed in one way or another,
disabling barriers may safely improve performance.
+ The mount options "barrier" and "nobarrier" can
+ also be used to enable or disable barriers, for
+ consistency with other ext4 mount options.
inode_readahead=n This tuning parameter controls the maximum
number of inode table blocks that ext4's inode
@@ -310,6 +316,24 @@ journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the
a slightly higher priority than the default I/O
priority.
+auto_da_alloc(*) Many broken applications don't use fsync() when
+noauto_da_alloc replacing existing files via patterns such as
+ fd = open("foo.new")/write(fd,..)/close(fd)/
+ rename("foo.new", "foo"), or worse yet,
+ fd = open("foo", O_TRUNC)/write(fd,..)/close(fd).
+ If auto_da_alloc is enabled, ext4 will detect
+ the replace-via-rename and replace-via-truncate
+ patterns and force that any delayed allocation
+ blocks are allocated such that at the next
+ journal commit, in the default data=ordered
+ mode, the data blocks of the new file are forced
+ to disk before the rename() operation is
+ commited. This provides roughly the same level
+ of guarantees as ext3, and avoids the
+ "zero-length" problem that can happen when a
+ system crashes before the delayed allocation
+ blocks are forced to disk.
+
Data Mode
=========
There are 3 different data modes:
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 830bad7cce0f..ce84cfc9eae0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -5,6 +5,7 @@
Bodo Bauer <bb@ricochet.net>
2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
+move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
------------------------------------------------------------------------------
Version 1.3 Kernel version 2.2.12
Kernel version 2.4.0-test11-pre4
@@ -26,25 +27,17 @@ Table of Contents
1.6 Parallel port info in /proc/parport
1.7 TTY info in /proc/tty
1.8 Miscellaneous kernel statistics in /proc/stat
+ 1.9 Ext4 file system parameters
2 Modifying System Parameters
- 2.1 /proc/sys/fs - File system data
- 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
- 2.3 /proc/sys/kernel - general kernel parameters
- 2.4 /proc/sys/vm - The virtual memory subsystem
- 2.5 /proc/sys/dev - Device specific parameters
- 2.6 /proc/sys/sunrpc - Remote procedure calls
- 2.7 /proc/sys/net - Networking stuff
- 2.8 /proc/sys/net/ipv4 - IPV4 settings
- 2.9 Appletalk
- 2.10 IPX
- 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
- 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
- 2.13 /proc/<pid>/oom_score - Display current oom-killer score
- 2.14 /proc/<pid>/io - Display the IO accounting fields
- 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
- 2.16 /proc/<pid>/mountinfo - Information about mounts
- 2.17 /proc/sys/fs/epoll - Configuration options for the epoll interface
+
+ 3 Per-Process Parameters
+ 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
+ 3.2 /proc/<pid>/oom_score - Display current oom-killer score
+ 3.3 /proc/<pid>/io - Display the IO accounting fields
+ 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
+ 3.5 /proc/<pid>/mountinfo - Information about mounts
+
------------------------------------------------------------------------------
Preface
@@ -940,27 +933,6 @@ Table 1-10: Files in /proc/fs/ext4/<devname>
File Content
mb_groups details of multiblock allocator buddy cache of free blocks
mb_history multiblock allocation history
- stats controls whether the multiblock allocator should start
- collecting statistics, which are shown during the unmount
- group_prealloc the multiblock allocator will round up allocation
- requests to a multiple of this tuning parameter if the
- stripe size is not set in the ext4 superblock
- max_to_scan The maximum number of extents the multiblock allocator
- will search to find the best extent
- min_to_scan The minimum number of extents the multiblock allocator
- will search to find the best extent
- order2_req Tuning parameter which controls the minimum size for
- requests (as a power of 2) where the buddy cache is
- used
- stream_req Files which have fewer blocks than this tunable
- parameter will have their blocks allocated out of a
- block group specific preallocation pool, so that small
- files are packed closely together. Each large file
- will have its blocks allocated out of its own unique
- preallocation pool.
-inode_readahead Tuning parameter which controls the maximum number of
- inode table blocks that ext4's inode table readahead
- algorithm will pre-read into the buffer cache
..............................................................................
@@ -1011,1021 +983,24 @@ review the kernel documentation in the directory /usr/src/linux/Documentation.
This chapter is heavily based on the documentation included in the pre 2.2
kernels, and became part of it in version 2.2.1 of the Linux kernel.
-2.1 /proc/sys/fs - File system data
------------------------------------
-
-This subdirectory contains specific file system, file handle, inode, dentry
-and quota information.
-
-Currently, these files are in /proc/sys/fs:
-
-dentry-state
-------------
-
-Status of the directory cache. Since directory entries are dynamically
-allocated and deallocated, this file indicates the current status. It holds
-six values, in which the last two are not used and are always zero. The others
-are listed in table 2-1.
-
-
-Table 2-1: Status files of the directory cache
-..............................................................................
- File Content
- nr_dentry Almost always zero
- nr_unused Number of unused cache entries
- age_limit
- in seconds after the entry may be reclaimed, when memory is short
- want_pages internally
-..............................................................................
-
-dquot-nr and dquot-max
-----------------------
-
-The file dquot-max shows the maximum number of cached disk quota entries.
-
-The file dquot-nr shows the number of allocated disk quota entries and the
-number of free disk quota entries.
-
-If the number of available cached disk quotas is very low and you have a large
-number of simultaneous system users, you might want to raise the limit.
-
-file-nr and file-max
---------------------
-
-The kernel allocates file handles dynamically, but doesn't free them again at
-this time.
-
-The value in file-max denotes the maximum number of file handles that the
-Linux kernel will allocate. When you get a lot of error messages about running
-out of file handles, you might want to raise this limit. The default value is
-10% of RAM in kilobytes. To change it, just write the new number into the
-file:
-
- # cat /proc/sys/fs/file-max
- 4096
- # echo 8192 > /proc/sys/fs/file-max
- # cat /proc/sys/fs/file-max
- 8192
-
-
-This method of revision is useful for all customizable parameters of the
-kernel - simply echo the new value to the corresponding file.
-
-Historically, the three values in file-nr denoted the number of allocated file
-handles, the number of allocated but unused file handles, and the maximum
-number of file handles. Linux 2.6 always reports 0 as the number of free file
-handles -- this is not an error, it just means that the number of allocated
-file handles exactly matches the number of used file handles.
-
-Attempts to allocate more file descriptors than file-max are reported with
-printk, look for "VFS: file-max limit <number> reached".
-
-inode-state and inode-nr
-------------------------
-
-The file inode-nr contains the first two items from inode-state, so we'll skip
-to that file...
-
-inode-state contains two actual numbers and five dummy values. The numbers
-are nr_inodes and nr_free_inodes (in order of appearance).
-
-nr_inodes
-~~~~~~~~~
-
-Denotes the number of inodes the system has allocated. This number will
-grow and shrink dynamically.
-
-nr_open
--------
-
-Denotes the maximum number of file-handles a process can
-allocate. Default value is 1024*1024 (1048576) which should be
-enough for most machines. Actual limit depends on RLIMIT_NOFILE
-resource limit.
-
-nr_free_inodes
---------------
-
-Represents the number of free inodes. Ie. The number of inuse inodes is
-(nr_inodes - nr_free_inodes).
-
-aio-nr and aio-max-nr
----------------------
-
-aio-nr is the running total of the number of events specified on the
-io_setup system call for all currently active aio contexts. If aio-nr
-reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
-raising aio-max-nr does not result in the pre-allocation or re-sizing
-of any kernel data structures.
-
-2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
------------------------------------------------------------
-
-Besides these files, there is the subdirectory /proc/sys/fs/binfmt_misc. This
-handles the kernel support for miscellaneous binary formats.
-
-Binfmt_misc provides the ability to register additional binary formats to the
-Kernel without compiling an additional module/kernel. Therefore, binfmt_misc
-needs to know magic numbers at the beginning or the filename extension of the
-binary.
-
-It works by maintaining a linked list of structs that contain a description of
-a binary format, including a magic with size (or the filename extension),
-offset and mask, and the interpreter name. On request it invokes the given
-interpreter with the original program as argument, as binfmt_java and
-binfmt_em86 and binfmt_mz do. Since binfmt_misc does not define any default
-binary-formats, you have to register an additional binary-format.
-
-There are two general files in binfmt_misc and one file per registered format.
-The two general files are register and status.
-
-Registering a new binary format
--------------------------------
-
-To register a new binary format you have to issue the command
-
- echo :name:type:offset:magic:mask:interpreter: > /proc/sys/fs/binfmt_misc/register
-
-
-
-with appropriate name (the name for the /proc-dir entry), offset (defaults to
-0, if omitted), magic, mask (which can be omitted, defaults to all 0xff) and
-last but not least, the interpreter that is to be invoked (for example and
-testing /bin/echo). Type can be M for usual magic matching or E for filename
-extension matching (give extension in place of magic).
-
-Check or reset the status of the binary format handler
-------------------------------------------------------
-
-If you do a cat on the file /proc/sys/fs/binfmt_misc/status, you will get the
-current status (enabled/disabled) of binfmt_misc. Change the status by echoing
-0 (disables) or 1 (enables) or -1 (caution: this clears all previously
-registered binary formats) to status. For example echo 0 > status to disable
-binfmt_misc (temporarily).
-
-Status of a single handler
---------------------------
-
-Each registered handler has an entry in /proc/sys/fs/binfmt_misc. These files
-perform the same function as status, but their scope is limited to the actual
-binary format. By cating this file, you also receive all related information
-about the interpreter/magic of the binfmt.
-
-Example usage of binfmt_misc (emulate binfmt_java)
---------------------------------------------------
-
- cd /proc/sys/fs/binfmt_misc
- echo ':Java:M::\xca\xfe\xba\xbe::/usr/local/java/bin/javawrapper:' > register
- echo ':HTML:E::html::/usr/local/java/bin/appletviewer:' > register
- echo ':Applet:M::<!--applet::/usr/local/java/bin/appletviewer:' > register
- echo ':DEXE:M::\x0eDEX::/usr/bin/dosexec:' > register
-
-
-These four lines add support for Java executables and Java applets (like
-binfmt_java, additionally recognizing the .html extension with no need to put
-<!--applet> to every applet file). You have to install the JDK and the
-shell-script /usr/local/java/bin/javawrapper too. It works around the
-brokenness of the Java filename handling. To add a Java binary, just create a
-link to the class-file somewhere in the path.
-
-2.3 /proc/sys/kernel - general kernel parameters
-------------------------------------------------
-
-This directory reflects general kernel behaviors. As I've said before, the
-contents depend on your configuration. Here you'll find the most important
-files, along with descriptions of what they mean and how to use them.
-
-acct
-----
-
-The file contains three values; highwater, lowwater, and frequency.
-
-It exists only when BSD-style process accounting is enabled. These values
-control its behavior. If the free space on the file system where the log lives
-goes below lowwater percentage, accounting suspends. If it goes above
-highwater percentage, accounting resumes. Frequency determines how often you
-check the amount of free space (value is in seconds). Default settings are: 4,
-2, and 30. That is, suspend accounting if there is less than 2 percent free;
-resume it if we have a value of 3 or more percent; consider information about
-the amount of free space valid for 30 seconds
-
-ctrl-alt-del
-------------
-
-When the value in this file is 0, ctrl-alt-del is trapped and sent to the init
-program to handle a graceful restart. However, when the value is greater that
-zero, Linux's reaction to this key combination will be an immediate reboot,
-without syncing its dirty buffers.
-
-[NOTE]
- When a program (like dosemu) has the keyboard in raw mode, the
- ctrl-alt-del is intercepted by the program before it ever reaches the
- kernel tty layer, and it is up to the program to decide what to do with
- it.
-
-domainname and hostname
------------------------
-
-These files can be controlled to set the NIS domainname and hostname of your
-box. For the classic darkstar.frop.org a simple:
-
- # echo "darkstar" > /proc/sys/kernel/hostname
- # echo "frop.org" > /proc/sys/kernel/domainname
-
-
-would suffice to set your hostname and NIS domainname.
-
-osrelease, ostype and version
------------------------------
-
-The names make it pretty obvious what these fields contain:
-
- > cat /proc/sys/kernel/osrelease
- 2.2.12
-
- > cat /proc/sys/kernel/ostype
- Linux
-
- > cat /proc/sys/kernel/version
- #4 Fri Oct 1 12:41:14 PDT 1999
-
-
-The files osrelease and ostype should be clear enough. Version needs a little
-more clarification. The #4 means that this is the 4th kernel built from this
-source base and the date after it indicates the time the kernel was built. The
-only way to tune these values is to rebuild the kernel.
-
-panic
------
-
-The value in this file represents the number of seconds the kernel waits
-before rebooting on a panic. When you use the software watchdog, the
-recommended setting is 60. If set to 0, the auto reboot after a kernel panic
-is disabled, which is the default setting.
-
-printk
-------
-
-The four values in printk denote
-* console_loglevel,
-* default_message_loglevel,
-* minimum_console_loglevel and
-* default_console_loglevel
-respectively.
-
-These values influence printk() behavior when printing or logging error
-messages, which come from inside the kernel. See syslog(2) for more
-information on the different log levels.
-
-console_loglevel
-----------------
-
-Messages with a higher priority than this will be printed to the console.
-
-default_message_level
----------------------
-
-Messages without an explicit priority will be printed with this priority.
-
-minimum_console_loglevel
-------------------------
-
-Minimum (highest) value to which the console_loglevel can be set.
-
-default_console_loglevel
-------------------------
-
-Default value for console_loglevel.
-
-sg-big-buff
------------
-
-This file shows the size of the generic SCSI (sg) buffer. At this point, you
-can't tune it yet, but you can change it at compile time by editing
-include/scsi/sg.h and changing the value of SG_BIG_BUFF.
-
-If you use a scanner with SANE (Scanner Access Now Easy) you might want to set
-this to a higher value. Refer to the SANE documentation on this issue.
-
-modprobe
---------
-
-The location where the modprobe binary is located. The kernel uses this
-program to load modules on demand.
-
-unknown_nmi_panic
------------------
-
-The value in this file affects behavior of handling NMI. When the value is
-non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
-debugging information is displayed on console.
-
-NMI switch that most IA32 servers have fires unknown NMI up, for example.
-If a system hangs up, try pressing the NMI switch.
-
-panic_on_unrecovered_nmi
-------------------------
-
-The default Linux behaviour on an NMI of either memory or unknown is to continue
-operation. For many environments such as scientific computing it is preferable
-that the box is taken out and the error dealt with than an uncorrected
-parity/ECC error get propogated.
-
-A small number of systems do generate NMI's for bizarre random reasons such as
-power management so the default is off. That sysctl works like the existing
-panic controls already in that directory.
-
-nmi_watchdog
-------------
-
-Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
-the NMI watchdog is enabled and will continuously test all online cpus to
-determine whether or not they are still functioning properly. Currently,
-passing "nmi_watchdog=" parameter at boot time is required for this function
-to work.
-
-If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
-NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
-oprofile may have more registers to utilize.
-
-msgmni
-------
-
-Maximum number of message queue ids on the system.
-This value scales to the amount of lowmem. It is automatically recomputed
-upon memory add/remove or ipc namespace creation/removal.
-When a value is written into this file, msgmni's value becomes fixed, i.e. it
-is not recomputed anymore when one of the above events occurs.
-Use auto_msgmni to change this behavior.
-
-auto_msgmni
------------
-
-Enables/Disables automatic recomputing of msgmni upon memory add/remove or
-upon ipc namespace creation/removal (see the msgmni description above).
-Echoing "1" into this file enables msgmni automatic recomputing.
-Echoing "0" turns it off.
-auto_msgmni default value is 1.
-
-
-2.4 /proc/sys/vm - The virtual memory subsystem
------------------------------------------------
-
-Please see: Documentation/sysctls/vm.txt for a description of these
+Please see: Documentation/sysctls/ directory for descriptions of these
entries.
+------------------------------------------------------------------------------
+Summary
+------------------------------------------------------------------------------
+Certain aspects of kernel behavior can be modified at runtime, without the
+need to recompile the kernel, or even to reboot the system. The files in the
+/proc/sys tree can not only be read, but also modified. You can use the echo
+command to write value into these files, thereby changing the default settings
+of the kernel.
+------------------------------------------------------------------------------
-2.5 /proc/sys/dev - Device specific parameters
-----------------------------------------------
-
-Currently there is only support for CDROM drives, and for those, there is only
-one read-only file containing information about the CD-ROM drives attached to
-the system:
-
- >cat /proc/sys/dev/cdrom/info
- CD-ROM information, Id: cdrom.c 2.55 1999/04/25
-
- drive name: sr0 hdb
- drive speed: 32 40
- drive # of slots: 1 0
- Can close tray: 1 1
- Can open tray: 1 1
- Can lock tray: 1 1
- Can change speed: 1 1
- Can select disk: 0 1
- Can read multisession: 1 1
- Can read MCN: 1 1
- Reports media changed: 1 1
- Can play audio: 1 1
-
-
-You see two drives, sr0 and hdb, along with a list of their features.
-
-2.6 /proc/sys/sunrpc - Remote procedure calls
----------------------------------------------
-
-This directory contains four files, which enable or disable debugging for the
-RPC functions NFS, NFS-daemon, RPC and NLM. The default values are 0. They can
-be set to one to turn debugging on. (The default value is 0 for each)
-
-2.7 /proc/sys/net - Networking stuff
-------------------------------------
-
-The interface to the networking parts of the kernel is located in
-/proc/sys/net. Table 2-3 shows all possible subdirectories. You may see only
-some of them, depending on your kernel's configuration.
-
-
-Table 2-3: Subdirectories in /proc/sys/net
-..............................................................................
- Directory Content Directory Content
- core General parameter appletalk Appletalk protocol
- unix Unix domain sockets netrom NET/ROM
- 802 E802 protocol ax25 AX25
- ethernet Ethernet protocol rose X.25 PLP layer
- ipv4 IP version 4 x25 X.25 protocol
- ipx IPX token-ring IBM token ring
- bridge Bridging decnet DEC net
- ipv6 IP version 6
-..............................................................................
-
-We will concentrate on IP networking here. Since AX15, X.25, and DEC Net are
-only minor players in the Linux world, we'll skip them in this chapter. You'll
-find some short info on Appletalk and IPX further on in this chapter. Review
-the online documentation and the kernel source to get a detailed view of the
-parameters for those protocols. In this section we'll discuss the
-subdirectories printed in bold letters in the table above. As default values
-are suitable for most needs, there is no need to change these values.
-
-/proc/sys/net/core - Network core options
------------------------------------------
-
-rmem_default
-------------
-
-The default setting of the socket receive buffer in bytes.
-
-rmem_max
---------
-
-The maximum receive socket buffer size in bytes.
-
-wmem_default
-------------
-
-The default setting (in bytes) of the socket send buffer.
-
-wmem_max
---------
-
-The maximum send socket buffer size in bytes.
-
-message_burst and message_cost
-------------------------------
-
-These parameters are used to limit the warning messages written to the kernel
-log from the networking code. They enforce a rate limit to make a
-denial-of-service attack impossible. A higher message_cost factor, results in
-fewer messages that will be written. Message_burst controls when messages will
-be dropped. The default settings limit warning messages to one every five
-seconds.
-
-warnings
---------
-
-This controls console messages from the networking stack that can occur because
-of problems on the network like duplicate address or bad checksums. Normally,
-this should be enabled, but if the problem persists the messages can be
-disabled.
-
-netdev_budget
--------------
-
-Maximum number of packets taken from all interfaces in one polling cycle (NAPI
-poll). In one polling cycle interfaces which are registered to polling are
-probed in a round-robin manner. The limit of packets in one such probe can be
-set per-device via sysfs class/net/<device>/weight .
-
-netdev_max_backlog
-------------------
-
-Maximum number of packets, queued on the INPUT side, when the interface
-receives packets faster than kernel can process them.
-
-optmem_max
-----------
-
-Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
-of struct cmsghdr structures with appended data.
-
-/proc/sys/net/unix - Parameters for Unix domain sockets
--------------------------------------------------------
-
-There are only two files in this subdirectory. They control the delays for
-deleting and destroying socket descriptors.
-
-2.8 /proc/sys/net/ipv4 - IPV4 settings
---------------------------------------
-
-IP version 4 is still the most used protocol in Unix networking. It will be
-replaced by IP version 6 in the next couple of years, but for the moment it's
-the de facto standard for the internet and is used in most networking
-environments around the world. Because of the importance of this protocol,
-we'll have a deeper look into the subtree controlling the behavior of the IPv4
-subsystem of the Linux kernel.
-
-Let's start with the entries in /proc/sys/net/ipv4.
-
-ICMP settings
--------------
-
-icmp_echo_ignore_all and icmp_echo_ignore_broadcasts
-----------------------------------------------------
-
-Turn on (1) or off (0), if the kernel should ignore all ICMP ECHO requests, or
-just those to broadcast and multicast addresses.
-
-Please note that if you accept ICMP echo requests with a broadcast/multi\-cast
-destination address your network may be used as an exploder for denial of
-service packet flooding attacks to other hosts.
-
-icmp_destunreach_rate, icmp_echoreply_rate, icmp_paramprob_rate and icmp_timeexeed_rate
----------------------------------------------------------------------------------------
-
-Sets limits for sending ICMP packets to specific targets. A value of zero
-disables all limiting. Any positive value sets the maximum package rate in
-hundredth of a second (on Intel systems).
-
-IP settings
------------
-
-ip_autoconfig
--------------
-
-This file contains the number one if the host received its IP configuration by
-RARP, BOOTP, DHCP or a similar mechanism. Otherwise it is zero.
-
-ip_default_ttl
---------------
-
-TTL (Time To Live) for IPv4 interfaces. This is simply the maximum number of
-hops a packet may travel.
-
-ip_dynaddr
-----------
-
-Enable dynamic socket address rewriting on interface address change. This is
-useful for dialup interface with changing IP addresses.
-
-ip_forward
-----------
-
-Enable or disable forwarding of IP packages between interfaces. Changing this
-value resets all other parameters to their default values. They differ if the
-kernel is configured as host or router.
-
-ip_local_port_range
--------------------
-
-Range of ports used by TCP and UDP to choose the local port. Contains two
-numbers, the first number is the lowest port, the second number the highest
-local port. Default is 1024-4999. Should be changed to 32768-61000 for
-high-usage systems.
-
-ip_no_pmtu_disc
----------------
-
-Global switch to turn path MTU discovery off. It can also be set on a per
-socket basis by the applications or on a per route basis.
-
-ip_masq_debug
--------------
-
-Enable/disable debugging of IP masquerading.
-
-IP fragmentation settings
--------------------------
-
-ipfrag_high_trash and ipfrag_low_trash
---------------------------------------
-
-Maximum memory used to reassemble IP fragments. When ipfrag_high_thresh bytes
-of memory is allocated for this purpose, the fragment handler will toss
-packets until ipfrag_low_thresh is reached.
-
-ipfrag_time
------------
-
-Time in seconds to keep an IP fragment in memory.
-
-TCP settings
-------------
-
-tcp_ecn
--------
-
-This file controls the use of the ECN bit in the IPv4 headers. This is a new
-feature about Explicit Congestion Notification, but some routers and firewalls
-block traffic that has this bit set, so it could be necessary to echo 0 to
-/proc/sys/net/ipv4/tcp_ecn if you want to talk to these sites. For more info
-you could read RFC2481.
-
-tcp_retrans_collapse
---------------------
-
-Bug-to-bug compatibility with some broken printers. On retransmit, try to send
-larger packets to work around bugs in certain TCP stacks. Can be turned off by
-setting it to zero.
-
-tcp_keepalive_probes
---------------------
-
-Number of keep alive probes TCP sends out, until it decides that the
-connection is broken.
-
-tcp_keepalive_time
-------------------
-
-How often TCP sends out keep alive messages, when keep alive is enabled. The
-default is 2 hours.
-
-tcp_syn_retries
----------------
-
-Number of times initial SYNs for a TCP connection attempt will be
-retransmitted. Should not be higher than 255. This is only the timeout for
-outgoing connections, for incoming connections the number of retransmits is
-defined by tcp_retries1.
-
-tcp_sack
---------
-
-Enable select acknowledgments after RFC2018.
-
-tcp_timestamps
---------------
-
-Enable timestamps as defined in RFC1323.
-
-tcp_stdurg
-----------
-
-Enable the strict RFC793 interpretation of the TCP urgent pointer field. The
-default is to use the BSD compatible interpretation of the urgent pointer
-pointing to the first byte after the urgent data. The RFC793 interpretation is
-to have it point to the last byte of urgent data. Enabling this option may
-lead to interoperability problems. Disabled by default.
-
-tcp_syncookies
---------------
-
-Only valid when the kernel was compiled with CONFIG_SYNCOOKIES. Send out
-syncookies when the syn backlog queue of a socket overflows. This is to ward
-off the common 'syn flood attack'. Disabled by default.
-
-Note that the concept of a socket backlog is abandoned. This means the peer
-may not receive reliable error messages from an over loaded server with
-syncookies enabled.
-
-tcp_window_scaling
-------------------
-
-Enable window scaling as defined in RFC1323.
-
-tcp_fin_timeout
----------------
-
-The length of time in seconds it takes to receive a final FIN before the
-socket is always closed. This is strictly a violation of the TCP
-specification, but required to prevent denial-of-service attacks.
-
-tcp_max_ka_probes
------------------
-
-Indicates how many keep alive probes are sent per slow timer run. Should not
-be set too high to prevent bursts.
-
-tcp_max_syn_backlog
--------------------
-
-Length of the per socket backlog queue. Since Linux 2.2 the backlog specified
-in listen(2) only specifies the length of the backlog queue of already
-established sockets. When more connection requests arrive Linux starts to drop
-packets. When syncookies are enabled the packets are still answered and the
-maximum queue is effectively ignored.
-
-tcp_retries1
-------------
-
-Defines how often an answer to a TCP connection request is retransmitted
-before giving up.
-
-tcp_retries2
-------------
-
-Defines how often a TCP packet is retransmitted before giving up.
-
-Interface specific settings
----------------------------
-
-In the directory /proc/sys/net/ipv4/conf you'll find one subdirectory for each
-interface the system knows about and one directory calls all. Changes in the
-all subdirectory affect all interfaces, whereas changes in the other
-subdirectories affect only one interface. All directories have the same
-entries:
-
-accept_redirects
-----------------
-
-This switch decides if the kernel accepts ICMP redirect messages or not. The
-default is 'yes' if the kernel is configured for a regular host and 'no' for a
-router configuration.
-
-accept_source_route
--------------------
-
-Should source routed packages be accepted or declined. The default is
-dependent on the kernel configuration. It's 'yes' for routers and 'no' for
-hosts.
-
-bootp_relay
-~~~~~~~~~~~
-
-Accept packets with source address 0.b.c.d with destinations not to this host
-as local ones. It is supposed that a BOOTP relay daemon will catch and forward
-such packets.
-
-The default is 0, since this feature is not implemented yet (kernel version
-2.2.12).
-
-forwarding
-----------
-
-Enable or disable IP forwarding on this interface.
-
-log_martians
-------------
-
-Log packets with source addresses with no known route to kernel log.
-
-mc_forwarding
--------------
-
-Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE and a
-multicast routing daemon is required.
-
-proxy_arp
----------
-
-Does (1) or does not (0) perform proxy ARP.
-
-rp_filter
----------
-
-Integer value determines if a source validation should be made. 1 means yes, 0
-means no. Disabled by default, but local/broadcast address spoofing is always
-on.
-
-If you set this to 1 on a router that is the only connection for a network to
-the net, it will prevent spoofing attacks against your internal networks
-(external addresses can still be spoofed), without the need for additional
-firewall rules.
-
-secure_redirects
-----------------
-
-Accept ICMP redirect messages only for gateways, listed in default gateway
-list. Enabled by default.
-
-shared_media
-------------
-
-If it is not set the kernel does not assume that different subnets on this
-device can communicate directly. Default setting is 'yes'.
-
-send_redirects
---------------
-
-Determines whether to send ICMP redirects to other hosts.
-
-Routing settings
-----------------
-
-The directory /proc/sys/net/ipv4/route contains several file to control
-routing issues.
-
-error_burst and error_cost
---------------------------
-
-These parameters are used to limit how many ICMP destination unreachable to
-send from the host in question. ICMP destination unreachable messages are
-sent when we cannot reach the next hop while trying to transmit a packet.
-It will also print some error messages to kernel logs if someone is ignoring
-our ICMP redirects. The higher the error_cost factor is, the fewer
-destination unreachable and error messages will be let through. Error_burst
-controls when destination unreachable messages and error messages will be
-dropped. The default settings limit warning messages to five every second.
-
-flush
------
-
-Writing to this file results in a flush of the routing cache.
-
-gc_elasticity, gc_interval, gc_min_interval_ms, gc_timeout, gc_thresh
----------------------------------------------------------------------
-
-Values to control the frequency and behavior of the garbage collection
-algorithm for the routing cache. gc_min_interval is deprecated and replaced
-by gc_min_interval_ms.
-
-
-max_size
---------
-
-Maximum size of the routing cache. Old entries will be purged once the cache
-reached has this size.
-
-redirect_load, redirect_number
-------------------------------
-
-Factors which determine if more ICPM redirects should be sent to a specific
-host. No redirects will be sent once the load limit or the maximum number of
-redirects has been reached.
-
-redirect_silence
-----------------
-
-Timeout for redirects. After this period redirects will be sent again, even if
-this has been stopped, because the load or number limit has been reached.
-
-Network Neighbor handling
--------------------------
-
-Settings about how to handle connections with direct neighbors (nodes attached
-to the same link) can be found in the directory /proc/sys/net/ipv4/neigh.
-
-As we saw it in the conf directory, there is a default subdirectory which
-holds the default values, and one directory for each interface. The contents
-of the directories are identical, with the single exception that the default
-settings contain additional options to set garbage collection parameters.
-
-In the interface directories you'll find the following entries:
-
-base_reachable_time, base_reachable_time_ms
--------------------------------------------
-
-A base value used for computing the random reachable time value as specified
-in RFC2461.
-
-Expression of base_reachable_time, which is deprecated, is in seconds.
-Expression of base_reachable_time_ms is in milliseconds.
-
-retrans_time, retrans_time_ms
------------------------------
-
-The time between retransmitted Neighbor Solicitation messages.
-Used for address resolution and to determine if a neighbor is
-unreachable.
-
-Expression of retrans_time, which is deprecated, is in 1/100 seconds (for
-IPv4) or in jiffies (for IPv6).
-Expression of retrans_time_ms is in milliseconds.
-
-unres_qlen
-----------
-
-Maximum queue length for a pending arp request - the number of packets which
-are accepted from other layers while the ARP address is still resolved.
-
-anycast_delay
--------------
-
-Maximum for random delay of answers to neighbor solicitation messages in
-jiffies (1/100 sec). Not yet implemented (Linux does not have anycast support
-yet).
-
-ucast_solicit
--------------
-
-Maximum number of retries for unicast solicitation.
-
-mcast_solicit
--------------
-
-Maximum number of retries for multicast solicitation.
-
-delay_first_probe_time
-----------------------
-
-Delay for the first time probe if the neighbor is reachable. (see
-gc_stale_time)
-
-locktime
---------
-
-An ARP/neighbor entry is only replaced with a new one if the old is at least
-locktime old. This prevents ARP cache thrashing.
-
-proxy_delay
------------
-
-Maximum time (real time is random [0..proxytime]) before answering to an ARP
-request for which we have an proxy ARP entry. In some cases, this is used to
-prevent network flooding.
-
-proxy_qlen
-----------
-
-Maximum queue length of the delayed proxy arp timer. (see proxy_delay).
-
-app_solicit
-----------
-
-Determines the number of requests to send to the user level ARP daemon. Use 0
-to turn off.
-
-gc_stale_time
--------------
-
-Determines how often to check for stale ARP entries. After an ARP entry is
-stale it will be resolved again (which is useful when an IP address migrates
-to another machine). When ucast_solicit is greater than 0 it first tries to
-send an ARP packet directly to the known host When that fails and
-mcast_solicit is greater than 0, an ARP request is broadcasted.
-
-2.9 Appletalk
--------------
-
-The /proc/sys/net/appletalk directory holds the Appletalk configuration data
-when Appletalk is loaded. The configurable parameters are:
-
-aarp-expiry-time
-----------------
-
-The amount of time we keep an ARP entry before expiring it. Used to age out
-old hosts.
-
-aarp-resolve-time
------------------
-
-The amount of time we will spend trying to resolve an Appletalk address.
-
-aarp-retransmit-limit
----------------------
-
-The number of times we will retransmit a query before giving up.
-
-aarp-tick-time
---------------
-
-Controls the rate at which expires are checked.
-
-The directory /proc/net/appletalk holds the list of active Appletalk sockets
-on a machine.
-
-The fields indicate the DDP type, the local address (in network:node format)
-the remote address, the size of the transmit pending queue, the size of the
-received queue (bytes waiting for applications to read) the state and the uid
-owning the socket.
-
-/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
-shows the name of the interface, its Appletalk address, the network range on
-that address (or network number for phase 1 networks), and the status of the
-interface.
-
-/proc/net/atalk_route lists each known network route. It lists the target
-(network) that the route leads to, the router (may be directly connected), the
-route flags, and the device the route is using.
-
-2.10 IPX
---------
-
-The IPX protocol has no tunable values in proc/sys/net.
-
-The IPX protocol does, however, provide proc/net/ipx. This lists each IPX
-socket giving the local and remote addresses in Novell format (that is
-network:node:port). In accordance with the strange Novell tradition,
-everything but the port is in hex. Not_Connected is displayed for sockets that
-are not tied to a specific remote address. The Tx and Rx queue sizes indicate
-the number of bytes pending for transmission and reception. The state
-indicates the state the socket is in and the uid is the owning uid of the
-socket.
-
-The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
-it gives the network number, the node number, and indicates if the network is
-the primary network. It also indicates which device it is bound to (or
-Internal for internal networks) and the Frame Type if appropriate. Linux
-supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
-IPX.
-
-The /proc/net/ipx_route table holds a list of IPX routes. For each route it
-gives the destination network, the router node (or Directly) and the network
-address of the router (or Connected) for internal networks.
-
-2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
-----------------------------------------------------------
-
-The "mqueue" filesystem provides the necessary kernel features to enable the
-creation of a user space library that implements the POSIX message queues
-API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
-Interfaces specification.)
-
-The "mqueue" filesystem contains values for determining/setting the amount of
-resources used by the file system.
-
-/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
-maximum number of message queues allowed on the system.
-
-/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
-maximum number of messages in a queue value. In fact it is the limiting value
-for another (user) limit which is set in mq_open invocation. This attribute of
-a queue must be less or equal then msg_max.
-
-/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
-maximum message size value (it is every message queue's attribute set during
-its creation).
+------------------------------------------------------------------------------
+CHAPTER 3: PER-PROCESS PARAMETERS
+------------------------------------------------------------------------------
-2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
+3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
------------------------------------------------------
This file can be used to adjust the score used to select which processes
@@ -2062,25 +1037,15 @@ The task with the highest badness score is then selected and its children
are killed, process itself will be killed in an OOM situation when it does
not have children or some of them disabled oom like described above.
-2.13 /proc/<pid>/oom_score - Display current oom-killer score
+3.2 /proc/<pid>/oom_score - Display current oom-killer score
-------------------------------------------------------------
-------------------------------------------------------------------------------
This file can be used to check the current score used by the oom-killer is for
any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which
process should be killed in an out-of-memory situation.
-------------------------------------------------------------------------------
-Summary
-------------------------------------------------------------------------------
-Certain aspects of kernel behavior can be modified at runtime, without the
-need to recompile the kernel, or even to reboot the system. The files in the
-/proc/sys tree can not only be read, but also modified. You can use the echo
-command to write value into these files, thereby changing the default settings
-of the kernel.
-------------------------------------------------------------------------------
-2.14 /proc/<pid>/io - Display the IO accounting fields
+3.3 /proc/<pid>/io - Display the IO accounting fields
-------------------------------------------------------
This file contains IO statistics for each running process
@@ -2182,7 +1147,7 @@ those 64-bit counters, process A could see an intermediate result.
More information about this can be found within the taskstats documentation in
Documentation/accounting.
-2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
+3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
---------------------------------------------------------------
When a process is dumped, all anonymous memory is written to a core file as
long as the size of the core file isn't limited. But sometimes we don't want
@@ -2226,7 +1191,7 @@ For example:
$ echo 0x7 > /proc/self/coredump_filter
$ ./some_program
-2.16 /proc/<pid>/mountinfo - Information about mounts
+3.5 /proc/<pid>/mountinfo - Information about mounts
--------------------------------------------------------
This file contains lines of the form:
@@ -2263,30 +1228,3 @@ For more information on mount propagation see:
Documentation/filesystems/sharedsubtree.txt
-2.17 /proc/sys/fs/epoll - Configuration options for the epoll interface
---------------------------------------------------------
-
-This directory contains configuration options for the epoll(7) interface.
-
-max_user_instances
-------------------
-
-This is the maximum number of epoll file descriptors that a single user can
-have open at a given time. The default value is 128, and should be enough
-for normal users.
-
-max_user_watches
-----------------
-
-Every epoll file descriptor can store a number of files to be monitored
-for event readiness. Each one of these monitored files constitutes a "watch".
-This configuration option sets the maximum number of "watches" that are
-allowed for each user.
-Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
-on a 64bit one.
-The current default value for max_user_watches is the 1/32 of the available
-low memory, divided for the "watch" cost in bytes.
-
-
-------------------------------------------------------------------------------
-
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 9f8740ca3f3b..26e4b8bc53ee 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this:
| |-- enable
| |-- irq
| |-- local_cpus
+ | |-- remove
| |-- resource
| |-- resource0
| |-- resource1
@@ -36,6 +37,7 @@ files, each with their own function.
enable Whether the device is enabled (ascii, rw)
irq IRQ number (ascii, ro)
local_cpus nearby CPU mask (cpumask, ro)
+ remove remove device from kernel's list (ascii, wo)
resource PCI resource host addresses (ascii, ro)
resource0..N PCI resource N, if present (binary, mmap)
resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
@@ -46,6 +48,7 @@ files, each with their own function.
ro - read only file
rw - file is readable and writable
+ wo - write only file
mmap - file is mmapable
ascii - file contains ascii text
binary - file contains binary data
@@ -73,6 +76,13 @@ that the device must be enabled for a rom read to return data succesfully.
In the event a driver is not bound to the device, it can be enabled using the
'enable' file, documented above.
+The 'remove' file is used to remove the PCI device, by writing a non-zero
+integer to the file. This does not involve any kind of hot-plug functionality,
+e.g. powering off the device. The device is removed from the kernel's list of
+PCI devices, the sysfs directory for it is removed, and the device will be
+removed from any drivers attached to it. Removal of PCI root buses is
+disallowed.
+
Accessing legacy resources through sysfs
----------------------------------------
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index b1b988701247..145c25a170c7 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -123,7 +123,10 @@ platform-specific implementation issue.
Using GPIOs
-----------
-One of the first things to do with a GPIO, often in board setup code when
+The first thing a system should do with a GPIO is allocate it, using
+the gpio_request() call; see later.
+
+One of the next things to do with a GPIO, often in board setup code when
setting up a platform_device using the GPIO, is mark its direction:
/* set as input or output, returning 0 or negative errno */
@@ -141,8 +144,8 @@ This helps avoid signal glitching during system startup.
For compatibility with legacy interfaces to GPIOs, setting the direction
of a GPIO implicitly requests that GPIO (see below) if it has not been
-requested already. That compatibility may be removed in the future;
-explicitly requesting GPIOs is strongly preferred.
+requested already. That compatibility is being removed from the optional
+gpiolib framework.
Setting the direction can fail if the GPIO number is invalid, or when
that particular GPIO can't be used in that mode. It's generally a bad
@@ -195,7 +198,7 @@ This requires sleeping, which can't be done from inside IRQ handlers.
Platforms that support this type of GPIO distinguish them from other GPIOs
by returning nonzero from this call (which requires a valid GPIO number,
-either explicitly or implicitly requested):
+which should have been previously allocated with gpio_request):
int gpio_cansleep(unsigned gpio);
@@ -212,10 +215,9 @@ for GPIOs that can't be accessed from IRQ handlers, these calls act the
same as the spinlock-safe calls.
-Claiming and Releasing GPIOs (OPTIONAL)
----------------------------------------
+Claiming and Releasing GPIOs
+----------------------------
To help catch system configuration errors, two calls are defined.
-However, many platforms don't currently support this mechanism.
/* request GPIO, returning 0 or negative errno.
* non-null labels may be useful for diagnostics.
@@ -244,13 +246,6 @@ Some platforms may also use knowledge about what GPIOs are active for
power management, such as by powering down unused chip sectors and, more
easily, gating off unused clocks.
-These two calls are optional because not not all current Linux platforms
-offer such functionality in their GPIO support; a valid implementation
-could return success for all gpio_request() calls. Unlike the other calls,
-the state they represent doesn't normally match anything from a hardware
-register; it's just a software bitmap which clearly is not necessary for
-correct operation of hardware or (bug free) drivers.
-
Note that requesting a GPIO does NOT cause it to be configured in any
way; it just marks that GPIO as in use. Separate code must handle any
pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d
index 287f8c902656..effe949a7282 100644
--- a/Documentation/hwmon/lis3lv02d
+++ b/Documentation/hwmon/lis3lv02d
@@ -1,11 +1,11 @@
Kernel driver lis3lv02d
-==================
+=======================
Supported chips:
* STMicroelectronics LIS3LV02DL and LIS3LV02DQ
-Author:
+Authors:
Yan Burman <burman.yan@gmail.com>
Eric Piel <eric.piel@tremplin-utc.net>
@@ -15,7 +15,7 @@ Description
This driver provides support for the accelerometer found in various HP
laptops sporting the feature officially called "HP Mobile Data
-Protection System 3D" or "HP 3D DriveGuard". It detect automatically
+Protection System 3D" or "HP 3D DriveGuard". It detects automatically
laptops with this sensor. Known models (for now the HP 2133, nc6420,
nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis
automatically oriented on standard way (eg: you can directly play
@@ -27,7 +27,7 @@ position - 3D position that the accelerometer reports. Format: "(x,y,z)"
calibrate - read: values (x, y, z) that are used as the base for input
class device operation.
write: forces the base to be recalibrated with the current
- position.
+ position.
rate - reports the sampling rate of the accelerometer device in HZ
This driver also provides an absolute input class device, allowing
@@ -48,7 +48,7 @@ For better compatibility between the various laptops. The values reported by
the accelerometer are converted into a "standard" organisation of the axes
(aka "can play neverball out of the box"):
* When the laptop is horizontal the position reported is about 0 for X and Y
-and a positive value for Z
+ and a positive value for Z
* If the left side is elevated, X increases (becomes positive)
* If the front side (where the touchpad is) is elevated, Y decreases
(becomes negative)
@@ -59,3 +59,13 @@ email to the authors to add it to the database. When reporting a new
laptop, please include the output of "dmidecode" plus the value of
/sys/devices/platform/lis3lv02d/position in these four cases.
+Q&A
+---
+
+Q: How do I safely simulate freefall? I have an HP "portable
+workstation" which has about 3.5kg and a plastic case, so letting it
+fall to the ground is out of question...
+
+A: The sensor is pretty sensitive, so your hands can do it. Lift it
+into free space, follow the fall with your hands for like 10
+centimeters. That should be enough to trigger the detection.
diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215
new file mode 100644
index 000000000000..2e6a21eb656c
--- /dev/null
+++ b/Documentation/hwmon/ltc4215
@@ -0,0 +1,50 @@
+Kernel driver ltc4215
+=====================
+
+Supported chips:
+ * Linear Technology LTC4215
+ Prefix: 'ltc4215'
+ Addresses scanned: 0x44
+ Datasheet:
+ http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697
+
+Author: Ira W. Snyder <iws@ovro.caltech.edu>
+
+
+Description
+-----------
+
+The LTC4215 controller allows a board to be safely inserted and removed
+from a live backplane.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4215 devices, due to the fact that some
+of the possible addresses are unfriendly to probing. You will need to use
+the "force" parameter to tell the driver where to find the device.
+
+Example: the following will load the driver for an LTC4215 at address 0x44
+on I2C bus #0:
+$ modprobe ltc4215 force=0,0x44
+
+
+Sysfs entries
+-------------
+
+The LTC4215 has built-in limits for overvoltage, undervoltage, and
+undercurrent warnings. This makes it very likely that the reference
+circuit will be used.
+
+in1_input input voltage
+in2_input output voltage
+
+in1_min_alarm input undervoltage alarm
+in1_max_alarm input overvoltage alarm
+
+curr1_input current
+curr1_max_alarm overcurrent alarm
+
+power1_input power usage
+power1_alarm power bad alarm
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index aeedb89a307a..240257dd4238 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1695,6 +1695,8 @@ and is between 256 and 4096 characters. It is defined in the file
See also Documentation/blockdev/paride.txt.
pci=option[,option...] [PCI] various PCI subsystem options:
+ earlydump [X86] dump PCI config space before the kernel
+ changes anything
off [X86] don't probe for the PCI bus
bios [X86-32] force use of PCI BIOS, don't access
the hardware directly. Use this if your machine
@@ -1794,6 +1796,15 @@ and is between 256 and 4096 characters. It is defined in the file
cbmemsize=nn[KMG] The fixed amount of bus space which is
reserved for the CardBus bridge's memory
window. The default value is 64 megabytes.
+ resource_alignment=
+ Format:
+ [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+ Specifies alignment and device to reassign
+ aligned memory resources.
+ If <order of align> is not specified,
+ PAGE_SIZE is used as alignment.
+ PCI-PCI bridge can be specified, if resource
+ windows need to be expanded.
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
Management.
diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003
new file mode 100644
index 000000000000..c4ff5f38e010
--- /dev/null
+++ b/Documentation/misc-devices/isl29003
@@ -0,0 +1,62 @@
+Kernel driver isl29003
+=====================
+
+Supported chips:
+* Intersil ISL29003
+Prefix: 'isl29003'
+Addresses scanned: none
+Datasheet:
+http://www.intersil.com/data/fn/fn7464.pdf
+
+Author: Daniel Mack <daniel@caiaq.de>
+
+
+Description
+-----------
+The ISL29003 is an integrated light sensor with a 16-bit integrating type
+ADC, I2C user programmable lux range select for optimized counts/lux, and
+I2C multi-function control and monitoring capabilities. The internal ADC
+provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by
+artificial light sources.
+
+The driver allows to set the lux range, the bit resolution, the operational
+mode (see below) and the power state of device and can read the current lux
+value, of course.
+
+
+Detection
+---------
+
+The ISL29003 does not have an ID register which could be used to identify
+it, so the detection routine will just try to read from the configured I2C
+addess and consider the device to be present as soon as it ACKs the
+transfer.
+
+
+Sysfs entries
+-------------
+
+range:
+ 0: 0 lux to 1000 lux (default)
+ 1: 0 lux to 4000 lux
+ 2: 0 lux to 16,000 lux
+ 3: 0 lux to 64,000 lux
+
+resolution:
+ 0: 2^16 cycles (default)
+ 1: 2^12 cycles
+ 2: 2^8 cycles
+ 3: 2^4 cycles
+
+mode:
+ 0: diode1's current (unsigned 16bit) (default)
+ 1: diode1's current (unsigned 16bit)
+ 2: difference between diodes (l1 - l2, signed 15bit)
+
+power_state:
+ 0: device is disabled (default)
+ 1: device is enabled
+
+lux (read only):
+ returns the value from the last sensor reading
+
diff --git a/Documentation/networking/vxge.txt b/Documentation/networking/vxge.txt
new file mode 100644
index 000000000000..d2e2997e6fa0
--- /dev/null
+++ b/Documentation/networking/vxge.txt
@@ -0,0 +1,100 @@
+Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
+==============================================================================
+
+Contents
+--------
+
+1) Introduction
+2) Features supported
+3) Configurable driver parameters
+4) Troubleshooting
+
+1) Introduction:
+----------------
+This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
+Virtualized Server adapters.
+The X3100 series supports four modes of operation, configurable via
+firmware -
+ Single function mode
+ Multi function mode
+ SRIOV mode
+ MRIOV mode
+The functions share a 10GbE link and the pci-e bus, but hardly anything else
+inside the ASIC. Features like independent hw reset, statistics, bandwidth/
+priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
+supported independently on each function.
+
+(See below for a complete list of features supported for both IPv4 and IPv6)
+
+2) Features supported:
+----------------------
+
+i) Single function mode (up to 17 queues)
+
+ii) Multi function mode (up to 17 functions)
+
+iii) PCI-SIG's I/O Virtualization
+ - Single Root mode: v1.0 (up to 17 functions)
+ - Multi-Root mode: v1.0 (up to 17 functions)
+
+iv) Jumbo frames
+ X3100 Series supports MTU up to 9600 bytes, modifiable using
+ ifconfig command.
+
+v) Offloads supported: (Enabled by default)
+ Checksum offload (TCP/UDP/IP) on transmit and receive paths
+ TCP Segmentation Offload (TSO) on transmit path
+ Generic Receive Offload (GRO) on receive path
+
+vi) MSI-X: (Enabled by default)
+ Resulting in noticeable performance improvement (up to 7% on certain
+ platforms).
+
+vii) NAPI: (Enabled by default)
+ For better Rx interrupt moderation.
+
+viii)RTH (Receive Traffic Hash): (Enabled by default)
+ Receive side steering for better scaling.
+
+ix) Statistics
+ Comprehensive MAC-level and software statistics displayed using
+ "ethtool -S" option.
+
+x) Multiple hardware queues: (Enabled by default)
+ Up to 17 hardware based transmit and receive data channels, with
+ multiple steering options (transmit multiqueue enabled by default).
+
+3) Configurable driver parameters:
+----------------------------------
+
+i) max_config_dev
+ Specifies maximum device functions to be enabled.
+ Valid range: 1-8
+
+ii) max_config_port
+ Specifies number of ports to be enabled.
+ Valid range: 1,2
+ Default: 1
+
+iii)max_config_vpath
+ Specifies maximum VPATH(s) configured for each device function.
+ Valid range: 1-17
+
+iv) vlan_tag_strip
+ Enables/disables vlan tag stripping from all received tagged frames that
+ are not replicated at the internal L2 switch.
+ Valid range: 0,1 (disabled, enabled respectively)
+ Default: 1
+
+v) addr_learn_en
+ Enable learning the mac address of the guest OS interface in
+ virtualization environment.
+ Valid range: 0,1 (disabled, enabled respectively)
+ Default: 0
+
+4) Troubleshooting:
+-------------------
+
+To resolve an issue with the source code or X3100 series adapter, please collect
+the statistics, register dumps using ethool, relevant logs and email them to
+support@neterion.com.
diff --git a/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt
new file mode 100644
index 000000000000..c39ac2891951
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt
@@ -0,0 +1,23 @@
+MMC/SD/SDIO slot directly connected to a SPI bus
+
+Required properties:
+- compatible : should be "mmc-spi-slot".
+- reg : should specify SPI address (chip-select number).
+- spi-max-frequency : maximum frequency for this device (Hz).
+- voltage-ranges : two cells are required, first cell specifies minimum
+ slot voltage (mV), second cell specifies maximum slot voltage (mV).
+ Several ranges could be specified.
+- gpios : (optional) may specify GPIOs in this order: Card-Detect GPIO,
+ Write-Protect GPIO.
+
+Example:
+
+ mmc-slot@0 {
+ compatible = "fsl,mpc8323rdb-mmc-slot",
+ "mmc-spi-slot";
+ reg = <0>;
+ gpios = <&qe_pio_d 14 1
+ &qe_pio_d 15 0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <50000000>;
+ };
diff --git a/Documentation/sysctl/00-INDEX b/Documentation/sysctl/00-INDEX
index a20a9066dc4c..1286f455992f 100644
--- a/Documentation/sysctl/00-INDEX
+++ b/Documentation/sysctl/00-INDEX
@@ -10,6 +10,8 @@ fs.txt
- documentation for /proc/sys/fs/*.
kernel.txt
- documentation for /proc/sys/kernel/*.
+net.txt
+ - documentation for /proc/sys/net/*.
sunrpc.txt
- documentation for /proc/sys/sunrpc/*.
vm.txt
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index f99254327ae5..1458448436cc 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -1,5 +1,6 @@
Documentation for /proc/sys/fs/* kernel version 2.2.10
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+ (c) 2009, Shen Feng<shen@cn.fujitsu.com>
For general info and legal blurb, please look in README.
@@ -14,7 +15,12 @@ kernel. Since some of the files _can_ be used to screw up your
system, it is advisable to read both documentation and source
before actually making adjustments.
+1. /proc/sys/fs
+----------------------------------------------------------
+
Currently, these files are in /proc/sys/fs:
+- aio-max-nr
+- aio-nr
- dentry-state
- dquot-max
- dquot-nr
@@ -30,8 +36,15 @@ Currently, these files are in /proc/sys/fs:
- super-max
- super-nr
-Documentation for the files in /proc/sys/fs/binfmt_misc is
-in Documentation/binfmt_misc.txt.
+==============================================================
+
+aio-nr & aio-max-nr:
+
+aio-nr is the running total of the number of events specified on the
+io_setup system call for all currently active aio contexts. If aio-nr
+reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
+raising aio-max-nr does not result in the pre-allocation or re-sizing
+of any kernel data structures.
==============================================================
@@ -178,3 +191,60 @@ requests. aio-max-nr allows you to change the maximum value
aio-nr can grow to.
==============================================================
+
+
+2. /proc/sys/fs/binfmt_misc
+----------------------------------------------------------
+
+Documentation for the files in /proc/sys/fs/binfmt_misc is
+in Documentation/binfmt_misc.txt.
+
+
+3. /proc/sys/fs/mqueue - POSIX message queues filesystem
+----------------------------------------------------------
+
+The "mqueue" filesystem provides the necessary kernel features to enable the
+creation of a user space library that implements the POSIX message queues
+API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting the amount of
+resources used by the file system.
+
+/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
+maximum number of message queues allowed on the system.
+
+/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
+maximum number of messages in a queue value. In fact it is the limiting value
+for another (user) limit which is set in mq_open invocation. This attribute of
+a queue must be less or equal then msg_max.
+
+/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
+maximum message size value (it is every message queue's attribute set during
+its creation).
+
+
+4. /proc/sys/fs/epoll - Configuration options for the epoll interface
+--------------------------------------------------------
+
+This directory contains configuration options for the epoll(7) interface.
+
+max_user_instances
+------------------
+
+This is the maximum number of epoll file descriptors that a single user can
+have open at a given time. The default value is 128, and should be enough
+for normal users.
+
+max_user_watches
+----------------
+
+Every epoll file descriptor can store a number of files to be monitored
+for event readiness. Each one of these monitored files constitutes a "watch".
+This configuration option sets the maximum number of "watches" that are
+allowed for each user.
+Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
+on a 64bit one.
+The current default value for max_user_watches is the 1/32 of the available
+low memory, divided for the "watch" cost in bytes.
+
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a4ccdd1981cf..f11ca7979fa6 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -1,5 +1,6 @@
Documentation for /proc/sys/kernel/* kernel version 2.2.10
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+ (c) 2009, Shen Feng<shen@cn.fujitsu.com>
For general info and legal blurb, please look in README.
@@ -18,6 +19,7 @@ Currently, these files might (depending on your configuration)
show up in /proc/sys/kernel:
- acpi_video_flags
- acct
+- auto_msgmni
- core_pattern
- core_uses_pid
- ctrl-alt-del
@@ -33,6 +35,7 @@ show up in /proc/sys/kernel:
- msgmax
- msgmnb
- msgmni
+- nmi_watchdog
- osrelease
- ostype
- overflowgid
@@ -40,6 +43,7 @@ show up in /proc/sys/kernel:
- panic
- pid_max
- powersave-nap [ PPC only ]
+- panic_on_unrecovered_nmi
- printk
- randomize_va_space
- real-root-dev ==> Documentation/initrd.txt
@@ -55,6 +59,7 @@ show up in /proc/sys/kernel:
- sysrq ==> Documentation/sysrq.txt
- tainted
- threads-max
+- unknown_nmi_panic
- version
==============================================================
@@ -381,3 +386,51 @@ can be ORed together:
512 - A kernel warning has occurred.
1024 - A module from drivers/staging was loaded.
+==============================================================
+
+auto_msgmni:
+
+Enables/Disables automatic recomputing of msgmni upon memory add/remove or
+upon ipc namespace creation/removal (see the msgmni description above).
+Echoing "1" into this file enables msgmni automatic recomputing.
+Echoing "0" turns it off.
+auto_msgmni default value is 1.
+
+==============================================================
+
+nmi_watchdog:
+
+Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
+the NMI watchdog is enabled and will continuously test all online cpus to
+determine whether or not they are still functioning properly. Currently,
+passing "nmi_watchdog=" parameter at boot time is required for this function
+to work.
+
+If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
+NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
+oprofile may have more registers to utilize.
+
+==============================================================
+
+unknown_nmi_panic:
+
+The value in this file affects behavior of handling NMI. When the value is
+non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
+debugging information is displayed on console.
+
+NMI switch that most IA32 servers have fires unknown NMI up, for example.
+If a system hangs up, try pressing the NMI switch.
+
+==============================================================
+
+panic_on_unrecovered_nmi:
+
+The default Linux behaviour on an NMI of either memory or unknown is to continue
+operation. For many environments such as scientific computing it is preferable
+that the box is taken out and the error dealt with than an uncorrected
+parity/ECC error get propogated.
+
+A small number of systems do generate NMI's for bizarre random reasons such as
+power management so the default is off. That sysctl works like the existing
+panic controls already in that directory.
+
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
new file mode 100644
index 000000000000..a34d55b65441
--- /dev/null
+++ b/Documentation/sysctl/net.txt
@@ -0,0 +1,175 @@
+Documentation for /proc/sys/net/* kernel version 2.4.0-test11-pre4
+ (c) 1999 Terrehon Bowden <terrehon@pacbell.net>
+ Bodo Bauer <bb@ricochet.net>
+ (c) 2000 Jorge Nerin <comandante@zaralinux.com>
+ (c) 2009 Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains the documentation for the sysctl files in
+/proc/sys/net and is valid for Linux kernel version 2.4.0-test11-pre4.
+
+The interface to the networking parts of the kernel is located in
+/proc/sys/net. The following table shows all possible subdirectories.You may
+see only some of them, depending on your kernel's configuration.
+
+
+Table : Subdirectories in /proc/sys/net
+..............................................................................
+ Directory Content Directory Content
+ core General parameter appletalk Appletalk protocol
+ unix Unix domain sockets netrom NET/ROM
+ 802 E802 protocol ax25 AX25
+ ethernet Ethernet protocol rose X.25 PLP layer
+ ipv4 IP version 4 x25 X.25 protocol
+ ipx IPX token-ring IBM token ring
+ bridge Bridging decnet DEC net
+ ipv6 IP version 6
+..............................................................................
+
+1. /proc/sys/net/core - Network core options
+-------------------------------------------------------
+
+rmem_default
+------------
+
+The default setting of the socket receive buffer in bytes.
+
+rmem_max
+--------
+
+The maximum receive socket buffer size in bytes.
+
+wmem_default
+------------
+
+The default setting (in bytes) of the socket send buffer.
+
+wmem_max
+--------
+
+The maximum send socket buffer size in bytes.
+
+message_burst and message_cost
+------------------------------
+
+These parameters are used to limit the warning messages written to the kernel
+log from the networking code. They enforce a rate limit to make a
+denial-of-service attack impossible. A higher message_cost factor, results in
+fewer messages that will be written. Message_burst controls when messages will
+be dropped. The default settings limit warning messages to one every five
+seconds.
+
+warnings
+--------
+
+This controls console messages from the networking stack that can occur because
+of problems on the network like duplicate address or bad checksums. Normally,
+this should be enabled, but if the problem persists the messages can be
+disabled.
+
+netdev_budget
+-------------
+
+Maximum number of packets taken from all interfaces in one polling cycle (NAPI
+poll). In one polling cycle interfaces which are registered to polling are
+probed in a round-robin manner. The limit of packets in one such probe can be
+set per-device via sysfs class/net/<device>/weight .
+
+netdev_max_backlog
+------------------
+
+Maximum number of packets, queued on the INPUT side, when the interface
+receives packets faster than kernel can process them.
+
+optmem_max
+----------
+
+Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
+of struct cmsghdr structures with appended data.
+
+2. /proc/sys/net/unix - Parameters for Unix domain sockets
+-------------------------------------------------------
+
+There is only one file in this directory.
+unix_dgram_qlen limits the max number of datagrams queued in Unix domain
+socket's buffer. It will not take effect unless PF_UNIX flag is spicified.
+
+
+3. /proc/sys/net/ipv4 - IPV4 settings
+-------------------------------------------------------
+Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
+descriptions of these entries.
+
+
+4. Appletalk
+-------------------------------------------------------
+
+The /proc/sys/net/appletalk directory holds the Appletalk configuration data
+when Appletalk is loaded. The configurable parameters are:
+
+aarp-expiry-time
+----------------
+
+The amount of time we keep an ARP entry before expiring it. Used to age out
+old hosts.
+
+aarp-resolve-time
+-----------------
+
+The amount of time we will spend trying to resolve an Appletalk address.
+
+aarp-retransmit-limit
+---------------------
+
+The number of times we will retransmit a query before giving up.
+
+aarp-tick-time
+--------------
+
+Controls the rate at which expires are checked.
+
+The directory /proc/net/appletalk holds the list of active Appletalk sockets
+on a machine.
+
+The fields indicate the DDP type, the local address (in network:node format)
+the remote address, the size of the transmit pending queue, the size of the
+received queue (bytes waiting for applications to read) the state and the uid
+owning the socket.
+
+/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
+shows the name of the interface, its Appletalk address, the network range on
+that address (or network number for phase 1 networks), and the status of the
+interface.
+
+/proc/net/atalk_route lists each known network route. It lists the target
+(network) that the route leads to, the router (may be directly connected), the
+route flags, and the device the route is using.
+
+
+5. IPX
+-------------------------------------------------------
+
+The IPX protocol has no tunable values in proc/sys/net.
+
+The IPX protocol does, however, provide proc/net/ipx. This lists each IPX
+socket giving the local and remote addresses in Novell format (that is
+network:node:port). In accordance with the strange Novell tradition,
+everything but the port is in hex. Not_Connected is displayed for sockets that
+are not tied to a specific remote address. The Tx and Rx queue sizes indicate
+the number of bytes pending for transmission and reception. The state
+indicates the state the socket is in and the uid is the owning uid of the
+socket.
+
+The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
+it gives the network number, the node number, and indicates if the network is
+the primary network. It also indicates which device it is bound to (or
+Internal for internal networks) and the Frame Type if appropriate. Linux
+supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
+IPX.
+
+The /proc/net/ipx_route table holds a list of IPX routes. For each route it
+gives the destination network, the router node (or Directly) and the network
+address of the router (or Connected) for internal networks.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718afb..afa2946892da 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -81,6 +81,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'i' - Send a SIGKILL to all processes, except for init.
+'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl.
+
'k' - Secure Access Key (SAK) Kills all programs on the current virtual
console. NOTE: See important comments below in SAK section.
@@ -160,6 +162,9 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you
are unable to kill any other way, especially if it's spawning other
processes.
+"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen
+(probably root) filesystem via the FIFREEZE ioctl.
+
* Sometimes SysRq seems to get 'stuck' after using it, what can I do?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
That happens to me, also. I've found that tapping shift, alt, and control
diff --git a/MAINTAINERS b/MAINTAINERS
index c5f4e9d27b64..ebaf77ebd8b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -357,6 +357,7 @@ S: Odd Fixes for 2.4; Maintained for 2.6.
P: Ivan Kokshaysky
M: ink@jurassic.park.msu.ru
S: Maintained for 2.4; PCI support for 2.6.
+L: linux-alpha@vger.kernel.org
AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
P: Thomas Dahlmann
@@ -1944,6 +1945,12 @@ L: lm-sensors@lm-sensors.org
W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
S: Maintained
+HYPERVISOR VIRTUAL CONSOLE DRIVER
+L: linuxppc-dev@ozlabs.org
+L: linux-kernel@vger.kernel.org
+S: Odd Fixes
+F: drivers/char/hvc_*
+
GSPCA FINEPIX SUBDRIVER
P: Frank Zago
M: frank@zago.net
@@ -2201,25 +2208,12 @@ L: linux-ide@vger.kernel.org
T: quilt kernel.org/pub/linux/kernel/people/bart/pata-2.6/
S: Maintained
-IDE/ATAPI CDROM DRIVER
+IDE/ATAPI DRIVERS
P: Borislav Petkov
M: petkovbb@gmail.com
L: linux-ide@vger.kernel.org
S: Maintained
-IDE/ATAPI FLOPPY DRIVERS
-P: Paul Bristow
-M: Paul Bristow <paul@paulbristow.net>
-W: http://paulbristow.net/linux/idefloppy.html
-L: linux-kernel@vger.kernel.org
-S: Maintained
-
-IDE/ATAPI TAPE DRIVERS
-P: Gadi Oxman
-M: Gadi Oxman <gadio@netvision.net.il>
-L: linux-kernel@vger.kernel.org
-S: Maintained
-
IDLE-I7300
P: Andy Henroid
M: andrew.d.henroid@intel.com
@@ -3110,7 +3104,7 @@ M: shemminger@linux-foundation.org
L: netem@lists.linux-foundation.org
S: Maintained
-NETERION (S2IO) Xframe 10GbE DRIVER
+NETERION (S2IO) 10GbE DRIVER (xframe/vxge)
P: Ramkrishna Vepa
M: ram.vepa@neterion.com
P: Rastapur Santosh
@@ -3119,8 +3113,11 @@ P: Sivakumar Subramani
M: sivakumar.subramani@neterion.com
P: Sreenivasa Honnur
M: sreenivasa.honnur@neterion.com
+P: Anil Murthy
+M: anil.murthy@neterion.com
L: netdev@vger.kernel.org
-W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/TitleIndex?anonymous
+W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
+W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
S: Supported
NETFILTER/IPTABLES/IPCHAINS
diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h
index fea4ea75b79d..13cd42743810 100644
--- a/arch/alpha/include/asm/machvec.h
+++ b/arch/alpha/include/asm/machvec.h
@@ -80,7 +80,7 @@ struct alpha_machine_vector
void (*update_irq_hw)(unsigned long, unsigned long, int);
void (*ack_irq)(unsigned long);
void (*device_interrupt)(unsigned long vector);
- void (*machine_check)(u64 vector, u64 la);
+ void (*machine_check)(unsigned long vector, unsigned long la);
void (*smp_callin)(void);
void (*init_arch)(void);
diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index 2a14302c17a3..cb04eaa6ba33 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -273,4 +273,18 @@ struct pci_dev *alpha_gendev_to_pci(struct device *dev);
extern struct pci_dev *isa_bridge;
+extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
+ size_t count);
+extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
+ size_t count);
+extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state);
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+ enum pci_mmap_state mmap_type);
+#define HAVE_PCI_LEGACY 1
+
+extern int pci_create_resource_files(struct pci_dev *dev);
+extern void pci_remove_resource_files(struct pci_dev *dev);
+
#endif /* __ALPHA_PCI_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index aeeb125f6851..e38fb95cb335 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -166,6 +166,9 @@ static inline void __raw_write_unlock(raw_rwlock_t * lock)
lock->lock = 0;
}
+#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
+#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+
#define _raw_spin_relax(lock) cpu_relax()
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
diff --git a/arch/alpha/include/asm/system.h b/arch/alpha/include/asm/system.h
index afe20fa58c99..5aa40cca4f23 100644
--- a/arch/alpha/include/asm/system.h
+++ b/arch/alpha/include/asm/system.h
@@ -309,518 +309,71 @@ extern int __min_ipl;
#define tbia() __tbi(-2, /* no second argument */)
/*
- * Atomic exchange.
- * Since it can be used to implement critical sections
- * it must clobber "memory" (also for interrupts in UP).
+ * Atomic exchange routines.
*/
-static inline unsigned long
-__xchg_u8(volatile char *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " insbl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extbl %2,%4,%0\n"
- " mskbl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-__xchg_u16(volatile short *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " inswl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extwl %2,%4,%0\n"
- " mskwl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-__xchg_u32(volatile int *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stl_c %1,%2\n"
- " beq %1,2f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
-
- return val;
-}
-
-static inline unsigned long
-__xchg_u64(volatile long *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stq_c %1,%2\n"
- " beq %1,2f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
+#define __ASM__MB
+#define ____xchg(type, args...) __xchg ## type ## _local(args)
+#define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args)
+#include <asm/xchg.h>
- return val;
-}
-
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid xchg(). */
-extern void __xchg_called_with_bad_pointer(void);
-
-#define __xchg(ptr, x, size) \
-({ \
- unsigned long __xchg__res; \
- volatile void *__xchg__ptr = (ptr); \
- switch (size) { \
- case 1: __xchg__res = __xchg_u8(__xchg__ptr, x); break; \
- case 2: __xchg__res = __xchg_u16(__xchg__ptr, x); break; \
- case 4: __xchg__res = __xchg_u32(__xchg__ptr, x); break; \
- case 8: __xchg__res = __xchg_u64(__xchg__ptr, x); break; \
- default: __xchg_called_with_bad_pointer(); __xchg__res = x; \
- } \
- __xchg__res; \
-})
-
-#define xchg(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
+#define xchg_local(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \
+ sizeof(*(ptr))); \
})
-static inline unsigned long
-__xchg_u8_local(volatile char *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " insbl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extbl %2,%4,%0\n"
- " mskbl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-__xchg_u16_local(volatile short *m, unsigned long val)
-{
- unsigned long ret, tmp, addr64;
-
- __asm__ __volatile__(
- " andnot %4,7,%3\n"
- " inswl %1,%4,%1\n"
- "1: ldq_l %2,0(%3)\n"
- " extwl %2,%4,%0\n"
- " mskwl %2,%4,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%3)\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
- : "r" ((long)m), "1" (val) : "memory");
-
- return ret;
-}
-
-static inline unsigned long
-__xchg_u32_local(volatile int *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stl_c %1,%2\n"
- " beq %1,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
-
- return val;
-}
-
-static inline unsigned long
-__xchg_u64_local(volatile long *m, unsigned long val)
-{
- unsigned long dummy;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%4\n"
- " bis $31,%3,%1\n"
- " stq_c %1,%2\n"
- " beq %1,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (val), "=&r" (dummy), "=m" (*m)
- : "rI" (val), "m" (*m) : "memory");
-
- return val;
-}
-
-#define __xchg_local(ptr, x, size) \
-({ \
- unsigned long __xchg__res; \
- volatile void *__xchg__ptr = (ptr); \
- switch (size) { \
- case 1: __xchg__res = __xchg_u8_local(__xchg__ptr, x); break; \
- case 2: __xchg__res = __xchg_u16_local(__xchg__ptr, x); break; \
- case 4: __xchg__res = __xchg_u32_local(__xchg__ptr, x); break; \
- case 8: __xchg__res = __xchg_u64_local(__xchg__ptr, x); break; \
- default: __xchg_called_with_bad_pointer(); __xchg__res = x; \
- } \
- __xchg__res; \
-})
-
-#define xchg_local(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg_local((ptr), (unsigned long)_x_, \
- sizeof(*(ptr))); \
+#define cmpxchg_local(ptr, o, n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, \
+ sizeof(*(ptr))); \
})
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- *
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
- */
-
-#define __HAVE_ARCH_CMPXCHG 1
-
-static inline unsigned long
-__cmpxchg_u8(volatile char *m, long old, long new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " insbl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extbl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskbl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-__cmpxchg_u16(volatile short *m, long old, long new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " inswl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extwl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskwl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-__cmpxchg_u32(volatile int *m, int old, int new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stl_c %1,%2\n"
- " beq %1,3f\n"
-#ifdef CONFIG_SMP
- " mb\n"
-#endif
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
+#define cmpxchg64_local(ptr, o, n) \
+ ({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_local((ptr), (o), (n)); \
+ })
-static inline unsigned long
-__cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stq_c %1,%2\n"
- " beq %1,3f\n"
#ifdef CONFIG_SMP
- " mb\n"
+#undef __ASM__MB
+#define __ASM__MB "\tmb\n"
#endif
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
-
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return __cmpxchg_u8(ptr, old, new);
- case 2:
- return __cmpxchg_u16(ptr, old, new);
- case 4:
- return __cmpxchg_u32(ptr, old, new);
- case 8:
- return __cmpxchg_u64(ptr, old, new);
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-
-#define cmpxchg(ptr, o, n) \
- ({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
- (unsigned long)_n_, sizeof(*(ptr))); \
+#undef ____xchg
+#undef ____cmpxchg
+#define ____xchg(type, args...) __xchg ##type(args)
+#define ____cmpxchg(type, args...) __cmpxchg ##type(args)
+#include <asm/xchg.h>
+
+#define xchg(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, \
+ sizeof(*(ptr))); \
})
-#define cmpxchg64(ptr, o, n) \
- ({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
- })
-
-static inline unsigned long
-__cmpxchg_u8_local(volatile char *m, long old, long new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " insbl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extbl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskbl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-__cmpxchg_u16_local(volatile short *m, long old, long new)
-{
- unsigned long prev, tmp, cmp, addr64;
-
- __asm__ __volatile__(
- " andnot %5,7,%4\n"
- " inswl %1,%5,%1\n"
- "1: ldq_l %2,0(%4)\n"
- " extwl %2,%5,%0\n"
- " cmpeq %0,%6,%3\n"
- " beq %3,2f\n"
- " mskwl %2,%5,%2\n"
- " or %1,%2,%2\n"
- " stq_c %2,0(%4)\n"
- " beq %2,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
- : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-__cmpxchg_u32_local(volatile int *m, int old, int new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stl_c %1,%2\n"
- " beq %1,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
-
-static inline unsigned long
-__cmpxchg_u64_local(volatile long *m, unsigned long old, unsigned long new)
-{
- unsigned long prev, cmp;
-
- __asm__ __volatile__(
- "1: ldq_l %0,%5\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,2f\n"
- " mov %4,%1\n"
- " stq_c %1,%2\n"
- " beq %1,3f\n"
- "2:\n"
- ".subsection 2\n"
- "3: br 1b\n"
- ".previous"
- : "=&r"(prev), "=&r"(cmp), "=m"(*m)
- : "r"((long) old), "r"(new), "m"(*m) : "memory");
-
- return prev;
-}
-
-static __always_inline unsigned long
-__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
- int size)
-{
- switch (size) {
- case 1:
- return __cmpxchg_u8_local(ptr, old, new);
- case 2:
- return __cmpxchg_u16_local(ptr, old, new);
- case 4:
- return __cmpxchg_u32_local(ptr, old, new);
- case 8:
- return __cmpxchg_u64_local(ptr, old, new);
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-#define cmpxchg_local(ptr, o, n) \
- ({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
- (unsigned long)_n_, sizeof(*(ptr))); \
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, sizeof(*(ptr)));\
})
-#define cmpxchg64_local(ptr, o, n) \
- ({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+
+#define cmpxchg64(ptr, o, n) \
+ ({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg((ptr), (o), (n)); \
})
+#undef __ASM__MB
+#undef ____cmpxchg
+
+#define __HAVE_ARCH_CMPXCHG 1
#endif /* __ASSEMBLY__ */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index c1541353ccef..f072f344497e 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -8,7 +8,12 @@
* not a major issue. However, for interoperability, libraries still
* need to be careful to avoid a name clashes.
*/
+
+#ifdef __KERNEL__
+#include <asm-generic/int-ll64.h>
+#else
#include <asm-generic/int-l64.h>
+#endif
#ifndef __ASSEMBLY__
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index 22de3b434a22..163f3053001c 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -498,13 +498,13 @@ struct exception_table_entry
};
/* Returns the new pc */
-#define fixup_exception(map_reg, fixup, pc) \
+#define fixup_exception(map_reg, _fixup, pc) \
({ \
- if ((fixup)->fixup.bits.valreg != 31) \
- map_reg((fixup)->fixup.bits.valreg) = 0; \
- if ((fixup)->fixup.bits.errreg != 31) \
- map_reg((fixup)->fixup.bits.errreg) = -EFAULT; \
- (pc) + (fixup)->fixup.bits.nextinsn; \
+ if ((_fixup)->fixup.bits.valreg != 31) \
+ map_reg((_fixup)->fixup.bits.valreg) = 0; \
+ if ((_fixup)->fixup.bits.errreg != 31) \
+ map_reg((_fixup)->fixup.bits.errreg) = -EFAULT; \
+ (pc) + (_fixup)->fixup.bits.nextinsn; \
})
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
new file mode 100644
index 000000000000..beba1b803e0d
--- /dev/null
+++ b/arch/alpha/include/asm/xchg.h
@@ -0,0 +1,258 @@
+#ifndef __ALPHA_SYSTEM_H
+#error Do not include xchg.h directly!
+#else
+/*
+ * xchg/xchg_local and cmpxchg/cmpxchg_local share the same code
+ * except that local version do not have the expensive memory barrier.
+ * So this file is included twice from asm/system.h.
+ */
+
+/*
+ * Atomic exchange.
+ * Since it can be used to implement critical sections
+ * it must clobber "memory" (also for interrupts in UP).
+ */
+
+static inline unsigned long
+____xchg(_u8, volatile char *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " insbl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extbl %2,%4,%0\n"
+ " mskbl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ __ASM__MB
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+____xchg(_u16, volatile short *m, unsigned long val)
+{
+ unsigned long ret, tmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %4,7,%3\n"
+ " inswl %1,%4,%1\n"
+ "1: ldq_l %2,0(%3)\n"
+ " extwl %2,%4,%0\n"
+ " mskwl %2,%4,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%3)\n"
+ " beq %2,2f\n"
+ __ASM__MB
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
+ : "r" ((long)m), "1" (val) : "memory");
+
+ return ret;
+}
+
+static inline unsigned long
+____xchg(_u32, volatile int *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,2f\n"
+ __ASM__MB
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+static inline unsigned long
+____xchg(_u64, volatile long *m, unsigned long val)
+{
+ unsigned long dummy;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%4\n"
+ " bis $31,%3,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,2f\n"
+ __ASM__MB
+ ".subsection 2\n"
+ "2: br 1b\n"
+ ".previous"
+ : "=&r" (val), "=&r" (dummy), "=m" (*m)
+ : "rI" (val), "m" (*m) : "memory");
+
+ return val;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid xchg(). */
+extern void __xchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+____xchg(, volatile void *ptr, unsigned long x, int size)
+{
+ switch (size) {
+ case 1:
+ return ____xchg(_u8, ptr, x);
+ case 2:
+ return ____xchg(_u16, ptr, x);
+ case 4:
+ return ____xchg(_u32, ptr, x);
+ case 8:
+ return ____xchg(_u64, ptr, x);
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+}
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ *
+ * The memory barrier should be placed in SMP only when we actually
+ * make the change. If we don't change anything (so if the returned
+ * prev is equal to old) then we aren't acquiring anything new and
+ * we don't need any memory barrier as far I can tell.
+ */
+
+static inline unsigned long
+____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " insbl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extbl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskbl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ __ASM__MB
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
+{
+ unsigned long prev, tmp, cmp, addr64;
+
+ __asm__ __volatile__(
+ " andnot %5,7,%4\n"
+ " inswl %1,%5,%1\n"
+ "1: ldq_l %2,0(%4)\n"
+ " extwl %2,%5,%0\n"
+ " cmpeq %0,%6,%3\n"
+ " beq %3,2f\n"
+ " mskwl %2,%5,%2\n"
+ " or %1,%2,%2\n"
+ " stq_c %2,0(%4)\n"
+ " beq %2,3f\n"
+ __ASM__MB
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
+ : "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg(_u32, volatile int *m, int old, int new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldl_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stl_c %1,%2\n"
+ " beq %1,3f\n"
+ __ASM__MB
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+static inline unsigned long
+____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
+{
+ unsigned long prev, cmp;
+
+ __asm__ __volatile__(
+ "1: ldq_l %0,%5\n"
+ " cmpeq %0,%3,%1\n"
+ " beq %1,2f\n"
+ " mov %4,%1\n"
+ " stq_c %1,%2\n"
+ " beq %1,3f\n"
+ __ASM__MB
+ "2:\n"
+ ".subsection 2\n"
+ "3: br 1b\n"
+ ".previous"
+ : "=&r"(prev), "=&r"(cmp), "=m"(*m)
+ : "r"((long) old), "r"(new), "m"(*m) : "memory");
+
+ return prev;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+____cmpxchg(, volatile void *ptr, unsigned long old, unsigned long new,
+ int size)
+{
+ switch (size) {
+ case 1:
+ return ____cmpxchg(_u8, ptr, old, new);
+ case 2:
+ return ____cmpxchg(_u16, ptr, old, new);
+ case 4:
+ return ____cmpxchg(_u32, ptr, old, new);
+ case 8:
+ return ____cmpxchg(_u64, ptr, old, new);
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+#endif
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index b4697759a123..a427538252f8 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -12,7 +12,7 @@ obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
obj-$(CONFIG_VGA_HOSE) += console.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_PCI) += pci.o pci_iommu.o
+obj-$(CONFIG_PCI) += pci.o pci_iommu.o pci-sysfs.o
obj-$(CONFIG_SRM_ENV) += srm_env.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c
index 11aee012a8ae..985e5c1681ac 100644
--- a/arch/alpha/kernel/err_ev6.c
+++ b/arch/alpha/kernel/err_ev6.c
@@ -157,8 +157,8 @@ ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn,
err_print_prefix,
streamname[stream], bitsname[bits], sourcename[source]);
- printk("%s Address: 0x%016lx\n"
- " Syndrome[upper.lower]: %02lx.%02lx\n",
+ printk("%s Address: 0x%016llx\n"
+ " Syndrome[upper.lower]: %02llx.%02llx\n",
err_print_prefix,
c_addr,
c2_syn, c1_syn);
diff --git a/arch/alpha/kernel/err_ev7.c b/arch/alpha/kernel/err_ev7.c
index 68cd493f54c5..73770c6ca013 100644
--- a/arch/alpha/kernel/err_ev7.c
+++ b/arch/alpha/kernel/err_ev7.c
@@ -246,13 +246,13 @@ ev7_process_pal_subpacket(struct el_subpacket *header)
switch(header->type) {
case EL_TYPE__PAL__LOGOUT_FRAME:
- printk("%s*** MCHK occurred on LPID %ld (RBOX %lx)\n",
+ printk("%s*** MCHK occurred on LPID %ld (RBOX %llx)\n",
err_print_prefix,
packet->by_type.logout.whami,
packet->by_type.logout.rbox_whami);
el_print_timestamp(&packet->by_type.logout.timestamp);
- printk("%s EXC_ADDR: %016lx\n"
- " HALT_CODE: %lx\n",
+ printk("%s EXC_ADDR: %016llx\n"
+ " HALT_CODE: %llx\n",
err_print_prefix,
packet->by_type.logout.exc_addr,
packet->by_type.logout.halt_code);
diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c
index 413bf37eb094..6bfd243efba3 100644
--- a/arch/alpha/kernel/err_marvel.c
+++ b/arch/alpha/kernel/err_marvel.c
@@ -129,7 +129,7 @@ marvel_print_po7_crrct_sym(u64 crrct_sym)
printk("%s Correctable Error Symptoms:\n"
- "%s Syndrome: 0x%lx\n",
+ "%s Syndrome: 0x%llx\n",
err_print_prefix,
err_print_prefix, EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__SYN));
marvel_print_err_cyc(EXTRACT(crrct_sym, IO7__PO7_CRRCT_SYM__ERR_CYC));
@@ -186,7 +186,7 @@ marvel_print_po7_uncrr_sym(u64 uncrr_sym, u64 valid_mask)
uncrr_sym &= valid_mask;
if (EXTRACT(valid_mask, IO7__PO7_UNCRR_SYM__SYN))
- printk("%s Syndrome: 0x%lx\n",
+ printk("%s Syndrome: 0x%llx\n",
err_print_prefix,
EXTRACT(uncrr_sym, IO7__PO7_UNCRR_SYM__SYN));
@@ -307,7 +307,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym)
sprintf(opcode_str, "BlkIO");
break;
default:
- sprintf(opcode_str, "0x%lx\n",
+ sprintf(opcode_str, "0x%llx\n",
EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE));
break;
}
@@ -321,7 +321,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym)
opcode_str);
if (0xC5 != EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_OPCODE))
- printk("%s Packet Offset 0x%08lx\n",
+ printk("%s Packet Offset 0x%08llx\n",
err_print_prefix,
EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_PKT_OFF));
}
@@ -480,8 +480,8 @@ marvel_print_po7_err_sum(struct ev7_pal_io_subpacket *io)
printk("%s Lost Error\n", err_print_prefix);
printk("%s Failing Packet:\n"
- "%s Cycle 1: %016lx\n"
- "%s Cycle 2: %016lx\n",
+ "%s Cycle 1: %016llx\n"
+ "%s Cycle 2: %016llx\n",
err_print_prefix,
err_print_prefix, io->po7_err_pkt0,
err_print_prefix, io->po7_err_pkt1);
@@ -515,9 +515,9 @@ marvel_print_pox_tlb_err(u64 tlb_err)
if (!(tlb_err & IO7__POX_TLBERR__ERR_VALID))
return;
- printk("%s TLB Error on index 0x%lx:\n"
+ printk("%s TLB Error on index 0x%llx:\n"
"%s - %s\n"
- "%s - Addr: 0x%016lx\n",
+ "%s - Addr: 0x%016llx\n",
err_print_prefix,
EXTRACT(tlb_err, IO7__POX_TLBERR__ERR_TLB_PTR),
err_print_prefix,
@@ -579,7 +579,7 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt)
sprintf(message, "Uncorrectable Split Write Data Error");
break;
default:
- sprintf(message, "%08lx\n",
+ sprintf(message, "%08llx\n",
EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__MESSAGE));
break;
}
@@ -620,9 +620,9 @@ marvel_print_pox_trans_sum(u64 trans_sum)
return;
printk("%s Transaction Summary:\n"
- "%s Command: 0x%lx - %s\n"
- "%s Address: 0x%016lx%s\n"
- "%s PCI-X Master Slot: 0x%lx\n",
+ "%s Command: 0x%llx - %s\n"
+ "%s Address: 0x%016llx%s\n"
+ "%s PCI-X Master Slot: 0x%llx\n",
err_print_prefix,
err_print_prefix,
EXTRACT(trans_sum, IO7__POX_TRANSUM__PCIX_CMD),
@@ -964,12 +964,12 @@ marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print)
#if 0
printk("%s PORT 7 ERROR:\n"
- "%s PO7_ERROR_SUM: %016lx\n"
- "%s PO7_UNCRR_SYM: %016lx\n"
- "%s PO7_CRRCT_SYM: %016lx\n"
- "%s PO7_UGBGE_SYM: %016lx\n"
- "%s PO7_ERR_PKT0: %016lx\n"
- "%s PO7_ERR_PKT1: %016lx\n",
+ "%s PO7_ERROR_SUM: %016llx\n"
+ "%s PO7_UNCRR_SYM: %016llx\n"
+ "%s PO7_CRRCT_SYM: %016llx\n"
+ "%s PO7_UGBGE_SYM: %016llx\n"
+ "%s PO7_ERR_PKT0: %016llx\n"
+ "%s PO7_ERR_PKT1: %016llx\n",
err_print_prefix,
err_print_prefix, io->po7_error_sum,
err_print_prefix, io->po7_uncrr_sym,
@@ -987,12 +987,12 @@ marvel_process_io_error(struct ev7_lf_subpackets *lf_subpackets, int print)
if (!MARVEL_IO_ERR_VALID(io->ports[i].pox_err_sum))
continue;
- printk("%s PID %u PORT %d POx_ERR_SUM: %016lx\n",
+ printk("%s PID %u PORT %d POx_ERR_SUM: %016llx\n",
err_print_prefix,
lf_subpackets->io_pid, i, io->ports[i].pox_err_sum);
marvel_print_pox_err(io->ports[i].pox_err_sum, &io->ports[i]);
- printk("%s [ POx_FIRST_ERR: %016lx ]\n",
+ printk("%s [ POx_FIRST_ERR: %016llx ]\n",
err_print_prefix, io->ports[i].pox_first_err);
marvel_print_pox_err(io->ports[i].pox_first_err,
&io->ports[i]);
diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c
index 257449ed15ef..c7e28a88d6e3 100644
--- a/arch/alpha/kernel/err_titan.c
+++ b/arch/alpha/kernel/err_titan.c
@@ -107,12 +107,12 @@ titan_parse_p_serror(int which, u64 serror, int print)
if (!print)
return status;
- printk("%s PChip %d SERROR: %016lx\n",
+ printk("%s PChip %d SERROR: %016llx\n",
err_print_prefix, which, serror);
if (serror & TITAN__PCHIP_SERROR__ECCMASK) {
printk("%s %sorrectable ECC Error:\n"
" Source: %-6s Command: %-8s Syndrome: 0x%08x\n"
- " Address: 0x%lx\n",
+ " Address: 0x%llx\n",
err_print_prefix,
(serror & TITAN__PCHIP_SERROR__UECC) ? "Unc" : "C",
serror_src[EXTRACT(serror, TITAN__PCHIP_SERROR__SRC)],
@@ -223,7 +223,7 @@ titan_parse_p_perror(int which, int port, u64 perror, int print)
if (!print)
return status;
- printk("%s PChip %d %cPERROR: %016lx\n",
+ printk("%s PChip %d %cPERROR: %016llx\n",
err_print_prefix, which,
port ? 'A' : 'G', perror);
if (perror & TITAN__PCHIP_PERROR__IPTPW)
@@ -316,7 +316,7 @@ titan_parse_p_agperror(int which, u64 agperror, int print)
addr = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__ADDR) << 3;
len = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__LEN);
- printk("%s PChip %d AGPERROR: %016lx\n", err_print_prefix,
+ printk("%s PChip %d AGPERROR: %016llx\n", err_print_prefix,
which, agperror);
if (agperror & TITAN__PCHIP_AGPERROR__NOWINDOW)
printk("%s No Window\n", err_print_prefix);
@@ -597,16 +597,16 @@ privateer_process_680_frame(struct el_common *mchk_header, int print)
return status;
/* TODO - decode instead of just dumping... */
- printk("%s Summary Flags: %016lx\n"
- " CChip DIRx: %016lx\n"
- " System Management IR: %016lx\n"
- " CPU IR: %016lx\n"
- " Power Supply IR: %016lx\n"
- " LM78 Fault Status: %016lx\n"
- " System Doors: %016lx\n"
- " Temperature Warning: %016lx\n"
- " Fan Control: %016lx\n"
- " Fatal Power Down Code: %016lx\n",
+ printk("%s Summary Flags: %016llx\n"
+ " CChip DIRx: %016llx\n"
+ " System Management IR: %016llx\n"
+ " CPU IR: %016llx\n"
+ " Power Supply IR: %016llx\n"
+ " LM78 Fault Status: %016llx\n"
+ " System Doors: %016llx\n"
+ " Temperature Warning: %016llx\n"
+ " Fan Control: %016llx\n"
+ " Fatal Power Down Code: %016llx\n",
err_print_prefix,
emchk->summary,
emchk->c_dirx,
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
new file mode 100644
index 000000000000..6ea822e7f724
--- /dev/null
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -0,0 +1,366 @@
+/*
+ * arch/alpha/kernel/pci-sysfs.c
+ *
+ * Copyright (C) 2009 Ivan Kokshaysky
+ *
+ * Alpha PCI resource files.
+ *
+ * Loosely based on generic HAVE_PCI_MMAP implementation in
+ * drivers/pci/pci-sysfs.c
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+static int hose_mmap_page_range(struct pci_controller *hose,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_type, int sparse)
+{
+ unsigned long base;
+
+ if (mmap_type == pci_mmap_mem)
+ base = sparse ? hose->sparse_mem_base : hose->dense_mem_base;
+ else
+ base = sparse ? hose->sparse_io_base : hose->dense_io_base;
+
+ vma->vm_pgoff += base >> PAGE_SHIFT;
+ vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+ return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+static int __pci_mmap_fits(struct pci_dev *pdev, int num,
+ struct vm_area_struct *vma, int sparse)
+{
+ unsigned long nr, start, size;
+ int shift = sparse ? 5 : 0;
+
+ nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ start = vma->vm_pgoff;
+ size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
+
+ if (start < size && size - start >= nr)
+ return 1;
+ WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on %s BAR %d "
+ "(size 0x%08lx)\n",
+ current->comm, sparse ? " sparse" : "", start, start + nr,
+ pci_name(pdev), num, size);
+ return 0;
+}
+
+/**
+ * pci_mmap_resource - map a PCI resource into user memory space
+ * @kobj: kobject for mapping
+ * @attr: struct bin_attribute for the file being mapped
+ * @vma: struct vm_area_struct passed into the mmap
+ * @sparse: address space type
+ *
+ * Use the bus mapping routines to map a PCI resource into userspace.
+ */
+static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+ struct vm_area_struct *vma, int sparse)
+{
+ struct pci_dev *pdev = to_pci_dev(container_of(kobj,
+ struct device, kobj));
+ struct resource *res = (struct resource *)attr->private;
+ enum pci_mmap_state mmap_type;
+ struct pci_bus_region bar;
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++)
+ if (res == &pdev->resource[i])
+ break;
+ if (i >= PCI_ROM_RESOURCE)
+ return -ENODEV;
+
+ if (!__pci_mmap_fits(pdev, i, vma, sparse))
+ return -EINVAL;
+
+ if (iomem_is_exclusive(res->start))
+ return -EINVAL;
+
+ pcibios_resource_to_bus(pdev, &bar, res);
+ vma->vm_pgoff += bar.start >> (PAGE_SHIFT - (sparse ? 5 : 0));
+ mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+
+ return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
+}
+
+static int pci_mmap_resource_sparse(struct kobject *kobj,
+ struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ return pci_mmap_resource(kobj, attr, vma, 1);
+}
+
+static int pci_mmap_resource_dense(struct kobject *kobj,
+ struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ return pci_mmap_resource(kobj, attr, vma, 0);
+}
+
+/**
+ * pci_remove_resource_files - cleanup resource files
+ * @dev: dev to cleanup
+ *
+ * If we created resource files for @dev, remove them from sysfs and
+ * free their resources.
+ */
+void pci_remove_resource_files(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ struct bin_attribute *res_attr;
+
+ res_attr = pdev->res_attr[i];
+ if (res_attr) {
+ sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+ kfree(res_attr);
+ }
+
+ res_attr = pdev->res_attr_wc[i];
+ if (res_attr) {
+ sysfs_remove_bin_file(&pdev->dev.kobj, res_attr);
+ kfree(res_attr);
+ }
+ }
+}
+
+static int sparse_mem_mmap_fits(struct pci_dev *pdev, int num)
+{
+ struct pci_bus_region bar;
+ struct pci_controller *hose = pdev->sysdata;
+ long dense_offset;
+ unsigned long sparse_size;
+
+ pcibios_resource_to_bus(pdev, &bar, &pdev->resource[num]);
+
+ /* All core logic chips have 4G sparse address space, except
+ CIA which has 16G (see xxx_SPARSE_MEM and xxx_DENSE_MEM
+ definitions in asm/core_xxx.h files). This corresponds
+ to 128M or 512M of the bus space. */
+ dense_offset = (long)(hose->dense_mem_base - hose->sparse_mem_base);
+ sparse_size = dense_offset >= 0x400000000UL ? 0x20000000 : 0x8000000;
+
+ return bar.end < sparse_size;
+}
+
+static int pci_create_one_attr(struct pci_dev *pdev, int num, char *name,
+ char *suffix, struct bin_attribute *res_attr,
+ unsigned long sparse)
+{
+ size_t size = pci_resource_len(pdev, num);
+
+ sprintf(name, "resource%d%s", num, suffix);
+ res_attr->mmap = sparse ? pci_mmap_resource_sparse :
+ pci_mmap_resource_dense;
+ res_attr->attr.name = name;
+ res_attr->attr.mode = S_IRUSR | S_IWUSR;
+ res_attr->size = sparse ? size << 5 : size;
+ res_attr->private = &pdev->resource[num];
+ return sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
+}
+
+static int pci_create_attr(struct pci_dev *pdev, int num)
+{
+ /* allocate attribute structure, piggyback attribute name */
+ int retval, nlen1, nlen2 = 0, res_count = 1;
+ unsigned long sparse_base, dense_base;
+ struct bin_attribute *attr;
+ struct pci_controller *hose = pdev->sysdata;
+ char *suffix, *attr_name;
+
+ suffix = ""; /* Assume bwx machine, normal resourceN files. */
+ nlen1 = 10;
+
+ if (pdev->resource[num].flags & IORESOURCE_MEM) {
+ sparse_base = hose->sparse_mem_base;
+ dense_base = hose->dense_mem_base;
+ if (sparse_base && !sparse_mem_mmap_fits(pdev, num)) {
+ sparse_base = 0;
+ suffix = "_dense";
+ nlen1 = 16; /* resourceN_dense */
+ }
+ } else {
+ sparse_base = hose->sparse_io_base;
+ dense_base = hose->dense_io_base;
+ }
+
+ if (sparse_base) {
+ suffix = "_sparse";
+ nlen1 = 17;
+ if (dense_base) {
+ nlen2 = 16; /* resourceN_dense */
+ res_count = 2;
+ }
+ }
+
+ attr = kzalloc(sizeof(*attr) * res_count + nlen1 + nlen2, GFP_ATOMIC);
+ if (!attr)
+ return -ENOMEM;
+
+ /* Create bwx, sparse or single dense file */
+ attr_name = (char *)(attr + res_count);
+ pdev->res_attr[num] = attr;
+ retval = pci_create_one_attr(pdev, num, attr_name, suffix, attr,
+ sparse_base);
+ if (retval || res_count == 1)
+ return retval;
+
+ /* Create dense file */
+ attr_name += nlen1;
+ attr++;
+ pdev->res_attr_wc[num] = attr;
+ return pci_create_one_attr(pdev, num, attr_name, "_dense", attr, 0);
+}
+
+/**
+ * pci_create_resource_files - create resource files in sysfs for @dev
+ * @dev: dev in question
+ *
+ * Walk the resources in @dev creating files for each resource available.
+ */
+int pci_create_resource_files(struct pci_dev *pdev)
+{
+ int i;
+ int retval;
+
+ /* Expose the PCI resources from this device as files */
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+
+ /* skip empty resources */
+ if (!pci_resource_len(pdev, i))
+ continue;
+
+ retval = pci_create_attr(pdev, i);
+ if (retval) {
+ pci_remove_resource_files(pdev);
+ return retval;
+ }
+ }
+ return 0;
+}
+
+/* Legacy I/O bus mapping stuff. */
+
+static int __legacy_mmap_fits(struct pci_controller *hose,
+ struct vm_area_struct *vma,
+ unsigned long res_size, int sparse)
+{
+ unsigned long nr, start, size;
+
+ nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ start = vma->vm_pgoff;
+ size = ((res_size - 1) >> PAGE_SHIFT) + 1;
+
+ if (start < size && size - start >= nr)
+ return 1;
+ WARN(1, "process \"%s\" tried to map%s 0x%08lx-0x%08lx on hose %d "
+ "(size 0x%08lx)\n",
+ current->comm, sparse ? " sparse" : "", start, start + nr,
+ hose->index, size);
+ return 0;
+}
+
+static inline int has_sparse(struct pci_controller *hose,
+ enum pci_mmap_state mmap_type)
+{
+ unsigned long base;
+
+ base = (mmap_type == pci_mmap_mem) ? hose->sparse_mem_base :
+ hose->sparse_io_base;
+
+ return base != 0;
+}
+
+int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_type)
+{
+ struct pci_controller *hose = bus->sysdata;
+ int sparse = has_sparse(hose, mmap_type);
+ unsigned long res_size;
+
+ res_size = (mmap_type == pci_mmap_mem) ? bus->legacy_mem->size :
+ bus->legacy_io->size;
+ if (!__legacy_mmap_fits(hose, vma, res_size, sparse))
+ return -EINVAL;
+
+ return hose_mmap_page_range(hose, vma, mmap_type, sparse);
+}
+
+/**
+ * pci_adjust_legacy_attr - adjustment of legacy file attributes
+ * @b: bus to create files under
+ * @mmap_type: I/O port or memory
+ *
+ * Adjust file name and size for sparse mappings.
+ */
+void pci_adjust_legacy_attr(struct pci_bus *bus, enum pci_mmap_state mmap_type)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ if (!has_sparse(hose, mmap_type))
+ return;
+
+ if (mmap_type == pci_mmap_mem) {
+ bus->legacy_mem->attr.name = "legacy_mem_sparse";
+ bus->legacy_mem->size <<= 5;
+ } else {
+ bus->legacy_io->attr.name = "legacy_io_sparse";
+ bus->legacy_io->size <<= 5;
+ }
+ return;
+}
+
+/* Legacy I/O bus read/write functions */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ port += hose->io_space->start;
+
+ switch(size) {
+ case 1:
+ *((u8 *)val) = inb(port);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ *((u16 *)val) = inw(port);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ *((u32 *)val) = inl(port);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ port += hose->io_space->start;
+
+ switch(size) {
+ case 1:
+ outb(port, val);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ outw(port, val);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ outl(port, val);
+ return 4;
+ }
+ return -EINVAL;
+}
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index a3b938811400..a91ba28999b5 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -168,7 +168,7 @@ pcibios_align_resource(void *data, struct resource *res,
*/
/* Align to multiple of size of minimum base. */
- alignto = max(0x1000UL, align);
+ alignto = max_t(resource_size_t, 0x1000, align);
start = ALIGN(start, alignto);
if (hose->sparse_mem_base && size <= 7 * 16*MB) {
if (((start / (16*MB)) & 0x7) == 0) {
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index b9094da05d7a..bfb880af959d 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -247,7 +247,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
&& paddr + size <= __direct_map_size) {
ret = paddr + __direct_map_base;
- DBGA2("pci_map_single: [%p,%lx] -> direct %lx from %p\n",
+ DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %p\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
@@ -258,7 +258,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
if (dac_allowed) {
ret = paddr + alpha_mv.pci_dac_offset;
- DBGA2("pci_map_single: [%p,%lx] -> DAC %lx from %p\n",
+ DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %p\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
@@ -299,7 +299,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
ret = arena->dma_base + dma_ofs * PAGE_SIZE;
ret += (unsigned long)cpu_addr & ~PAGE_MASK;
- DBGA2("pci_map_single: [%p,%lx] np %ld -> sg %lx from %p\n",
+ DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %p\n",
cpu_addr, size, npages, ret, __builtin_return_address(0));
return ret;
@@ -355,14 +355,14 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
&& dma_addr < __direct_map_base + __direct_map_size) {
/* Nothing to do. */
- DBGA2("pci_unmap_single: direct [%lx,%lx] from %p\n",
+ DBGA2("pci_unmap_single: direct [%llx,%zx] from %p\n",
dma_addr, size, __builtin_return_address(0));
return;
}
if (dma_addr > 0xffffffff) {
- DBGA2("pci64_unmap_single: DAC [%lx,%lx] from %p\n",
+ DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %p\n",
dma_addr, size, __builtin_return_address(0));
return;
}
@@ -373,9 +373,9 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT;
if (dma_ofs * PAGE_SIZE >= arena->size) {
- printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %lx "
- " base %lx size %x\n", dma_addr, arena->dma_base,
- arena->size);
+ printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %llx "
+ " base %llx size %x\n",
+ dma_addr, arena->dma_base, arena->size);
return;
BUG();
}
@@ -394,7 +394,7 @@ pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
spin_unlock_irqrestore(&arena->lock, flags);
- DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n",
+ DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %p\n",
dma_addr, size, npages, __builtin_return_address(0));
}
EXPORT_SYMBOL(pci_unmap_single);
@@ -444,7 +444,7 @@ try_again:
goto try_again;
}
- DBGA2("pci_alloc_consistent: %lx -> [%p,%x] from %p\n",
+ DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %p\n",
size, cpu_addr, *dma_addrp, __builtin_return_address(0));
return cpu_addr;
@@ -464,7 +464,7 @@ pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr,
pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
free_pages((unsigned long)cpu_addr, get_order(size));
- DBGA2("pci_free_consistent: [%x,%lx] from %p\n",
+ DBGA2("pci_free_consistent: [%llx,%zx] from %p\n",
dma_addr, size, __builtin_return_address(0));
}
EXPORT_SYMBOL(pci_free_consistent);
@@ -551,7 +551,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end,
out->dma_address = paddr + __direct_map_base;
out->dma_length = size;
- DBGA(" sg_fill: [%p,%lx] -> direct %lx\n",
+ DBGA(" sg_fill: [%p,%lx] -> direct %llx\n",
__va(paddr), size, out->dma_address);
return 0;
@@ -563,7 +563,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end,
out->dma_address = paddr + alpha_mv.pci_dac_offset;
out->dma_length = size;
- DBGA(" sg_fill: [%p,%lx] -> DAC %lx\n",
+ DBGA(" sg_fill: [%p,%lx] -> DAC %llx\n",
__va(paddr), size, out->dma_address);
return 0;
@@ -589,7 +589,7 @@ sg_fill(struct device *dev, struct scatterlist *leader, struct scatterlist *end,
out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr;
out->dma_length = size;
- DBGA(" sg_fill: [%p,%lx] -> sg %lx np %ld\n",
+ DBGA(" sg_fill: [%p,%lx] -> sg %llx np %ld\n",
__va(paddr), size, out->dma_address, npages);
/* All virtually contiguous. We need to find the length of each
@@ -752,7 +752,7 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
if (addr > 0xffffffff) {
/* It's a DAC address -- nothing to do. */
- DBGA(" (%ld) DAC [%lx,%lx]\n",
+ DBGA(" (%ld) DAC [%llx,%zx]\n",
sg - end + nents, addr, size);
continue;
}
@@ -760,12 +760,12 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
if (addr >= __direct_map_base
&& addr < __direct_map_base + __direct_map_size) {
/* Nothing to do. */
- DBGA(" (%ld) direct [%lx,%lx]\n",
+ DBGA(" (%ld) direct [%llx,%zx]\n",
sg - end + nents, addr, size);
continue;
}
- DBGA(" (%ld) sg [%lx,%lx]\n",
+ DBGA(" (%ld) sg [%llx,%zx]\n",
sg - end + nents, addr, size);
npages = iommu_num_pages(addr, size, PAGE_SIZE);
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 8d0097f10208..3a2fb7a02db4 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -272,7 +272,7 @@ alpha_vfork(struct pt_regs *regs)
*/
int
-copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long unused,
struct task_struct * p, struct pt_regs * regs)
{
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index fe14c6747cd6..567f2598d090 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -20,7 +20,7 @@ struct pci_controller;
extern struct pci_ops apecs_pci_ops;
extern void apecs_init_arch(void);
extern void apecs_pci_clr_err(void);
-extern void apecs_machine_check(u64, u64);
+extern void apecs_machine_check(unsigned long vector, unsigned long la_ptr);
extern void apecs_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_cia.c */
@@ -29,7 +29,7 @@ extern void cia_init_pci(void);
extern void cia_init_arch(void);
extern void pyxis_init_arch(void);
extern void cia_kill_arch(int);
-extern void cia_machine_check(u64, u64);
+extern void cia_machine_check(unsigned long vector, unsigned long la_ptr);
extern void cia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_irongate.c */
@@ -42,7 +42,7 @@ extern void irongate_machine_check(u64, u64);
/* core_lca.c */
extern struct pci_ops lca_pci_ops;
extern void lca_init_arch(void);
-extern void lca_machine_check(u64, u64);
+extern void lca_machine_check(unsigned long vector, unsigned long la_ptr);
extern void lca_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_marvel.c */
@@ -64,7 +64,7 @@ void io7_clear_errors(struct io7 *io7);
extern struct pci_ops mcpcia_pci_ops;
extern void mcpcia_init_arch(void);
extern void mcpcia_init_hoses(void);
-extern void mcpcia_machine_check(u64, u64);
+extern void mcpcia_machine_check(unsigned long vector, unsigned long la_ptr);
extern void mcpcia_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_polaris.c */
@@ -72,14 +72,14 @@ extern struct pci_ops polaris_pci_ops;
extern int polaris_read_config_dword(struct pci_dev *, int, u32 *);
extern int polaris_write_config_dword(struct pci_dev *, int, u32);
extern void polaris_init_arch(void);
-extern void polaris_machine_check(u64, u64);
+extern void polaris_machine_check(unsigned long vector, unsigned long la_ptr);
#define polaris_pci_tbi ((void *)0)
/* core_t2.c */
extern struct pci_ops t2_pci_ops;
extern void t2_init_arch(void);
extern void t2_kill_arch(int);
-extern void t2_machine_check(u64, u64);
+extern void t2_machine_check(unsigned long vector, unsigned long la_ptr);
extern void t2_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_titan.c */
@@ -94,14 +94,14 @@ extern struct _alpha_agp_info *titan_agp_info(void);
extern struct pci_ops tsunami_pci_ops;
extern void tsunami_init_arch(void);
extern void tsunami_kill_arch(int);
-extern void tsunami_machine_check(u64, u64);
+extern void tsunami_machine_check(unsigned long vector, unsigned long la_ptr);
extern void tsunami_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
/* core_wildfire.c */
extern struct pci_ops wildfire_pci_ops;
extern void wildfire_init_arch(void);
extern void wildfire_kill_arch(int);
-extern void wildfire_machine_check(u64, u64);
+extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr);
extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
extern int wildfire_pa_to_nid(unsigned long);
extern int wildfire_cpuid_to_nid(int);
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 02bee6983ce2..80df86cd746b 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -1255,7 +1255,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
platform_string(), nr_processors);
#ifdef CONFIG_SMP
- seq_printf(f, "cpus active\t\t: %d\n"
+ seq_printf(f, "cpus active\t\t: %u\n"
"cpu active mask\t\t: %016lx\n",
num_online_cpus(), cpus_addr(cpu_possible_map)[0]);
#endif
diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c
index fd467b207f0f..bca5bda90cde 100644
--- a/arch/alpha/kernel/smc37c669.c
+++ b/arch/alpha/kernel/smc37c669.c
@@ -2542,8 +2542,8 @@ void __init SMC669_Init ( int index )
SMC37c669_display_device_info( );
#endif
local_irq_restore(flags);
- printk( "SMC37c669 Super I/O Controller found @ 0x%lx\n",
- (unsigned long) SMC_base );
+ printk( "SMC37c669 Super I/O Controller found @ 0x%p\n",
+ SMC_base );
}
else {
local_irq_restore(flags);
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index e2516f9a8967..2b5caf3d9b15 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -244,12 +244,11 @@ jensen_init_arch(void)
}
static void
-jensen_machine_check (u64 vector, u64 la)
+jensen_machine_check(unsigned long vector, unsigned long la)
{
printk(KERN_CRIT "Machine check\n");
}
-
/*
* The System Vector
*/
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index d232e42be018..9e263256a42d 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -453,7 +453,7 @@ sable_lynx_enable_irq(unsigned int irq)
sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
spin_unlock(&sable_lynx_irq_lock);
#if 0
- printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
+ printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n",
__func__, mask, bit, irq);
#endif
}
@@ -469,7 +469,7 @@ sable_lynx_disable_irq(unsigned int irq)
sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
spin_unlock(&sable_lynx_irq_lock);
#if 0
- printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
+ printk("%s: mask 0x%lx bit 0x%lx irq 0x%x\n",
__func__, mask, bit, irq);
#endif
}
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index cefc5a355ef9..6ee7655b7568 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -623,7 +623,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
}
lock_kernel();
- printk("Bad unaligned kernel access at %016lx: %p %lx %ld\n",
+ printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n",
pc, va, opcode, reg);
do_exit(SIGSEGV);
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 2b41ebbfa7ff..c13681ac1ede 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -217,6 +217,9 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
/* read_can_lock - would read_trylock() succeed? */
#define __raw_read_can_lock(x) ((x)->lock < 0x80000000)
+#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
+#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+
#define _raw_spin_relax(lock) cpu_relax()
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 2de14e2afdc5..c3265a2e7cd4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -301,7 +301,7 @@ void release_thread(struct task_struct *dead_task)
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int
-copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
+copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
{
struct thread_info *thread = task_thread_info(p);
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 43ae555ecb33..1bbe1da54869 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -332,7 +332,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
asmlinkage void ret_from_fork(void);
-int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long unused,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index ce4e4296b954..62d4abbaa654 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -250,21 +250,3 @@ asmlinkage void do_bus_error(unsigned long addr, int write_access,
dump_dtlb();
die("Bus Error", regs, SIGKILL);
}
-
-/*
- * This functionality is currently not possible to implement because
- * we're using segmentation to ensure a fixed mapping of the kernel
- * virtual address space.
- *
- * It would be possible to implement this, but it would require us to
- * disable segmentation at startup and load the kernel mappings into
- * the TLB like any other pages. There will be lots of trickery to
- * avoid recursive invocation of the TLB miss handler, though...
- */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
-
-}
-EXPORT_SYMBOL(kernel_map_pages);
-#endif
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 33e2e8993f7f..f49427293ca1 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -193,7 +193,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
}
int
-copy_thread(int nr, unsigned long clone_flags,
+copy_thread(unsigned long clone_flags,
unsigned long usp, unsigned long topstk,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index bd9b3ff63f6c..c4c69cf721e5 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -115,7 +115,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
*/
asmlinkage void ret_from_fork(void);
-int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long unused,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index ced5b725d9bd..120e7f796fea 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -131,7 +131,7 @@ kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
extern asmlinkage void ret_from_fork(void);
int
-copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long unused,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index 0d5709b983a1..129756b96661 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -121,6 +121,8 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw)
return 1;
}
+#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
+#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
#define _raw_spin_relax(lock) cpu_relax()
#define _raw_read_relax(lock) cpu_relax()
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 9583a338e9d6..0de50df74970 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -204,7 +204,7 @@ void prepare_to_copy(struct task_struct *tsk)
/*
* set up the kernel stack and exception frames for a new process
*/
-int copy_thread(int nr, unsigned long clone_flags,
+int copy_thread(unsigned long clone_flags,
unsigned long usp, unsigned long topstk,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index a8ef654a5a0b..e2f33d0f9969 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -191,7 +191,7 @@ asmlinkage int h8300_clone(struct pt_regs *regs)
}
-int copy_thread(int nr, unsigned long clone_flags,
+int copy_thread(unsigned long clone_flags,
unsigned long usp, unsigned long topstk,
struct task_struct * p, struct pt_regs * regs)
{
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index a109db30ce55..75645495c2dd 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -193,7 +193,6 @@ CONFIG_BOUNCE=y
CONFIG_NR_QUICK=1
CONFIG_VIRT_TO_BUS=y
CONFIG_UNEVICTABLE_LRU=y
-CONFIG_MMU_NOTIFIER=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
CONFIG_ARCH_FLATMEM_ENABLE=y
@@ -416,8 +415,6 @@ CONFIG_SGI_IOC4=y
# CONFIG_ENCLOSURE_SERVICES is not set
CONFIG_SGI_XP=m
# CONFIG_HP_ILO is not set
-CONFIG_SGI_GRU=m
-# CONFIG_SGI_GRU_DEBUG is not set
# CONFIG_C2PORT is not set
CONFIG_HAVE_IDE=y
CONFIG_IDE=y
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 24b1ad5334cb..2bef5261d96d 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -24,6 +24,7 @@
#include <linux/major.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/console.h>
@@ -848,38 +849,36 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
* /proc fs routines....
*/
-static inline int line_info(char *buf, struct serial_state *state)
+static inline void line_info(struct seq_file *m, struct serial_state *state)
{
- return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n",
+ seq_printf(m, "%d: uart:%s port:%lX irq:%d\n",
state->line, uart_config[state->type].name,
state->port, state->irq);
}
-static int rs_read_proc(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int rs_proc_show(struct seq_file *m, void *v)
{
- int i, len = 0, l;
- off_t begin = 0;
-
- len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version);
- for (i = 0; i < NR_PORTS && len < 4000; i++) {
- l = line_info(page + len, &rs_table[i]);
- len += l;
- if (len+begin > off+count)
- goto done;
- if (len+begin < off) {
- begin += len;
- len = 0;
- }
- }
- *eof = 1;
-done:
- if (off >= len+begin)
- return 0;
- *start = page + (begin-off);
- return ((count < begin+len-off) ? count : begin+len-off);
+ int i;
+
+ seq_printf(m, "simserinfo:1.0 driver:%s\n", serial_version);
+ for (i = 0; i < NR_PORTS; i++)
+ line_info(m, &rs_table[i]);
+ return 0;
}
+static int rs_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rs_proc_show, NULL);
+}
+
+static const struct file_operations rs_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = rs_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
/*
* ---------------------------------------------------------------------
* rs_init() and friends
@@ -917,7 +916,7 @@ static const struct tty_operations hp_ops = {
.start = rs_start,
.hangup = rs_hangup,
.wait_until_sent = rs_wait_until_sent,
- .read_proc = rs_read_proc,
+ .proc_fops = &rs_proc_fops,
};
/*
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index c47830e26cb7..111ed5222892 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -202,7 +202,11 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void);
#ifndef __ASSEMBLY__
#if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
-#define IA64_INTRINSIC_API(name) pv_cpu_ops.name
+#ifdef ASM_SUPPORTED
+# define IA64_INTRINSIC_API(name) paravirt_ ## name
+#else
+# define IA64_INTRINSIC_API(name) pv_cpu_ops.name
+#endif
#define IA64_INTRINSIC_MACRO(name) paravirt_ ## name
#else
#define IA64_INTRINSIC_API(name) ia64_native_ ## name
diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h
index 040bc87db930..7f2a456603cb 100644
--- a/arch/ia64/include/asm/mmu_context.h
+++ b/arch/ia64/include/asm/mmu_context.h
@@ -87,7 +87,7 @@ get_mmu_context (struct mm_struct *mm)
/* re-check, now that we've got the lock: */
context = mm->context;
if (context == 0) {
- cpus_clear(mm->cpu_vm_mask);
+ cpumask_clear(mm_cpumask(mm));
if (ia64_ctx.next >= ia64_ctx.limit) {
ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, ia64_ctx.next);
@@ -166,8 +166,8 @@ activate_context (struct mm_struct *mm)
do {
context = get_mmu_context(mm);
- if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
- cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
reload_context(context);
/*
* in the unlikely event of a TLB-flush by another thread,
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
index d2da61e4c49b..908eaef42a08 100644
--- a/arch/ia64/include/asm/module.h
+++ b/arch/ia64/include/asm/module.h
@@ -16,6 +16,12 @@ struct mod_arch_specific {
struct elf64_shdr *got; /* global offset table */
struct elf64_shdr *opd; /* official procedure descriptors */
struct elf64_shdr *unwind; /* unwind-table section */
+#ifdef CONFIG_PARAVIRT
+ struct elf64_shdr *paravirt_bundles;
+ /* paravirt_alt_bundle_patch table */
+ struct elf64_shdr *paravirt_insts;
+ /* paravirt_alt_inst_patch table */
+#endif
unsigned long gp; /* global-pointer for module */
void *core_unw_table; /* core unwind-table cookie returned by unwinder */
diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
index 0a1026cca4fa..d2d46efb3e6e 100644
--- a/arch/ia64/include/asm/native/inst.h
+++ b/arch/ia64/include/asm/native/inst.h
@@ -30,6 +30,9 @@
#define __paravirt_work_processed_syscall_target \
ia64_work_processed_syscall
+#define paravirt_fsyscall_table ia64_native_fsyscall_table
+#define paravirt_fsys_bubble_down ia64_native_fsys_bubble_down
+
#ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK
# define PARAVIRT_POISON 0xdeadbeefbaadf00d
# define CLOBBER(clob) \
@@ -74,6 +77,11 @@
(pred) mov reg = psr \
CLOBBER(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \
+(pred) mov reg = ar.itc \
+ CLOBBER(clob) \
+ CLOBBER_PRED(pred_clob)
+
#define MOV_TO_IFA(reg, clob) \
mov cr.ifa = reg \
CLOBBER(clob)
@@ -158,6 +166,11 @@
#define RSM_PSR_DT \
rsm psr.dt
+#define RSM_PSR_BE_I(clob0, clob1) \
+ rsm psr.be | psr.i \
+ CLOBBER(clob0) \
+ CLOBBER(clob1)
+
#define SSM_PSR_DT_AND_SRLZ_I \
ssm psr.dt \
;; \
diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h
new file mode 100644
index 000000000000..be16ca9311bf
--- /dev/null
+++ b/arch/ia64/include/asm/native/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/native/inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist \
+ __ia64_native_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist \
+ __ia64_native_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \
+ __ia64_native_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \
+ __ia64_native_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist \
+ __ia64_native_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist \
+ __ia64_native_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist \
+ __ia64_native_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist \
+ __ia64_native_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
index b8e6eb1090d7..8d72962ec838 100644
--- a/arch/ia64/include/asm/native/pvchk_inst.h
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -180,6 +180,11 @@
IS_PRED_IN(pred) \
IS_RREG_OUT(reg) \
IS_RREG_CLOB(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \
+ IS_PRED_IN(pred) \
+ IS_PRED_CLOB(pred_clob) \
+ IS_RREG_OUT(reg) \
+ IS_RREG_CLOB(clob)
#define MOV_TO_IFA(reg, clob) \
IS_RREG_IN(reg) \
IS_RREG_CLOB(clob)
@@ -246,6 +251,9 @@
IS_RREG_CLOB(clob2)
#define RSM_PSR_DT \
nop 0
+#define RSM_PSR_BE_I(clob0, clob1) \
+ IS_RREG_CLOB(clob0) \
+ IS_RREG_CLOB(clob1)
#define SSM_PSR_DT_AND_SRLZ_I \
nop 0
#define BSW_0(clob0, clob1, clob2) \
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 2bf3636473fe..2eb0a981a09a 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -22,6 +22,56 @@
#ifndef __ASM_PARAVIRT_H
#define __ASM_PARAVIRT_H
+#ifndef __ASSEMBLY__
+/******************************************************************************
+ * fsys related addresses
+ */
+struct pv_fsys_data {
+ unsigned long *fsyscall_table;
+ void *fsys_bubble_down;
+};
+
+extern struct pv_fsys_data pv_fsys_data;
+
+unsigned long *paravirt_get_fsyscall_table(void);
+char *paravirt_get_fsys_bubble_down(void);
+
+/******************************************************************************
+ * patchlist addresses for gate page
+ */
+enum pv_gate_patchlist {
+ PV_GATE_START_FSYSCALL,
+ PV_GATE_END_FSYSCALL,
+
+ PV_GATE_START_BRL_FSYS_BUBBLE_DOWN,
+ PV_GATE_END_BRL_FSYS_BUBBLE_DOWN,
+
+ PV_GATE_START_VTOP,
+ PV_GATE_END_VTOP,
+
+ PV_GATE_START_MCKINLEY_E9,
+ PV_GATE_END_MCKINLEY_E9,
+};
+
+struct pv_patchdata {
+ unsigned long start_fsyscall_patchlist;
+ unsigned long end_fsyscall_patchlist;
+ unsigned long start_brl_fsys_bubble_down_patchlist;
+ unsigned long end_brl_fsys_bubble_down_patchlist;
+ unsigned long start_vtop_patchlist;
+ unsigned long end_vtop_patchlist;
+ unsigned long start_mckinley_e9_patchlist;
+ unsigned long end_mckinley_e9_patchlist;
+
+ void *gate_section;
+};
+
+extern struct pv_patchdata pv_patchdata;
+
+unsigned long paravirt_get_gate_patchlist(enum pv_gate_patchlist type);
+void *paravirt_get_gate_section(void);
+#endif
+
#ifdef CONFIG_PARAVIRT_GUEST
#define PARAVIRT_HYPERVISOR_TYPE_DEFAULT 0
@@ -68,6 +118,14 @@ struct pv_init_ops {
int (*arch_setup_nomca)(void);
void (*post_smp_prepare_boot_cpu)(void);
+
+#ifdef ASM_SUPPORTED
+ unsigned long (*patch_bundle)(void *sbundle, void *ebundle,
+ unsigned long type);
+ unsigned long (*patch_inst)(unsigned long stag, unsigned long etag,
+ unsigned long type);
+#endif
+ void (*patch_branch)(unsigned long tag, unsigned long type);
};
extern struct pv_init_ops pv_init_ops;
@@ -210,6 +268,8 @@ struct pv_time_ops {
int (*do_steal_accounting)(unsigned long *new_itm);
void (*clocksource_resume)(void);
+
+ unsigned long long (*sched_clock)(void);
};
extern struct pv_time_ops pv_time_ops;
@@ -227,6 +287,11 @@ paravirt_do_steal_accounting(unsigned long *new_itm)
return pv_time_ops.do_steal_accounting(new_itm);
}
+static inline unsigned long long paravirt_sched_clock(void)
+{
+ return pv_time_ops.sched_clock();
+}
+
#endif /* !__ASSEMBLY__ */
#else
diff --git a/arch/ia64/include/asm/paravirt_patch.h b/arch/ia64/include/asm/paravirt_patch.h
new file mode 100644
index 000000000000..128ff5db6e67
--- /dev/null
+++ b/arch/ia64/include/asm/paravirt_patch.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __ASM_PARAVIRT_PATCH_H
+#define __ASM_PARAVIRT_PATCH_H
+
+#ifdef __ASSEMBLY__
+
+ .section .paravirt_branches, "a"
+ .previous
+#define PARAVIRT_PATCH_SITE_BR(type) \
+ { \
+ [1:] ; \
+ br.cond.sptk.many 2f ; \
+ nop.b 0 ; \
+ nop.b 0;; ; \
+ } ; \
+ 2: \
+ .xdata8 ".paravirt_branches", 1b, type
+
+#else
+
+#include <linux/stringify.h>
+#include <asm/intrinsics.h>
+
+/* for binary patch */
+struct paravirt_patch_site_bundle {
+ void *sbundle;
+ void *ebundle;
+ unsigned long type;
+};
+
+/* label means the beginning of new bundle */
+#define paravirt_alt_bundle(instr, privop) \
+ "\t998:\n" \
+ "\t" instr "\n" \
+ "\t999:\n" \
+ "\t.pushsection .paravirt_bundles, \"a\"\n" \
+ "\t.popsection\n" \
+ "\t.xdata8 \".paravirt_bundles\", 998b, 999b, " \
+ __stringify(privop) "\n"
+
+
+struct paravirt_patch_bundle_elem {
+ const void *sbundle;
+ const void *ebundle;
+ unsigned long type;
+};
+
+
+struct paravirt_patch_site_inst {
+ unsigned long stag;
+ unsigned long etag;
+ unsigned long type;
+};
+
+#define paravirt_alt_inst(instr, privop) \
+ "\t[998:]\n" \
+ "\t" instr "\n" \
+ "\t[999:]\n" \
+ "\t.pushsection .paravirt_insts, \"a\"\n" \
+ "\t.popsection\n" \
+ "\t.xdata8 \".paravirt_insts\", 998b, 999b, " \
+ __stringify(privop) "\n"
+
+struct paravirt_patch_site_branch {
+ unsigned long tag;
+ unsigned long type;
+};
+
+struct paravirt_patch_branch_target {
+ const void *entry;
+ unsigned long type;
+};
+
+void
+__paravirt_patch_apply_branch(
+ unsigned long tag, unsigned long type,
+ const struct paravirt_patch_branch_target *entries,
+ unsigned int nr_entries);
+
+void
+paravirt_patch_reloc_br(unsigned long tag, const void *target);
+
+void
+paravirt_patch_reloc_brl(unsigned long tag, const void *target);
+
+
+#if defined(ASM_SUPPORTED) && defined(CONFIG_PARAVIRT)
+unsigned long
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+
+unsigned long
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+ const struct paravirt_patch_bundle_elem *elems,
+ unsigned long nelems,
+ const struct paravirt_patch_bundle_elem **found);
+
+void
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+ const struct paravirt_patch_site_bundle *end);
+
+void
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+ const struct paravirt_patch_site_inst *end);
+
+void paravirt_patch_apply(void);
+#else
+#define paravirt_patch_apply_bundle(start, end) do { } while (0)
+#define paravirt_patch_apply_inst(start, end) do { } while (0)
+#define paravirt_patch_apply() do { } while (0)
+#endif
+
+#endif /* !__ASSEMBLEY__ */
+
+#endif /* __ASM_PARAVIRT_PATCH_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h
index 33c8e55f5775..3d2951130b5f 100644
--- a/arch/ia64/include/asm/paravirt_privop.h
+++ b/arch/ia64/include/asm/paravirt_privop.h
@@ -33,7 +33,7 @@
*/
struct pv_cpu_ops {
- void (*fc)(unsigned long addr);
+ void (*fc)(void *addr);
unsigned long (*thash)(unsigned long addr);
unsigned long (*get_cpuid)(int index);
unsigned long (*get_pmd)(int index);
@@ -60,12 +60,18 @@ extern unsigned long ia64_native_getreg_func(int regnum);
/* Instructions paravirtualized for performance */
/************************************************/
+#ifndef ASM_SUPPORTED
+#define paravirt_ssm_i() pv_cpu_ops.ssm_i()
+#define paravirt_rsm_i() pv_cpu_ops.rsm_i()
+#define __paravirt_getreg() pv_cpu_ops.getreg()
+#endif
+
/* mask for ia64_native_ssm/rsm() must be constant.("i" constraing).
* static inline function doesn't satisfy it. */
#define paravirt_ssm(mask) \
do { \
if ((mask) == IA64_PSR_I) \
- pv_cpu_ops.ssm_i(); \
+ paravirt_ssm_i(); \
else \
ia64_native_ssm(mask); \
} while (0)
@@ -73,7 +79,7 @@ extern unsigned long ia64_native_getreg_func(int regnum);
#define paravirt_rsm(mask) \
do { \
if ((mask) == IA64_PSR_I) \
- pv_cpu_ops.rsm_i(); \
+ paravirt_rsm_i(); \
else \
ia64_native_rsm(mask); \
} while (0)
@@ -86,7 +92,7 @@ extern unsigned long ia64_native_getreg_func(int regnum);
if ((reg) == _IA64_REG_IP) \
res = ia64_native_getreg(_IA64_REG_IP); \
else \
- res = pv_cpu_ops.getreg(reg); \
+ res = __paravirt_getreg(reg); \
res; \
})
@@ -112,6 +118,12 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch);
#endif /* CONFIG_PARAVIRT */
+#if defined(CONFIG_PARAVIRT) && defined(ASM_SUPPORTED)
+#define paravirt_dv_serialize_data() ia64_dv_serialize_data()
+#else
+#define paravirt_dv_serialize_data() /* nothing */
+#endif
+
/* these routines utilize privilege-sensitive or performance-sensitive
* privileged instructions so the code must be replaced with
* paravirtualized versions */
@@ -121,4 +133,349 @@ void paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch);
IA64_PARAVIRT_ASM_FUNC(work_processed_syscall)
#define ia64_leave_kernel IA64_PARAVIRT_ASM_FUNC(leave_kernel)
+
+#if defined(CONFIG_PARAVIRT)
+/******************************************************************************
+ * binary patching infrastructure
+ */
+#define PARAVIRT_PATCH_TYPE_FC 1
+#define PARAVIRT_PATCH_TYPE_THASH 2
+#define PARAVIRT_PATCH_TYPE_GET_CPUID 3
+#define PARAVIRT_PATCH_TYPE_GET_PMD 4
+#define PARAVIRT_PATCH_TYPE_PTCGA 5
+#define PARAVIRT_PATCH_TYPE_GET_RR 6
+#define PARAVIRT_PATCH_TYPE_SET_RR 7
+#define PARAVIRT_PATCH_TYPE_SET_RR0_TO_RR4 8
+#define PARAVIRT_PATCH_TYPE_SSM_I 9
+#define PARAVIRT_PATCH_TYPE_RSM_I 10
+#define PARAVIRT_PATCH_TYPE_GET_PSR_I 11
+#define PARAVIRT_PATCH_TYPE_INTRIN_LOCAL_IRQ_RESTORE 12
+
+/* PARAVIRT_PATY_TYPE_[GS]ETREG + _IA64_REG_xxx */
+#define PARAVIRT_PATCH_TYPE_GETREG 0x10000000
+#define PARAVIRT_PATCH_TYPE_SETREG 0x20000000
+
+/*
+ * struct task_struct* (*ia64_switch_to)(void* next_task);
+ * void *ia64_leave_syscall;
+ * void *ia64_work_processed_syscall
+ * void *ia64_leave_kernel;
+ */
+
+#define PARAVIRT_PATCH_TYPE_BR_START 0x30000000
+#define PARAVIRT_PATCH_TYPE_BR_SWITCH_TO \
+ (PARAVIRT_PATCH_TYPE_BR_START + 0)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_SYSCALL \
+ (PARAVIRT_PATCH_TYPE_BR_START + 1)
+#define PARAVIRT_PATCH_TYPE_BR_WORK_PROCESSED_SYSCALL \
+ (PARAVIRT_PATCH_TYPE_BR_START + 2)
+#define PARAVIRT_PATCH_TYPE_BR_LEAVE_KERNEL \
+ (PARAVIRT_PATCH_TYPE_BR_START + 3)
+
+#ifdef ASM_SUPPORTED
+#include <asm/paravirt_patch.h>
+
+/*
+ * pv_cpu_ops calling stub.
+ * normal function call convension can't be written by gcc
+ * inline assembly.
+ *
+ * from the caller's point of view,
+ * the following registers will be clobbered.
+ * r2, r3
+ * r8-r15
+ * r16, r17
+ * b6, b7
+ * p6-p15
+ * ar.ccv
+ *
+ * from the callee's point of view ,
+ * the following registers can be used.
+ * r2, r3: scratch
+ * r8: scratch, input argument0 and return value
+ * r0-r15: scratch, input argument1-5
+ * b6: return pointer
+ * b7: scratch
+ * p6-p15: scratch
+ * ar.ccv: scratch
+ *
+ * other registers must not be changed. especially
+ * b0: rp: preserved. gcc ignores b0 in clobbered register.
+ * r16: saved gp
+ */
+/* 5 bundles */
+#define __PARAVIRT_BR \
+ ";;\n" \
+ "{ .mlx\n" \
+ "nop 0\n" \
+ "movl r2 = %[op_addr]\n"/* get function pointer address */ \
+ ";;\n" \
+ "}\n" \
+ "1:\n" \
+ "{ .mii\n" \
+ "ld8 r2 = [r2]\n" /* load function descriptor address */ \
+ "mov r17 = ip\n" /* get ip to calc return address */ \
+ "mov r16 = gp\n" /* save gp */ \
+ ";;\n" \
+ "}\n" \
+ "{ .mii\n" \
+ "ld8 r3 = [r2], 8\n" /* load entry address */ \
+ "adds r17 = 1f - 1b, r17\n" /* calculate return address */ \
+ ";;\n" \
+ "mov b7 = r3\n" /* set entry address */ \
+ "}\n" \
+ "{ .mib\n" \
+ "ld8 gp = [r2]\n" /* load gp value */ \
+ "mov b6 = r17\n" /* set return address */ \
+ "br.cond.sptk.few b7\n" /* intrinsics are very short isns */ \
+ "}\n" \
+ "1:\n" \
+ "{ .mii\n" \
+ "mov gp = r16\n" /* restore gp value */ \
+ "nop 0\n" \
+ "nop 0\n" \
+ ";;\n" \
+ "}\n"
+
+#define PARAVIRT_OP(op) \
+ [op_addr] "i"(&pv_cpu_ops.op)
+
+#define PARAVIRT_TYPE(type) \
+ PARAVIRT_PATCH_TYPE_ ## type
+
+#define PARAVIRT_REG_CLOBBERS0 \
+ "r2", "r3", /*"r8",*/ "r9", "r10", "r11", "r14", \
+ "r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS1 \
+ "r2","r3", /*"r8",*/ "r9", "r10", "r11", "r14", \
+ "r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS2 \
+ "r2", "r3", /*"r8", "r9",*/ "r10", "r11", "r14", \
+ "r15", "r16", "r17"
+
+#define PARAVIRT_REG_CLOBBERS5 \
+ "r2", "r3", /*"r8", "r9", "r10", "r11", "r14",*/ \
+ "r15", "r16", "r17"
+
+#define PARAVIRT_BR_CLOBBERS \
+ "b6", "b7"
+
+#define PARAVIRT_PR_CLOBBERS \
+ "p6", "p7", "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
+
+#define PARAVIRT_AR_CLOBBERS \
+ "ar.ccv"
+
+#define PARAVIRT_CLOBBERS0 \
+ PARAVIRT_REG_CLOBBERS0, \
+ PARAVIRT_BR_CLOBBERS, \
+ PARAVIRT_PR_CLOBBERS, \
+ PARAVIRT_AR_CLOBBERS, \
+ "memory"
+
+#define PARAVIRT_CLOBBERS1 \
+ PARAVIRT_REG_CLOBBERS1, \
+ PARAVIRT_BR_CLOBBERS, \
+ PARAVIRT_PR_CLOBBERS, \
+ PARAVIRT_AR_CLOBBERS, \
+ "memory"
+
+#define PARAVIRT_CLOBBERS2 \
+ PARAVIRT_REG_CLOBBERS2, \
+ PARAVIRT_BR_CLOBBERS, \
+ PARAVIRT_PR_CLOBBERS, \
+ PARAVIRT_AR_CLOBBERS, \
+ "memory"
+
+#define PARAVIRT_CLOBBERS5 \
+ PARAVIRT_REG_CLOBBERS5, \
+ PARAVIRT_BR_CLOBBERS, \
+ PARAVIRT_PR_CLOBBERS, \
+ PARAVIRT_AR_CLOBBERS, \
+ "memory"
+
+#define PARAVIRT_BR0(op, type) \
+ register unsigned long ia64_clobber asm ("r8"); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_clobber) \
+ : PARAVIRT_OP(op) \
+ : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR0_RET(op, type) \
+ register unsigned long ia64_intri_res asm ("r8"); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_intri_res) \
+ : PARAVIRT_OP(op) \
+ : PARAVIRT_CLOBBERS0)
+
+#define PARAVIRT_BR1(op, type, arg1) \
+ register unsigned long __##arg1 asm ("r8") = arg1; \
+ register unsigned long ia64_clobber asm ("r8"); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_clobber) \
+ : PARAVIRT_OP(op), "0"(__##arg1) \
+ : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_RET(op, type, arg1) \
+ register unsigned long ia64_intri_res asm ("r8"); \
+ register unsigned long __##arg1 asm ("r8") = arg1; \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_intri_res) \
+ : PARAVIRT_OP(op), "0"(__##arg1) \
+ : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR1_VOID(op, type, arg1) \
+ register void *__##arg1 asm ("r8") = arg1; \
+ register unsigned long ia64_clobber asm ("r8"); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_clobber) \
+ : PARAVIRT_OP(op), "0"(__##arg1) \
+ : PARAVIRT_CLOBBERS1)
+
+#define PARAVIRT_BR2(op, type, arg1, arg2) \
+ register unsigned long __##arg1 asm ("r8") = arg1; \
+ register unsigned long __##arg2 asm ("r9") = arg2; \
+ register unsigned long ia64_clobber1 asm ("r8"); \
+ register unsigned long ia64_clobber2 asm ("r9"); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(type)) \
+ : "=r"(ia64_clobber1), "=r"(ia64_clobber2) \
+ : PARAVIRT_OP(op), "0"(__##arg1), "1"(__##arg2) \
+ : PARAVIRT_CLOBBERS2)
+
+
+#define PARAVIRT_DEFINE_CPU_OP0(op, type) \
+ static inline void \
+ paravirt_ ## op (void) \
+ { \
+ PARAVIRT_BR0(op, type); \
+ }
+
+#define PARAVIRT_DEFINE_CPU_OP0_RET(op, type) \
+ static inline unsigned long \
+ paravirt_ ## op (void) \
+ { \
+ PARAVIRT_BR0_RET(op, type); \
+ return ia64_intri_res; \
+ }
+
+#define PARAVIRT_DEFINE_CPU_OP1_VOID(op, type) \
+ static inline void \
+ paravirt_ ## op (void *arg1) \
+ { \
+ PARAVIRT_BR1_VOID(op, type, arg1); \
+ }
+
+#define PARAVIRT_DEFINE_CPU_OP1(op, type) \
+ static inline void \
+ paravirt_ ## op (unsigned long arg1) \
+ { \
+ PARAVIRT_BR1(op, type, arg1); \
+ }
+
+#define PARAVIRT_DEFINE_CPU_OP1_RET(op, type) \
+ static inline unsigned long \
+ paravirt_ ## op (unsigned long arg1) \
+ { \
+ PARAVIRT_BR1_RET(op, type, arg1); \
+ return ia64_intri_res; \
+ }
+
+#define PARAVIRT_DEFINE_CPU_OP2(op, type) \
+ static inline void \
+ paravirt_ ## op (unsigned long arg1, \
+ unsigned long arg2) \
+ { \
+ PARAVIRT_BR2(op, type, arg1, arg2); \
+ }
+
+
+PARAVIRT_DEFINE_CPU_OP1_VOID(fc, FC);
+PARAVIRT_DEFINE_CPU_OP1_RET(thash, THASH)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_cpuid, GET_CPUID)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_pmd, GET_PMD)
+PARAVIRT_DEFINE_CPU_OP2(ptcga, PTCGA)
+PARAVIRT_DEFINE_CPU_OP1_RET(get_rr, GET_RR)
+PARAVIRT_DEFINE_CPU_OP2(set_rr, SET_RR)
+PARAVIRT_DEFINE_CPU_OP0(ssm_i, SSM_I)
+PARAVIRT_DEFINE_CPU_OP0(rsm_i, RSM_I)
+PARAVIRT_DEFINE_CPU_OP0_RET(get_psr_i, GET_PSR_I)
+PARAVIRT_DEFINE_CPU_OP1(intrin_local_irq_restore, INTRIN_LOCAL_IRQ_RESTORE)
+
+static inline void
+paravirt_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+ unsigned long val2, unsigned long val3,
+ unsigned long val4)
+{
+ register unsigned long __val0 asm ("r8") = val0;
+ register unsigned long __val1 asm ("r9") = val1;
+ register unsigned long __val2 asm ("r10") = val2;
+ register unsigned long __val3 asm ("r11") = val3;
+ register unsigned long __val4 asm ("r14") = val4;
+
+ register unsigned long ia64_clobber0 asm ("r8");
+ register unsigned long ia64_clobber1 asm ("r9");
+ register unsigned long ia64_clobber2 asm ("r10");
+ register unsigned long ia64_clobber3 asm ("r11");
+ register unsigned long ia64_clobber4 asm ("r14");
+
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR,
+ PARAVIRT_TYPE(SET_RR0_TO_RR4))
+ : "=r"(ia64_clobber0),
+ "=r"(ia64_clobber1),
+ "=r"(ia64_clobber2),
+ "=r"(ia64_clobber3),
+ "=r"(ia64_clobber4)
+ : PARAVIRT_OP(set_rr0_to_rr4),
+ "0"(__val0), "1"(__val1), "2"(__val2),
+ "3"(__val3), "4"(__val4)
+ : PARAVIRT_CLOBBERS5);
+}
+
+/* unsigned long paravirt_getreg(int reg) */
+#define __paravirt_getreg(reg) \
+ ({ \
+ register unsigned long ia64_intri_res asm ("r8"); \
+ register unsigned long __reg asm ("r8") = (reg); \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(reg)); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(GETREG) \
+ + (reg)) \
+ : "=r"(ia64_intri_res) \
+ : PARAVIRT_OP(getreg), "0"(__reg) \
+ : PARAVIRT_CLOBBERS1); \
+ \
+ ia64_intri_res; \
+ })
+
+/* void paravirt_setreg(int reg, unsigned long val) */
+#define paravirt_setreg(reg, val) \
+ do { \
+ register unsigned long __val asm ("r8") = val; \
+ register unsigned long __reg asm ("r9") = reg; \
+ register unsigned long ia64_clobber1 asm ("r8"); \
+ register unsigned long ia64_clobber2 asm ("r9"); \
+ \
+ BUILD_BUG_ON(!__builtin_constant_p(reg)); \
+ asm volatile (paravirt_alt_bundle(__PARAVIRT_BR, \
+ PARAVIRT_TYPE(SETREG) \
+ + (reg)) \
+ : "=r"(ia64_clobber1), \
+ "=r"(ia64_clobber2) \
+ : PARAVIRT_OP(setreg), \
+ "1"(__reg), "0"(__val) \
+ : PARAVIRT_CLOBBERS2); \
+ } while (0)
+
+#endif /* ASM_SUPPORTED */
+#endif /* CONFIG_PARAVIRT && ASM_SUPPOTED */
+
#endif /* _ASM_IA64_PARAVIRT_PRIVOP_H */
diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h
index 21c402365d0e..598408336251 100644
--- a/arch/ia64/include/asm/smp.h
+++ b/arch/ia64/include/asm/smp.h
@@ -126,7 +126,8 @@ extern void identify_siblings (struct cpuinfo_ia64 *);
extern int is_multithreading_enabled(void);
extern void arch_send_call_function_single_ipi(int cpu);
-extern void arch_send_call_function_ipi(cpumask_t mask);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
#else /* CONFIG_SMP */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 0229fb95fb38..13ab71576bc7 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -120,6 +120,38 @@ do { \
#define __raw_read_can_lock(rw) (*(volatile int *)(rw) >= 0)
#define __raw_write_can_lock(rw) (*(volatile int *)(rw) == 0)
+#ifdef ASM_SUPPORTED
+
+static __always_inline void
+__raw_read_lock_flags(raw_rwlock_t *lock, unsigned long flags)
+{
+ __asm__ __volatile__ (
+ "tbit.nz p6, p0 = %1,%2\n"
+ "br.few 3f\n"
+ "1:\n"
+ "fetchadd4.rel r2 = [%0], -1;;\n"
+ "(p6) ssm psr.i\n"
+ "2:\n"
+ "hint @pause\n"
+ "ld4 r2 = [%0];;\n"
+ "cmp4.lt p7,p0 = r2, r0\n"
+ "(p7) br.cond.spnt.few 2b\n"
+ "(p6) rsm psr.i\n"
+ ";;\n"
+ "3:\n"
+ "fetchadd4.acq r2 = [%0], 1;;\n"
+ "cmp4.lt p7,p0 = r2, r0\n"
+ "(p7) br.cond.spnt.few 1b\n"
+ : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+ : "p6", "p7", "r2", "memory");
+}
+
+#define __raw_read_lock(lock) __raw_read_lock_flags(lock, 0)
+
+#else /* !ASM_SUPPORTED */
+
+#define __raw_read_lock_flags(rw, flags) __raw_read_lock(rw)
+
#define __raw_read_lock(rw) \
do { \
raw_rwlock_t *__read_lock_ptr = (rw); \
@@ -131,6 +163,8 @@ do { \
} \
} while (0)
+#endif /* !ASM_SUPPORTED */
+
#define __raw_read_unlock(rw) \
do { \
raw_rwlock_t *__read_lock_ptr = (rw); \
@@ -138,20 +172,33 @@ do { \
} while (0)
#ifdef ASM_SUPPORTED
-#define __raw_write_lock(rw) \
-do { \
- __asm__ __volatile__ ( \
- "mov ar.ccv = r0\n" \
- "dep r29 = -1, r0, 31, 1;;\n" \
- "1:\n" \
- "ld4 r2 = [%0];;\n" \
- "cmp4.eq p0,p7 = r0,r2\n" \
- "(p7) br.cond.spnt.few 1b \n" \
- "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n" \
- "cmp4.eq p0,p7 = r0, r2\n" \
- "(p7) br.cond.spnt.few 1b;;\n" \
- :: "r"(rw) : "ar.ccv", "p7", "r2", "r29", "memory"); \
-} while(0)
+
+static __always_inline void
+__raw_write_lock_flags(raw_rwlock_t *lock, unsigned long flags)
+{
+ __asm__ __volatile__ (
+ "tbit.nz p6, p0 = %1, %2\n"
+ "mov ar.ccv = r0\n"
+ "dep r29 = -1, r0, 31, 1\n"
+ "br.few 3f;;\n"
+ "1:\n"
+ "(p6) ssm psr.i\n"
+ "2:\n"
+ "hint @pause\n"
+ "ld4 r2 = [%0];;\n"
+ "cmp4.eq p0,p7 = r0, r2\n"
+ "(p7) br.cond.spnt.few 2b\n"
+ "(p6) rsm psr.i\n"
+ ";;\n"
+ "3:\n"
+ "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n"
+ "cmp4.eq p0,p7 = r0, r2\n"
+ "(p7) br.cond.spnt.few 1b;;\n"
+ : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT)
+ : "ar.ccv", "p6", "p7", "r2", "r29", "memory");
+}
+
+#define __raw_write_lock(rw) __raw_write_lock_flags(rw, 0)
#define __raw_write_trylock(rw) \
({ \
@@ -174,6 +221,8 @@ static inline void __raw_write_unlock(raw_rwlock_t *x)
#else /* !ASM_SUPPORTED */
+#define __raw_write_lock_flags(l, flags) __raw_write_lock(l)
+
#define __raw_write_lock(l) \
({ \
__u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
index 4e03cfe74a0c..86c7db861180 100644
--- a/arch/ia64/include/asm/timex.h
+++ b/arch/ia64/include/asm/timex.h
@@ -40,5 +40,6 @@ get_cycles (void)
}
extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
#endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index f260dcf21515..7b4c8c70b2d1 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -112,11 +112,6 @@ void build_cpu_to_node_map(void);
extern void arch_fix_phys_package_id(int num, u32 slot);
-#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
- CPU_MASK_ALL : \
- node_to_cpumask(pcibus_to_node(bus)) \
- )
-
#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h
index f607018af4a1..53e9dfacd073 100644
--- a/arch/ia64/include/asm/uv/uv_hub.h
+++ b/arch/ia64/include/asm/uv/uv_hub.h
@@ -305,5 +305,11 @@ static inline int uv_num_possible_blades(void)
return 1;
}
+static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
+{
+ /* not currently needed on ia64 */
+}
+
+
#endif /* __ASM_IA64_UV_HUB__ */
diff --git a/arch/ia64/include/asm/uv/uv_mmrs.h b/arch/ia64/include/asm/uv/uv_mmrs.h
index c149ef085437..fe0b8f05e1a8 100644
--- a/arch/ia64/include/asm/uv/uv_mmrs.h
+++ b/arch/ia64/include/asm/uv/uv_mmrs.h
@@ -8,8 +8,8 @@
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
*/
-#ifndef __ASM_IA64_UV_MMRS__
-#define __ASM_IA64_UV_MMRS__
+#ifndef _ASM_IA64_UV_UV_MMRS_H
+#define _ASM_IA64_UV_UV_MMRS_H
#define UV_MMR_ENABLE (1UL << 63)
@@ -243,6 +243,158 @@ union uvh_event_occurred0_u {
#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
/* ========================================================================= */
+/* UVH_GR0_TLB_INT0_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
+
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int0_config_u {
+ unsigned long v;
+ struct uvh_gr0_tlb_int0_config_s {
+ unsigned long vector_ : 8; /* RW */
+ unsigned long dm : 3; /* RW */
+ unsigned long destmode : 1; /* RW */
+ unsigned long status : 1; /* RO */
+ unsigned long p : 1; /* RO */
+ unsigned long rsvd_14 : 1; /* */
+ unsigned long t : 1; /* RO */
+ unsigned long m : 1; /* RW */
+ unsigned long rsvd_17_31: 15; /* */
+ unsigned long apic_id : 32; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
+/* UVH_GR0_TLB_INT1_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
+
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr0_tlb_int1_config_u {
+ unsigned long v;
+ struct uvh_gr0_tlb_int1_config_s {
+ unsigned long vector_ : 8; /* RW */
+ unsigned long dm : 3; /* RW */
+ unsigned long destmode : 1; /* RW */
+ unsigned long status : 1; /* RO */
+ unsigned long p : 1; /* RO */
+ unsigned long rsvd_14 : 1; /* */
+ unsigned long t : 1; /* RO */
+ unsigned long m : 1; /* RW */
+ unsigned long rsvd_17_31: 15; /* */
+ unsigned long apic_id : 32; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
+/* UVH_GR1_TLB_INT0_CONFIG */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int0_config_u {
+ unsigned long v;
+ struct uvh_gr1_tlb_int0_config_s {
+ unsigned long vector_ : 8; /* RW */
+ unsigned long dm : 3; /* RW */
+ unsigned long destmode : 1; /* RW */
+ unsigned long status : 1; /* RO */
+ unsigned long p : 1; /* RO */
+ unsigned long rsvd_14 : 1; /* */
+ unsigned long t : 1; /* RO */
+ unsigned long m : 1; /* RW */
+ unsigned long rsvd_17_31: 15; /* */
+ unsigned long apic_id : 32; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
+/* UVH_GR1_TLB_INT1_CONFIG */
+/* ========================================================================= */
+#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
+#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
+#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
+#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
+union uvh_gr1_tlb_int1_config_u {
+ unsigned long v;
+ struct uvh_gr1_tlb_int1_config_s {
+ unsigned long vector_ : 8; /* RW */
+ unsigned long dm : 3; /* RW */
+ unsigned long destmode : 1; /* RW */
+ unsigned long status : 1; /* RO */
+ unsigned long p : 1; /* RO */
+ unsigned long rsvd_14 : 1; /* */
+ unsigned long t : 1; /* RO */
+ unsigned long m : 1; /* RW */
+ unsigned long rsvd_17_31: 15; /* */
+ unsigned long apic_id : 32; /* RW */
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_INT_CMPB */
/* ========================================================================= */
#define UVH_INT_CMPB 0x22080UL
@@ -670,4 +822,4 @@ union uvh_si_alias2_overlay_config_u {
};
-#endif /* __ASM_IA64_UV_MMRS__ */
+#endif /* _ASM_IA64_UV_UV_MMRS_H */
diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h
index 7a804e80fc67..e425227a418e 100644
--- a/arch/ia64/include/asm/xen/hypervisor.h
+++ b/arch/ia64/include/asm/xen/hypervisor.h
@@ -33,9 +33,6 @@
#ifndef _ASM_IA64_XEN_HYPERVISOR_H
#define _ASM_IA64_XEN_HYPERVISOR_H
-#ifdef CONFIG_XEN
-
-#include <linux/init.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h> /* to compile feature.c */
#include <xen/features.h> /* to comiple xen-netfront.c */
@@ -43,22 +40,32 @@
/* xen_domain_type is set before executing any C code by early_xen_setup */
enum xen_domain_type {
- XEN_NATIVE,
- XEN_PV_DOMAIN,
- XEN_HVM_DOMAIN,
+ XEN_NATIVE, /* running on bare hardware */
+ XEN_PV_DOMAIN, /* running in a PV domain */
+ XEN_HVM_DOMAIN, /* running in a Xen hvm domain*/
};
+#ifdef CONFIG_XEN
extern enum xen_domain_type xen_domain_type;
+#else
+#define xen_domain_type XEN_NATIVE
+#endif
#define xen_domain() (xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN)
-#define xen_initial_domain() (xen_pv_domain() && \
+#define xen_pv_domain() (xen_domain() && \
+ xen_domain_type == XEN_PV_DOMAIN)
+#define xen_hvm_domain() (xen_domain() && \
+ xen_domain_type == XEN_HVM_DOMAIN)
+
+#ifdef CONFIG_XEN_DOM0
+#define xen_initial_domain() (xen_pv_domain() && \
(xen_start_info->flags & SIF_INITDOMAIN))
-#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
+#else
+#define xen_initial_domain() (0)
+#endif
-/* deprecated. remove this */
-#define is_running_on_xen() (xen_domain_type == XEN_PV_DOMAIN)
+#ifdef CONFIG_XEN
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
@@ -74,16 +81,6 @@ void force_evtchn_callback(void);
/* For setup_arch() in arch/ia64/kernel/setup.c */
void xen_ia64_enable_opt_feature(void);
-
-#else /* CONFIG_XEN */
-
-#define xen_domain() (0)
-#define xen_pv_domain() (0)
-#define xen_initial_domain() (0)
-#define xen_hvm_domain() (0)
-#define is_running_on_xen() (0) /* deprecated. remove this */
#endif
-#define is_initial_xendomain() (0) /* deprecated. remove this */
-
#endif /* _ASM_IA64_XEN_HYPERVISOR_H */
diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
index 19c2ae1d878a..c53a47611208 100644
--- a/arch/ia64/include/asm/xen/inst.h
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -33,6 +33,9 @@
#define __paravirt_work_processed_syscall_target \
xen_work_processed_syscall
+#define paravirt_fsyscall_table xen_fsyscall_table
+#define paravirt_fsys_bubble_down xen_fsys_bubble_down
+
#define MOV_FROM_IFA(reg) \
movl reg = XSI_IFA; \
;; \
@@ -110,6 +113,27 @@
.endm
#define MOV_FROM_PSR(pred, reg, clob) __MOV_FROM_PSR pred, reg, clob
+/* assuming ar.itc is read with interrupt disabled. */
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \
+(pred) movl clob = XSI_ITC_OFFSET; \
+ ;; \
+(pred) ld8 clob = [clob]; \
+(pred) mov reg = ar.itc; \
+ ;; \
+(pred) add reg = reg, clob; \
+ ;; \
+(pred) movl clob = XSI_ITC_LAST; \
+ ;; \
+(pred) ld8 clob = [clob]; \
+ ;; \
+(pred) cmp.geu.unc pred_clob, p0 = clob, reg; \
+ ;; \
+(pred_clob) add reg = 1, clob; \
+ ;; \
+(pred) movl clob = XSI_ITC_LAST; \
+ ;; \
+(pred) st8 [clob] = reg
+
#define MOV_TO_IFA(reg, clob) \
movl clob = XSI_IFA; \
@@ -362,6 +386,10 @@
#define RSM_PSR_DT \
XEN_HYPER_RSM_PSR_DT
+#define RSM_PSR_BE_I(clob0, clob1) \
+ RSM_PSR_I(p0, clob0, clob1); \
+ rum psr.be
+
#define SSM_PSR_DT_AND_SRLZ_I \
XEN_HYPER_SSM_PSR_DT
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index f00fab40854d..e951e740bdf2 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -209,6 +209,15 @@ struct mapped_regs {
unsigned long krs[8]; /* kernel registers */
unsigned long tmp[16]; /* temp registers
(e.g. for hyperprivops) */
+
+ /* itc paravirtualization
+ * vAR.ITC = mAR.ITC + itc_offset
+ * itc_last is one which was lastly passed to
+ * the guest OS in order to prevent it from
+ * going backwords.
+ */
+ unsigned long itc_offset;
+ unsigned long itc_last;
};
};
};
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index 4d92d9bbda7b..c57fa910f2c9 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,3 +1,12 @@
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define XEN_ACCOUNT_GET_STAMP \
+ MOV_FROM_ITC(pUStk, p6, r20, r2);
+#else
+#define XEN_ACCOUNT_GET_STAMP
+#endif
+
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
@@ -123,7 +132,7 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
- ACCOUNT_GET_STAMP \
+ XEN_ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
diff --git a/arch/ia64/include/asm/xen/patchlist.h b/arch/ia64/include/asm/xen/patchlist.h
new file mode 100644
index 000000000000..eae944e88846
--- /dev/null
+++ b/arch/ia64/include/asm/xen/patchlist.h
@@ -0,0 +1,38 @@
+/******************************************************************************
+ * arch/ia64/include/asm/xen/patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#define __paravirt_start_gate_fsyscall_patchlist \
+ __xen_start_gate_fsyscall_patchlist
+#define __paravirt_end_gate_fsyscall_patchlist \
+ __xen_end_gate_fsyscall_patchlist
+#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \
+ __xen_start_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \
+ __xen_end_gate_brl_fsys_bubble_down_patchlist
+#define __paravirt_start_gate_vtop_patchlist \
+ __xen_start_gate_vtop_patchlist
+#define __paravirt_end_gate_vtop_patchlist \
+ __xen_end_gate_vtop_patchlist
+#define __paravirt_start_gate_mckinley_e9_patchlist \
+ __xen_start_gate_mckinley_e9_patchlist
+#define __paravirt_end_gate_mckinley_e9_patchlist \
+ __xen_end_gate_mckinley_e9_patchlist
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
index 71ec7546e100..fb4ec5e0b066 100644
--- a/arch/ia64/include/asm/xen/privop.h
+++ b/arch/ia64/include/asm/xen/privop.h
@@ -55,6 +55,8 @@
#define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS)
#define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS)
#define XSI_IHA (XSI_BASE + XSI_IHA_OFS)
+#define XSI_ITC_OFFSET (XSI_BASE + XSI_ITC_OFFSET_OFS)
+#define XSI_ITC_LAST (XSI_BASE + XSI_ITC_LAST_OFS)
#endif
#ifndef __ASSEMBLY__
@@ -67,7 +69,7 @@
* may have different semantics depending on whether they are executed
* at PL0 vs PL!=0. When paravirtualized, these instructions mustn't
* be allowed to execute directly, lest incorrect semantics result. */
-extern void xen_fc(unsigned long addr);
+extern void xen_fc(void *addr);
extern unsigned long xen_thash(unsigned long addr);
/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
@@ -80,8 +82,10 @@ extern unsigned long xen_thash(unsigned long addr);
extern unsigned long xen_get_cpuid(int index);
extern unsigned long xen_get_pmd(int index);
+#ifndef ASM_SUPPORTED
extern unsigned long xen_get_eflag(void); /* see xen_ia64_getreg */
extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */
+#endif
/************************************************/
/* Instructions paravirtualized for performance */
@@ -106,6 +110,7 @@ extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */
#define xen_get_virtual_pend() \
(*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1))
+#ifndef ASM_SUPPORTED
/* Although all privileged operations can be left to trap and will
* be properly handled by Xen, some are frequent enough that we use
* hyperprivops for performance. */
@@ -123,6 +128,7 @@ extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
unsigned long val4);
extern void xen_set_kr(unsigned long index, unsigned long val);
extern void xen_ptcga(unsigned long addr, unsigned long size);
+#endif /* !ASM_SUPPORTED */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index f2778f2c4fd9..5628e9a990a6 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
- irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
+ irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \
salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o dma-mapping.o
@@ -36,7 +36,8 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \
+ paravirt_patch.o
obj-$(CONFIG_IA64_ESI) += esi.o
ifneq ($(CONFIG_IA64_ESI),)
@@ -45,35 +46,13 @@ endif
obj-$(CONFIG_DMAR) += pci-dma.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-
-extra-y += gate.so gate-syms.o gate.lds gate.o
-
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
-
-quiet_cmd_gate = GATE $@
- cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
-
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
-
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
-
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
define sed-y
@@ -109,9 +88,9 @@ include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
#
-# native ivt.S and entry.S
+# native ivt.S, entry.S and fsys.S
#
-ASM_PARAVIRT_OBJS = ivt.o entry.o
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
define paravirtualized_native
AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 000000000000..1d87f84069b3
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+ cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+ $(call ld-option, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data.gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index bdef2ce38c8b..5510317db37b 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -890,7 +890,7 @@ __init void prefill_possible_map(void)
possible, max((possible - available_cpus), 0));
for (i = 0; i < possible; i++)
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
}
int acpi_map_lsapic(acpi_handle handle, int *pcpu)
@@ -928,9 +928,9 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
- if (cpu >= NR_CPUS)
+ cpumask_complement(&tmp_map, cpu_present_mask);
+ cpu = cpumask_first(&tmp_map);
+ if (cpu >= nr_cpu_ids)
return -EINVAL;
acpi_map_cpu2node(handle, cpu, physid);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 742dbb1d5a4f..af5650169043 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -316,5 +316,7 @@ void foo(void)
DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
+ DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset);
+ DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last);
#endif /* CONFIG_XEN */
}
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index efaff15d8cf1..7ef80e8161ce 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -456,6 +456,7 @@ efi_map_pal_code (void)
GRANULEROUNDDOWN((unsigned long) pal_vaddr),
pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
IA64_GRANULE_SHIFT);
+ paravirt_dv_serialize_data();
ia64_set_psr(psr); /* restore psr */
}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c1175..ccfdeee9d89f 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -735,7 +735,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
__paravirt_work_processed_syscall:
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
adds r2=PT(LOADRS)+16,r12
-(pUStk) mov.m r22=ar.itc // fetch time at leave
+ MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
@@ -984,7 +984,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
.pred.rel.mutex pUStk,pKStk
MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
-(pUStk) mov.m r22=ar.itc // M fetch time at leave
+ MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave
nop.i 0
;;
#else
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index c1625c7e1779..3567d54f8cee 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -25,6 +25,7 @@
#include <asm/unistd.h>
#include "entry.h"
+#include "paravirt_inst.h"
/*
* See Documentation/ia64/fsys.txt for details on fsyscalls.
@@ -279,7 +280,7 @@ ENTRY(fsys_gettimeofday)
(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
;;
.pred.rel.mutex p8,p9
-(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
+ MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
@@ -418,7 +419,7 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
;;
- rsm psr.i // mask interrupt delivery
+ RSM_PSR_I(p0, r18, r19) // mask interrupt delivery
mov ar.ccv=0
andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
@@ -491,7 +492,7 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
- ssm psr.i
+ SSM_PSR_I(p0, p9, r31)
;;
srlz.d // ensure psr.i is set again
@@ -513,7 +514,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
#ifdef CONFIG_SMP
st4.rel [r31]=r0 // release the lock
#endif
- ssm psr.i
+ SSM_PSR_I(p0, p9, r17)
;;
srlz.d
br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
@@ -521,7 +522,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
#ifdef CONFIG_SMP
.lock_contention:
/* Rather than spinning here, fall back on doing a heavy-weight syscall. */
- ssm psr.i
+ SSM_PSR_I(p0, p9, r17)
;;
srlz.d
br.sptk.many fsys_fallback_syscall
@@ -592,17 +593,17 @@ ENTRY(fsys_fallback_syscall)
adds r17=-1024,r15
movl r14=sys_call_table
;;
- rsm psr.i
+ RSM_PSR_I(p0, r26, r27)
shladd r18=r17,3,r14
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
- mov r29=psr // read psr (12 cyc load latency)
+ MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency)
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
.prologue
.altrp b6
.body
@@ -640,7 +641,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
*
* PSR.BE : already is turned off in __kernel_syscall_via_epc()
* PSR.AC : don't care (kernel normally turns PSR.AC on)
- * PSR.I : already turned off by the time fsys_bubble_down gets
+ * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets
* invoked
* PSR.DFL: always 0 (kernel never turns it on)
* PSR.DFH: don't care --- kernel never touches f32-f127 on its own
@@ -650,7 +651,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
* PSR.DB : don't care --- kernel never enables kernel-level
* breakpoints
* PSR.TB : must be 0 already; if it wasn't zero on entry to
- * __kernel_syscall_via_epc, the branch to fsys_bubble_down
+ * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
* will trigger a taken branch; the taken-trap-handler then
* converts the syscall into a break-based system-call.
*/
@@ -683,7 +684,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mov.m r30=ar.itc // M get cycle for accounting
+ MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
#else
nop.m 0
#endif
@@ -734,21 +735,21 @@ GLOBAL_ENTRY(fsys_bubble_down)
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
;;
- ssm psr.i // M2 we're on kernel stacks now, reenable irqs
+ SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs
cmp.eq p8,p0=r3,r0 // A
(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
nop.m 0
(p8) br.call.sptk.many b6=b6 // B (ignore return address)
br.cond.spnt ia64_trace_syscall // B
-END(fsys_bubble_down)
+END(paravirt_fsys_bubble_down)
.rodata
.align 8
- .globl fsyscall_table
+ .globl paravirt_fsyscall_table
- data8 fsys_bubble_down
-fsyscall_table:
+ data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
data8 fsys_ni_syscall
data8 0 // exit // 1025
data8 0 // read
@@ -1033,4 +1034,4 @@ fsyscall_table:
// fill in zeros for the remaining entries
.zero:
- .space fsyscall_table + 8*NR_syscalls - .zero, 0
+ .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 74b1ccce4e84..cf5e0a105e16 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -13,6 +13,7 @@
#include <asm/sigcontext.h>
#include <asm/system.h>
#include <asm/unistd.h>
+#include "paravirt_inst.h"
/*
* We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
@@ -48,87 +49,6 @@ GLOBAL_ENTRY(__kernel_syscall_via_break)
}
END(__kernel_syscall_via_break)
-/*
- * On entry:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * b6 = return address
- * On exit:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * all other "scratch" registers: undefined
- * all "preserved" registers: same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
- .prologue
- .altrp b6
- .body
-{
- /*
- * Note: the kernel cannot assume that the first two instructions in this
- * bundle get executed. The remaining code must be safe even if
- * they do not get executed.
- */
- adds r17=-1024,r15 // A
- mov r10=0 // A default to successful syscall execution
- epc // B causes split-issue
-}
- ;;
- rsm psr.be | psr.i // M2 (5 cyc to srlz.d)
- LOAD_FSYSCALL_TABLE(r14) // X
- ;;
- mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
- shladd r18=r17,3,r14 // A
- mov r19=NR_syscalls-1 // A
- ;;
- lfetch [r18] // M0|1
- mov r29=psr // M2 (12 cyc)
- // If r17 is a NaT, p6 will be zero
- cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
- ;;
- mov r21=ar.fpsr // M2 (12 cyc)
- tnat.nz p10,p9=r15 // I0
- mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
- ;;
- srlz.d // M0 (forces split-issue) ensure PSR.BE==0
-(p6) ld8 r18=[r18] // M0|1
- nop.i 0
- ;;
- nop.m 0
-(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
- nop.i 0
- ;;
-(p8) ssm psr.i
-(p6) mov b7=r18 // I0
-(p8) br.dptk.many b7 // B
-
- mov r27=ar.rsc // M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT. Perhaps there will be a way to avoid this with some
- * future version of the linker. In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
- ;;
-(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
- ;;
-(p6) mov b7=r14
-(p6) br.sptk.many b7
-#else
- BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
- ssm psr.i
- mov r10=-1
-(p10) mov r8=EINVAL
-(p9) mov r8=ENOSYS
- FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
@@ -374,3 +294,92 @@ restore_rbs:
// invala not necessary as that will happen when returning to user-mode
br.cond.sptk back_from_restore_rbs
END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * b6 = return address
+ * On exit:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * all other "scratch" registers: undefined
+ * all "preserved" registers: same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+ .prologue
+ .altrp b6
+ .body
+{
+ /*
+ * Note: the kernel cannot assume that the first two instructions in this
+ * bundle get executed. The remaining code must be safe even if
+ * they do not get executed.
+ */
+ adds r17=-1024,r15 // A
+ mov r10=0 // A default to successful syscall execution
+ epc // B causes split-issue
+}
+ ;;
+ RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d)
+ LOAD_FSYSCALL_TABLE(r14) // X
+ ;;
+ mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
+ shladd r18=r17,3,r14 // A
+ mov r19=NR_syscalls-1 // A
+ ;;
+ lfetch [r18] // M0|1
+ MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc)
+ // If r17 is a NaT, p6 will be zero
+ cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+ ;;
+ mov r21=ar.fpsr // M2 (12 cyc)
+ tnat.nz p10,p9=r15 // I0
+ mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
+ ;;
+ srlz.d // M0 (forces split-issue) ensure PSR.BE==0
+(p6) ld8 r18=[r18] // M0|1
+ nop.i 0
+ ;;
+ nop.m 0
+(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
+ nop.i 0
+ ;;
+ SSM_PSR_I(p8, p14, r25)
+(p6) mov b7=r18 // I0
+(p8) br.dptk.many b7 // B
+
+ mov r27=ar.rsc // M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT. Perhaps there will be a way to avoid this with some
+ * future version of the linker. In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
+ ;;
+(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
+ ;;
+(p6) mov b7=r14
+(p6) br.sptk.many b7
+#else
+ BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+ SSM_PSR_I(p0, p14, r10)
+ mov r10=-1
+(p10) mov r8=EINVAL
+(p9) mov r8=ENOSYS
+ FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+ /*
+ * padd to make the size of this symbol constant
+ * independent of paravirtualization.
+ */
+ .align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index 3cb1abc00e24..88c64ed47c36 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -7,6 +7,7 @@
#include <asm/system.h>
+#include "paravirt_patchlist.h"
SECTIONS
{
@@ -33,21 +34,21 @@ SECTIONS
. = GATE_ADDR + 0x600;
.data.patch : {
- __start_gate_mckinley_e9_patchlist = .;
+ __paravirt_start_gate_mckinley_e9_patchlist = .;
*(.data.patch.mckinley_e9)
- __end_gate_mckinley_e9_patchlist = .;
+ __paravirt_end_gate_mckinley_e9_patchlist = .;
- __start_gate_vtop_patchlist = .;
+ __paravirt_start_gate_vtop_patchlist = .;
*(.data.patch.vtop)
- __end_gate_vtop_patchlist = .;
+ __paravirt_end_gate_vtop_patchlist = .;
- __start_gate_fsyscall_patchlist = .;
+ __paravirt_start_gate_fsyscall_patchlist = .;
*(.data.patch.fsyscall_table)
- __end_gate_fsyscall_patchlist = .;
+ __paravirt_end_gate_fsyscall_patchlist = .;
- __start_gate_brl_fsys_bubble_down_patchlist = .;
+ __paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
*(.data.patch.brl_fsys_bubble_down)
- __end_gate_brl_fsys_bubble_down_patchlist = .;
+ __paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
} :readable
.IA_64.unwind_info : { *(.IA_64.unwind_info*) }
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 59301c472800..23f846de62d5 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1050,7 +1050,7 @@ END(ia64_delay_loop)
* except that the multiplication and the shift are done with 128-bit
* intermediate precision so that we can produce a full 64-bit result.
*/
-GLOBAL_ENTRY(sched_clock)
+GLOBAL_ENTRY(ia64_native_sched_clock)
addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
;;
@@ -1066,7 +1066,13 @@ GLOBAL_ENTRY(sched_clock)
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
-END(sched_clock)
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+ //unsigned long long
+ //sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+ .global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
GLOBAL_ENTRY(cycle_to_cputime)
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index f675d8e33853..ec9a5fdfa1b9 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -804,7 +804,7 @@ ENTRY(break_fault)
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mov.m r30=ar.itc // M get cycle for accounting
+ MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting
#else
mov b6=r30 // I0 setup syscall handler branch reg early
#endif
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index bab1de2d2f6a..8f33a8840422 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1456,9 +1456,9 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
ia64_mca_cmc_int_handler(cmc_irq, arg);
- for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+ cpuid = cpumask_next(cpuid+1, cpu_online_mask);
- if (cpuid < NR_CPUS) {
+ if (cpuid < nr_cpu_ids) {
platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
} else {
/* If no log record, switch out of polling mode */
@@ -1525,7 +1525,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
ia64_mca_cpe_int_handler(cpe_irq, arg);
- for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+ cpuid = cpumask_next(cpuid+1, cpu_online_mask);
if (cpuid < NR_CPUS) {
platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index aaa7d901521f..da3b0cf495a3 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -446,6 +446,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
mod->arch.opd = s;
else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+ else if (strcmp(".paravirt_bundles",
+ secstrings + s->sh_name) == 0)
+ mod->arch.paravirt_bundles = s;
+ else if (strcmp(".paravirt_insts",
+ secstrings + s->sh_name) == 0)
+ mod->arch.paravirt_insts = s;
+#endif
if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
printk(KERN_ERR "%s: sections missing\n", mod->name);
@@ -525,8 +533,7 @@ get_ltoff (struct module *mod, uint64_t value, int *okp)
goto found;
/* Not enough GOT entries? */
- if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
- BUG();
+ BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
e->val = value;
++mod->arch.next_got_entry;
@@ -921,6 +928,30 @@ module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mo
DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
if (mod->arch.unwind)
register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+ if (mod->arch.paravirt_bundles) {
+ struct paravirt_patch_site_bundle *start =
+ (struct paravirt_patch_site_bundle *)
+ mod->arch.paravirt_bundles->sh_addr;
+ struct paravirt_patch_site_bundle *end =
+ (struct paravirt_patch_site_bundle *)
+ (mod->arch.paravirt_bundles->sh_addr +
+ mod->arch.paravirt_bundles->sh_size);
+
+ paravirt_patch_apply_bundle(start, end);
+ }
+ if (mod->arch.paravirt_insts) {
+ struct paravirt_patch_site_inst *start =
+ (struct paravirt_patch_site_inst *)
+ mod->arch.paravirt_insts->sh_addr;
+ struct paravirt_patch_site_inst *end =
+ (struct paravirt_patch_site_inst *)
+ (mod->arch.paravirt_insts->sh_addr +
+ mod->arch.paravirt_insts->sh_size);
+
+ paravirt_patch_apply_inst(start, end);
+ }
+#endif
return 0;
}
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 9f14c16f6369..a21d7bb9c69c 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -46,13 +46,23 @@ struct pv_info pv_info = {
* initialization hooks.
*/
-struct pv_init_ops pv_init_ops;
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+ .patch_bundle = ia64_native_patch_bundle,
+#endif
+ .patch_branch = ia64_native_patch_branch,
+};
/***************************************************************************
* pv_cpu_ops
* intrinsics hooks.
*/
+#ifndef ASM_SUPPORTED
/* ia64_native_xxx are macros so that we have to make them real functions */
#define DEFINE_VOID_FUNC1(name) \
@@ -60,7 +70,14 @@ struct pv_init_ops pv_init_ops;
ia64_native_ ## name ## _func(unsigned long arg) \
{ \
ia64_native_ ## name(arg); \
- } \
+ }
+
+#define DEFINE_VOID_FUNC1_VOID(name) \
+ static void \
+ ia64_native_ ## name ## _func(void *arg) \
+ { \
+ ia64_native_ ## name(arg); \
+ }
#define DEFINE_VOID_FUNC2(name) \
static void \
@@ -68,7 +85,7 @@ struct pv_init_ops pv_init_ops;
unsigned long arg1) \
{ \
ia64_native_ ## name(arg0, arg1); \
- } \
+ }
#define DEFINE_FUNC0(name) \
static unsigned long \
@@ -84,7 +101,7 @@ struct pv_init_ops pv_init_ops;
return ia64_native_ ## name(arg); \
} \
-DEFINE_VOID_FUNC1(fc);
+DEFINE_VOID_FUNC1_VOID(fc);
DEFINE_VOID_FUNC1(intrin_local_irq_restore);
DEFINE_VOID_FUNC2(ptcga);
@@ -274,6 +291,266 @@ ia64_native_setreg_func(int regnum, unsigned long val)
break;
}
}
+#else
+
+#define __DEFINE_FUNC(name, code) \
+ extern const char ia64_native_ ## name ## _direct_start[]; \
+ extern const char ia64_native_ ## name ## _direct_end[]; \
+ asm (".align 32\n" \
+ ".proc ia64_native_" #name "_func\n" \
+ "ia64_native_" #name "_func:\n" \
+ "ia64_native_" #name "_direct_start:\n" \
+ code \
+ "ia64_native_" #name "_direct_end:\n" \
+ "br.cond.sptk.many b6\n" \
+ ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(unsigned long arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(void *arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(unsigned long arg0, \
+ unsigned long arg1); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code) \
+ extern unsigned long \
+ ia64_native_ ## name ## _func(void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code) \
+ extern unsigned long \
+ ia64_native_ ## name ## _func(type arg); \
+ __DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+ "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+ ";;\n"
+ " cmp.ne p6, p7 = r8, r0\n"
+ ";;\n"
+ "(p6) ssm psr.i\n"
+ "(p7) rsm psr.i\n"
+ ";;\n"
+ "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+ "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+ "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+ "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+ "mov r8 = psr\n"
+ ";;\n"
+ "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+ "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+ "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+ "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+ "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+ "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+ "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+ unsigned long val2, unsigned long val3,
+ unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+ "mov rr[r0] = r8\n"
+ "movl r2 = 0x2000000000000000\n"
+ ";;\n"
+ "mov rr[r2] = r9\n"
+ "shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */
+ ";;\n"
+ "add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */
+ "mov rr[r3] = r10\n"
+ ";;\n"
+ "mov rr[r2] = r11\n"
+ "shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */
+ ";;\n"
+ "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r8\n" \
+ ";;\n" \
+ "(p6) mov r8 = " #reg "\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+ __DEFINE_GET_REG(GP, gp)
+ /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+ __DEFINE_GET_REG(PSR, psr)
+ __DEFINE_GET_REG(TP, tp)
+ __DEFINE_GET_REG(SP, sp)
+
+ __DEFINE_GET_REG(AR_KR0, ar0)
+ __DEFINE_GET_REG(AR_KR1, ar1)
+ __DEFINE_GET_REG(AR_KR2, ar2)
+ __DEFINE_GET_REG(AR_KR3, ar3)
+ __DEFINE_GET_REG(AR_KR4, ar4)
+ __DEFINE_GET_REG(AR_KR5, ar5)
+ __DEFINE_GET_REG(AR_KR6, ar6)
+ __DEFINE_GET_REG(AR_KR7, ar7)
+ __DEFINE_GET_AR(RSC, rsc)
+ __DEFINE_GET_AR(BSP, bsp)
+ __DEFINE_GET_AR(BSPSTORE, bspstore)
+ __DEFINE_GET_AR(RNAT, rnat)
+ __DEFINE_GET_AR(FCR, fcr)
+ __DEFINE_GET_AR(EFLAG, eflag)
+ __DEFINE_GET_AR(CSD, csd)
+ __DEFINE_GET_AR(SSD, ssd)
+ __DEFINE_GET_REG(AR_CFLAG, ar27)
+ __DEFINE_GET_AR(FSR, fsr)
+ __DEFINE_GET_AR(FIR, fir)
+ __DEFINE_GET_AR(FDR, fdr)
+ __DEFINE_GET_AR(CCV, ccv)
+ __DEFINE_GET_AR(UNAT, unat)
+ __DEFINE_GET_AR(FPSR, fpsr)
+ __DEFINE_GET_AR(ITC, itc)
+ __DEFINE_GET_AR(PFS, pfs)
+ __DEFINE_GET_AR(LC, lc)
+ __DEFINE_GET_AR(EC, ec)
+
+ __DEFINE_GET_CR(DCR, dcr)
+ __DEFINE_GET_CR(ITM, itm)
+ __DEFINE_GET_CR(IVA, iva)
+ __DEFINE_GET_CR(PTA, pta)
+ __DEFINE_GET_CR(IPSR, ipsr)
+ __DEFINE_GET_CR(ISR, isr)
+ __DEFINE_GET_CR(IIP, iip)
+ __DEFINE_GET_CR(IFA, ifa)
+ __DEFINE_GET_CR(ITIR, itir)
+ __DEFINE_GET_CR(IIPA, iipa)
+ __DEFINE_GET_CR(IFS, ifs)
+ __DEFINE_GET_CR(IIM, iim)
+ __DEFINE_GET_CR(IHA, iha)
+ __DEFINE_GET_CR(LID, lid)
+ __DEFINE_GET_CR(IVR, ivr)
+ __DEFINE_GET_CR(TPR, tpr)
+ __DEFINE_GET_CR(EOI, eoi)
+ __DEFINE_GET_CR(IRR0, irr0)
+ __DEFINE_GET_CR(IRR1, irr1)
+ __DEFINE_GET_CR(IRR2, irr2)
+ __DEFINE_GET_CR(IRR3, irr3)
+ __DEFINE_GET_CR(ITV, itv)
+ __DEFINE_GET_CR(PMV, pmv)
+ __DEFINE_GET_CR(CMCV, cmcv)
+ __DEFINE_GET_CR(LRR0, lrr0)
+ __DEFINE_GET_CR(LRR1, lrr1)
+
+ "mov r8 = -1\n" /* unsupported case */
+ );
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r9\n" \
+ ";;\n" \
+ "(p6) mov " #reg " = r8\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+ "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r9\n"
+ ";;\n"
+ "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+ ".serialize.data\n"
+#endif
+ "(p6) br.cond.sptk.many b6\n"
+ __DEFINE_SET_REG(GP, gp)
+ __DEFINE_SET_REG(SP, sp)
+
+ __DEFINE_SET_REG(AR_KR0, ar0)
+ __DEFINE_SET_REG(AR_KR1, ar1)
+ __DEFINE_SET_REG(AR_KR2, ar2)
+ __DEFINE_SET_REG(AR_KR3, ar3)
+ __DEFINE_SET_REG(AR_KR4, ar4)
+ __DEFINE_SET_REG(AR_KR5, ar5)
+ __DEFINE_SET_REG(AR_KR6, ar6)
+ __DEFINE_SET_REG(AR_KR7, ar7)
+ __DEFINE_SET_AR(RSC, rsc)
+ __DEFINE_SET_AR(BSP, bsp)
+ __DEFINE_SET_AR(BSPSTORE, bspstore)
+ __DEFINE_SET_AR(RNAT, rnat)
+ __DEFINE_SET_AR(FCR, fcr)
+ __DEFINE_SET_AR(EFLAG, eflag)
+ __DEFINE_SET_AR(CSD, csd)
+ __DEFINE_SET_AR(SSD, ssd)
+ __DEFINE_SET_REG(AR_CFLAG, ar27)
+ __DEFINE_SET_AR(FSR, fsr)
+ __DEFINE_SET_AR(FIR, fir)
+ __DEFINE_SET_AR(FDR, fdr)
+ __DEFINE_SET_AR(CCV, ccv)
+ __DEFINE_SET_AR(UNAT, unat)
+ __DEFINE_SET_AR(FPSR, fpsr)
+ __DEFINE_SET_AR(ITC, itc)
+ __DEFINE_SET_AR(PFS, pfs)
+ __DEFINE_SET_AR(LC, lc)
+ __DEFINE_SET_AR(EC, ec)
+
+ __DEFINE_SET_CR(DCR, dcr)
+ __DEFINE_SET_CR(ITM, itm)
+ __DEFINE_SET_CR(IVA, iva)
+ __DEFINE_SET_CR(PTA, pta)
+ __DEFINE_SET_CR(IPSR, ipsr)
+ __DEFINE_SET_CR(ISR, isr)
+ __DEFINE_SET_CR(IIP, iip)
+ __DEFINE_SET_CR(IFA, ifa)
+ __DEFINE_SET_CR(ITIR, itir)
+ __DEFINE_SET_CR(IIPA, iipa)
+ __DEFINE_SET_CR(IFS, ifs)
+ __DEFINE_SET_CR(IIM, iim)
+ __DEFINE_SET_CR(IHA, iha)
+ __DEFINE_SET_CR(LID, lid)
+ __DEFINE_SET_CR(IVR, ivr)
+ __DEFINE_SET_CR(TPR, tpr)
+ __DEFINE_SET_CR(EOI, eoi)
+ __DEFINE_SET_CR(IRR0, irr0)
+ __DEFINE_SET_CR(IRR1, irr1)
+ __DEFINE_SET_CR(IRR2, irr2)
+ __DEFINE_SET_CR(IRR3, irr3)
+ __DEFINE_SET_CR(ITV, itv)
+ __DEFINE_SET_CR(PMV, pmv)
+ __DEFINE_SET_CR(CMCV, cmcv)
+ __DEFINE_SET_CR(LRR0, lrr0)
+ __DEFINE_SET_CR(LRR1, lrr1)
+ );
+#endif
struct pv_cpu_ops pv_cpu_ops = {
.fc = ia64_native_fc_func,
@@ -366,4 +643,258 @@ ia64_native_do_steal_accounting(unsigned long *new_itm)
struct pv_time_ops pv_time_ops = {
.do_steal_accounting = ia64_native_do_steal_accounting,
+ .sched_clock = ia64_native_sched_clock,
+};
+
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \
+ __DEFINE_FUNC(get_ ## name, \
+ ";;\n" \
+ "mov r8 = " #reg "\n" \
+ ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \
+ __DEFINE_FUNC(set_ ## name, \
+ ";;\n" \
+ "mov " #reg " = r8\n" \
+ ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \
+ IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+ ";;\n"
+ "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+ ".serialize.data\n"
+#endif
+ ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \
+ { \
+ (void*)ia64_native_ ## name ## _direct_start, \
+ (void*)ia64_native_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_ ## type, \
+ }
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+ INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \
+ { \
+ (void*)ia64_native_get_ ## name ## _direct_start, \
+ (void*)ia64_native_get_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \
+ }
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \
+ { \
+ (void*)ia64_native_set_ ## name ## _direct_start, \
+ (void*)ia64_native_set_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \
+ }
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+ const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) /
+ sizeof(ia64_native_patch_bundle_elems[0]);
+
+ return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+ ia64_native_patch_bundle_elems,
+ nelems, NULL);
+}
+#endif /* ASM_SUPPOTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type) \
+ { \
+ ia64_native_ ## name, \
+ PARAVIRT_PATCH_TYPE_BR_ ## type, \
+ }
+ PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+ PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+ PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+ PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+ const unsigned long nelem =
+ sizeof(ia64_native_branch_target) /
+ sizeof(ia64_native_branch_target[0]);
+ __paravirt_patch_apply_branch(tag, type,
+ ia64_native_branch_target, nelem);
+}
diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c
new file mode 100644
index 000000000000..bfdfef1b1ffd
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patch.c
@@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+typedef union ia64_inst {
+ struct {
+ unsigned long long qp : 6;
+ unsigned long long : 31;
+ unsigned long long opcode : 4;
+ unsigned long long reserved : 23;
+ } generic;
+ unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * flush_icache_range() can't be used here.
+ * we are here before cpu_init() which initializes
+ * ia64_i_cache_stride_shift. flush_icache_range() uses it.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+ extern void paravirt_fc_i(const void *addr);
+ unsigned long i;
+
+ for (i = 0; i < size; i += sizeof(bundle_t))
+ paravirt_fc_i(instr + i);
+}
+
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+ return (bundle_t *)(tag & ~3UL);
+}
+
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+ return tag & 3UL;
+}
+
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+ bundle_t *sbundle = paravirt_get_bundle(stag);
+ unsigned long sslot = paravirt_get_slot(stag);
+ bundle_t *ebundle = paravirt_get_bundle(etag);
+ unsigned long eslot = paravirt_get_slot(etag);
+
+ return (ebundle - sbundle) * 3 + eslot - sslot + 1;
+}
+
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+ unsigned long slot = paravirt_get_slot(tag);
+
+ switch (slot) {
+ case 0:
+ case 1:
+ return tag + 1;
+ case 2: {
+ bundle_t *bundle = paravirt_get_bundle(tag);
+ return (unsigned long)(bundle + 1);
+ }
+ default:
+ BUG();
+ }
+ /* NOTREACHED */
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+ ia64_inst_t inst;
+ inst.l = bundle->quad0.slot0;
+ return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+ ia64_inst_t inst;
+ inst.l = bundle->quad0.slot1_p0 |
+ ((unsigned long long)bundle->quad1.slot1_p1 << 18UL);
+ return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+ ia64_inst_t inst;
+ inst.l = bundle->quad1.slot2;
+ return inst;
+}
+
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+ bundle_t *bundle = paravirt_get_bundle(tag);
+ unsigned long slot = paravirt_get_slot(tag);
+
+ switch (slot) {
+ case 0:
+ return paravirt_read_slot0(bundle);
+ case 1:
+ return paravirt_read_slot1(bundle);
+ case 2:
+ return paravirt_read_slot2(bundle);
+ default:
+ BUG();
+ }
+ /* NOTREACHED */
+}
+
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+ bundle->quad0.slot0 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+ bundle->quad0.slot1_p0 = inst.l;
+ bundle->quad1.slot1_p1 = inst.l >> 18UL;
+}
+
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+ bundle->quad1.slot2 = inst.l;
+}
+
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+ bundle_t *bundle = paravirt_get_bundle(tag);
+ unsigned long slot = paravirt_get_slot(tag);
+
+ switch (slot) {
+ case 0:
+ paravirt_write_slot0(bundle, inst);
+ break;
+ case 1:
+ paravirt_write_slot1(bundle, inst);
+ break;
+ case 2:
+ paravirt_write_slot2(bundle, inst);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ paravirt_flush_i_cache_range(bundle, sizeof(*bundle));
+}
+
+/* for debug */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+ const unsigned long *quad = (const unsigned long *)bundle;
+ ia64_inst_t slot0 = paravirt_read_slot0(bundle);
+ ia64_inst_t slot1 = paravirt_read_slot1(bundle);
+ ia64_inst_t slot2 = paravirt_read_slot2(bundle);
+
+ printk(KERN_DEBUG
+ "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]);
+ printk(KERN_DEBUG
+ "bundle template 0x%x\n",
+ bundle->quad0.template);
+ printk(KERN_DEBUG
+ "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+ (unsigned long)bundle->quad0.slot0,
+ (unsigned long)bundle->quad0.slot1_p0,
+ (unsigned long)bundle->quad1.slot1_p1,
+ (unsigned long)bundle->quad1.slot2);
+ printk(KERN_DEBUG
+ "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+ slot0.l, slot1.l, slot2.l);
+}
+
+static int noreplace_paravirt __init_or_module = 0;
+
+static int __init setup_noreplace_paravirt(char *str)
+{
+ noreplace_paravirt = 1;
+ return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+ extern const char paravirt_nop_bundle[];
+ extern const unsigned long paravirt_nop_bundle_size;
+
+ void *bundle = sbundle;
+
+ BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+ BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+ while (bundle < ebundle) {
+ memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size);
+
+ bundle += paravirt_nop_bundle_size;
+ }
+}
+
+/* helper function */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+ const struct paravirt_patch_bundle_elem *elems,
+ unsigned long nelems,
+ const struct paravirt_patch_bundle_elem **found)
+{
+ unsigned long used = 0;
+ unsigned long i;
+
+ BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+ BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+ found = NULL;
+ for (i = 0; i < nelems; i++) {
+ const struct paravirt_patch_bundle_elem *p = &elems[i];
+ if (p->type == type) {
+ unsigned long need = p->ebundle - p->sbundle;
+ unsigned long room = ebundle - sbundle;
+
+ if (found != NULL)
+ *found = p;
+
+ if (room < need) {
+ /* no room to replace. skip it */
+ printk(KERN_DEBUG
+ "the space is too small to put "
+ "bundles. type %ld need %ld room %ld\n",
+ type, need, room);
+ break;
+ }
+
+ used = need;
+ memcpy(sbundle, p->sbundle, used);
+ break;
+ }
+ }
+
+ return used;
+}
+
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+ const struct paravirt_patch_site_bundle *end)
+{
+ const struct paravirt_patch_site_bundle *p;
+
+ if (noreplace_paravirt)
+ return;
+ if (pv_init_ops.patch_bundle == NULL)
+ return;
+
+ for (p = start; p < end; p++) {
+ unsigned long used;
+
+ used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle,
+ p->type);
+ if (used == 0)
+ continue;
+
+ fill_nop_bundle(p->sbundle + used, p->ebundle);
+ paravirt_flush_i_cache_range(p->sbundle,
+ p->ebundle - p->sbundle);
+ }
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
+/*
+ * nop.i, nop.m, nop.f instruction are same format.
+ * but nop.b has differennt format.
+ * This doesn't support nop.b for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+ extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+ unsigned long tag;
+ const ia64_inst_t nop_inst =
+ paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+
+ for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag))
+ paravirt_write_inst(tag, nop_inst);
+}
+
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+ const struct paravirt_patch_site_inst *end)
+{
+ const struct paravirt_patch_site_inst *p;
+
+ if (noreplace_paravirt)
+ return;
+ if (pv_init_ops.patch_inst == NULL)
+ return;
+
+ for (p = start; p < end; p++) {
+ unsigned long tag;
+ bundle_t *sbundle;
+ bundle_t *ebundle;
+
+ tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type);
+ if (tag == p->stag)
+ continue;
+
+ fill_nop_inst(tag, p->etag);
+ sbundle = paravirt_get_bundle(p->stag);
+ ebundle = paravirt_get_bundle(p->etag) + 1;
+ paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) *
+ sizeof(bundle_t));
+ }
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+#endif /* ASM_SUPPOTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+typedef union inst_x3_op {
+ ia64_inst_t inst;
+ struct {
+ unsigned long qp: 6;
+ unsigned long btyp: 3;
+ unsigned long unused: 3;
+ unsigned long p: 1;
+ unsigned long imm20b: 20;
+ unsigned long wh: 2;
+ unsigned long d: 1;
+ unsigned long i: 1;
+ unsigned long opcode: 4;
+ };
+ unsigned long l;
+} inst_x3_op_t;
+
+typedef union inst_x3_imm {
+ ia64_inst_t inst;
+ struct {
+ unsigned long unused: 2;
+ unsigned long imm39: 39;
+ };
+ unsigned long l;
+} inst_x3_imm_t;
+
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+ unsigned long tag_op = paravirt_get_next_tag(tag);
+ unsigned long tag_imm = tag;
+ bundle_t *bundle = paravirt_get_bundle(tag);
+
+ ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+ ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+ inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+ inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+ unsigned long imm60 =
+ ((unsigned long)target - (unsigned long)bundle) >> 4;
+
+ BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+ BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+ /* imm60[59] 1bit */
+ inst_x3_op.i = (imm60 >> 59) & 1;
+ /* imm60[19:0] 20bit */
+ inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+ /* imm60[58:20] 39bit */
+ inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+ inst_op.l = inst_x3_op.l;
+ inst_imm.l = inst_x3_imm.l;
+
+ paravirt_write_inst(tag_op, inst_op);
+ paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25> B1 */
+typedef union inst_b1 {
+ ia64_inst_t inst;
+ struct {
+ unsigned long qp: 6;
+ unsigned long btype: 3;
+ unsigned long unused: 3;
+ unsigned long p: 1;
+ unsigned long imm20b: 20;
+ unsigned long wh: 2;
+ unsigned long d: 1;
+ unsigned long s: 1;
+ unsigned long opcode: 4;
+ };
+ unsigned long l;
+} inst_b1_t;
+
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+ bundle_t *bundle = paravirt_get_bundle(tag);
+ ia64_inst_t inst = paravirt_read_inst(tag);
+ unsigned long target25 = (unsigned long)target - (unsigned long)bundle;
+ inst_b1_t inst_b1;
+
+ BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+ inst_b1.l = inst.l;
+ if (target25 & (1UL << 63))
+ inst_b1.s = 1;
+ else
+ inst_b1.s = 0;
+
+ inst_b1.imm20b = target25 >> 4;
+ inst.l = inst_b1.l;
+
+ paravirt_write_inst(tag, inst);
+}
+
+void __init
+__paravirt_patch_apply_branch(
+ unsigned long tag, unsigned long type,
+ const struct paravirt_patch_branch_target *entries,
+ unsigned int nr_entries)
+{
+ unsigned int i;
+ for (i = 0; i < nr_entries; i++) {
+ if (entries[i].type == type) {
+ paravirt_patch_reloc_br(tag, entries[i].entry);
+ break;
+ }
+ }
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+ const struct paravirt_patch_site_branch *end)
+{
+ const struct paravirt_patch_site_branch *p;
+
+ if (noreplace_paravirt)
+ return;
+ if (pv_init_ops.patch_branch == NULL)
+ return;
+
+ for (p = start; p < end; p++)
+ (*pv_init_ops.patch_branch)(p->tag, p->type);
+
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+ extern const char __start_paravirt_bundles[];
+ extern const char __stop_paravirt_bundles[];
+ extern const char __start_paravirt_insts[];
+ extern const char __stop_paravirt_insts[];
+ extern const char __start_paravirt_branches[];
+ extern const char __stop_paravirt_branches[];
+
+ paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+ __start_paravirt_bundles,
+ (const struct paravirt_patch_site_bundle *)
+ __stop_paravirt_bundles);
+ paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+ __start_paravirt_insts,
+ (const struct paravirt_patch_site_inst *)
+ __stop_paravirt_insts);
+ paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+ __start_paravirt_branches,
+ (const struct paravirt_patch_site_branch *)
+ __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 000000000000..b28082a95d45
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,79 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <asm/paravirt.h>
+
+#define DECLARE(name) \
+ extern unsigned long \
+ __ia64_native_start_gate_##name##_patchlist[]; \
+ extern unsigned long \
+ __ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+#define ASSIGN(name) \
+ .start_##name##_patchlist = \
+ (unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+ .end_##name##_patchlist = \
+ (unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+struct pv_patchdata pv_patchdata __initdata = {
+ ASSIGN(fsyscall),
+ ASSIGN(brl_fsys_bubble_down),
+ ASSIGN(vtop),
+ ASSIGN(mckinley_e9),
+
+ .gate_section = (void*)__start_gate_section,
+};
+
+
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+
+#define CASE(NAME, name) \
+ case PV_GATE_START_##NAME: \
+ return pv_patchdata.start_##name##_patchlist; \
+ case PV_GATE_END_##NAME: \
+ return pv_patchdata.end_##name##_patchlist; \
+
+ switch (type) {
+ CASE(FSYSCALL, fsyscall);
+ CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+ CASE(VTOP, vtop);
+ CASE(MCKINLEY_E9, mckinley_e9);
+ default:
+ BUG();
+ break;
+ }
+ return 0;
+}
+
+void * __init
+paravirt_get_gate_section(void)
+{
+ return pv_patchdata.gate_section;
+}
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
new file mode 100644
index 000000000000..0684aa6c6507
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * linux/arch/ia64/xen/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#if defined(__IA64_GATE_PARAVIRTUALIZED_XEN)
+#include <asm/xen/patchlist.h>
+#else
+#include <asm/native/patchlist.h>
+#endif
+
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
index 2f42fcb9776a..6158560d7f17 100644
--- a/arch/ia64/kernel/paravirtentry.S
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -20,8 +20,11 @@
*
*/
+#include <linux/init.h>
#include <asm/asmmacro.h>
#include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
#include "entry.h"
#define DATA8(sym, init_value) \
@@ -32,29 +35,87 @@
data8 init_value ; \
.popsection
-#define BRANCH(targ, reg, breg) \
- movl reg=targ ; \
- ;; \
- ld8 reg=[reg] ; \
- ;; \
- mov breg=reg ; \
+#define BRANCH(targ, reg, breg, type) \
+ PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \
+ ;; \
+ movl reg=targ ; \
+ ;; \
+ ld8 reg=[reg] ; \
+ ;; \
+ mov breg=reg ; \
br.cond.sptk.many breg
-#define BRANCH_PROC(sym, reg, breg) \
- DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
- GLOBAL_ENTRY(paravirt_ ## sym) ; \
- BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \
+#define BRANCH_PROC(sym, reg, breg, type) \
+ DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+ GLOBAL_ENTRY(paravirt_ ## sym) ; \
+ BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
END(paravirt_ ## sym)
-#define BRANCH_PROC_UNWINFO(sym, reg, breg) \
- DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
- GLOBAL_ENTRY(paravirt_ ## sym) ; \
- PT_REGS_UNWIND_INFO(0) ; \
- BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \
+ DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+ GLOBAL_ENTRY(paravirt_ ## sym) ; \
+ PT_REGS_UNWIND_INFO(0) ; \
+ BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
END(paravirt_ ## sym)
-BRANCH_PROC(switch_to, r22, b7)
-BRANCH_PROC_UNWINFO(leave_syscall, r22, b7)
-BRANCH_PROC(work_processed_syscall, r2, b7)
-BRANCH_PROC_UNWINFO(leave_kernel, r22, b7)
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE .text
+#define __INITDATA_OR_MODULE .data
+#else
+#define __INIT_OR_MODULE __INIT
+#define __INITDATA_OR_MODULE __INITDATA
+#endif /* CONFIG_MODULES */
+
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_fc_i)
+ fc.i r32
+ br.ret.sptk.many rp
+ END(paravirt_fc_i)
+ __FINIT
+
+ __INIT_OR_MODULE
+ .align 32
+ GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+ {
+ nop.b 0
+ nop.b 0
+ nop.b 0
+ }
+ END(paravirt_nop_b_inst_bundle)
+ __FINIT
+
+ /* NOTE: nop.[mfi] has same format */
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+ {
+ nop.m 0
+ nop.f 0
+ nop.i 0
+ }
+ END(paravirt_nop_mfi_inst_bundle)
+ __FINIT
+
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+ {
+ nop 0
+ nop 0
+ nop 0
+ }
+paravirt_nop_bundle_end:
+ END(paravirt_nop_bundle)
+ __FINIT
+
+ __INITDATA_OR_MODULE
+ .align 8
+ .global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+ data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index b83b2c516008..68a1311db806 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/string.h>
+#include <asm/paravirt.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sections.h>
@@ -169,16 +170,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
ia64_srlz_i();
}
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+ .fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+ .fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+ return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+ return pv_fsys_data.fsys_bubble_down;
+}
+
static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
- extern unsigned long fsyscall_table[NR_syscalls];
+ u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
s32 *offp = (s32 *) start;
u64 ip;
while (offp < (s32 *) end) {
ip = (u64) ia64_imva((char *) offp + *offp);
- ia64_patch_imm64(ip, (u64) fsyscall_table);
+ ia64_patch_imm64(ip, fsyscall_table);
ia64_fc((void *) ip);
++offp;
}
@@ -189,7 +209,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
- extern char fsys_bubble_down[];
+ u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
s32 *offp = (s32 *) start;
u64 ip;
@@ -207,13 +227,13 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
void __init
ia64_patch_gate (void)
{
-# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
-# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
+# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name)
+# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name)
- patch_fsyscall_table(START(fsyscall), END(fsyscall));
- patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
- ia64_patch_vtop(START(vtop), END(vtop));
- ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
+ patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+ patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+ ia64_patch_vtop(START(VTOP), END(VTOP));
+ ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
}
void ia64_patch_phys_stack_reg(unsigned long val)
@@ -229,7 +249,7 @@ void ia64_patch_phys_stack_reg(unsigned long val)
while (offp < end) {
ip = (u64) offp + *offp;
ia64_patch(ip, mask, imm);
- ia64_fc(ip);
+ ia64_fc((void *)ip);
++offp;
}
ia64_sync_i();
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5c0f408cfd71..8a06dc480594 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5603,7 +5603,7 @@ pfm_interrupt_handler(int irq, void *arg)
* /proc/perfmon interface, for debug only
*/
-#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1)
+#define PFM_PROC_SHOW_HEADER ((void *)nr_cpu_ids+1)
static void *
pfm_proc_start(struct seq_file *m, loff_t *pos)
@@ -5612,7 +5612,7 @@ pfm_proc_start(struct seq_file *m, loff_t *pos)
return PFM_PROC_SHOW_HEADER;
}
- while (*pos <= NR_CPUS) {
+ while (*pos <= nr_cpu_ids) {
if (cpu_online(*pos - 1)) {
return (void *)*pos;
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index c57162705147..5d7c0e5b9e76 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -413,7 +413,7 @@ ia64_load_extra (struct task_struct *task)
* so there is nothing to worry about.
*/
int
-copy_thread (int nr, unsigned long clone_flags,
+copy_thread(unsigned long clone_flags,
unsigned long user_stack_base, unsigned long user_stack_size,
struct task_struct *p, struct pt_regs *regs)
{
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index ecb9eb78d687..7053c55b7649 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -317,7 +317,7 @@ retry:
}
n = data->cpu_check;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (cpu_isset(n, data->cpu_event)) {
if (!cpu_online(n)) {
cpu_clear(n, data->cpu_event);
@@ -326,7 +326,7 @@ retry:
cpu = n;
break;
}
- if (++n == NR_CPUS)
+ if (++n == nr_cpu_ids)
n = 0;
}
@@ -337,7 +337,7 @@ retry:
/* for next read, start checking at next CPU */
data->cpu_check = cpu;
- if (++data->cpu_check == NR_CPUS)
+ if (++data->cpu_check == nr_cpu_ids)
data->cpu_check = 0;
snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 865af27c7737..714066aeda7f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -52,6 +52,7 @@
#include <asm/meminit.h>
#include <asm/page.h>
#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
#include <asm/patch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -537,6 +538,7 @@ setup_arch (char **cmdline_p)
paravirt_arch_setup_early();
ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+ paravirt_patch_apply();
*cmdline_p = __va(ia64_boot_param->command_line);
strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
@@ -730,10 +732,10 @@ static void *
c_start (struct seq_file *m, loff_t *pos)
{
#ifdef CONFIG_SMP
- while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+ while (*pos < nr_cpu_ids && !cpu_online(*pos))
++*pos;
#endif
- return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
+ return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL;
}
static void *
@@ -1016,8 +1018,7 @@ cpu_init (void)
| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
ia64_mmu_init(ia64_imva(cpu_data));
ia64_mca_cpu_init(ia64_imva(cpu_data));
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index da8f020d82c1..2ea4199d9c57 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -166,11 +166,11 @@ send_IPI_allbutself (int op)
* Called with preemption disabled.
*/
static inline void
-send_IPI_mask(cpumask_t mask, int op)
+send_IPI_mask(const struct cpumask *mask, int op)
{
unsigned int cpu;
- for_each_cpu_mask(cpu, mask) {
+ for_each_cpu(cpu, mask) {
send_IPI_single(cpu, op);
}
}
@@ -316,7 +316,7 @@ void arch_send_call_function_single_ipi(int cpu)
send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
}
-void arch_send_call_function_ipi(cpumask_t mask)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_IPI_mask(mask, IPI_CALL_FUNC);
}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 52290547c85b..7700e23034bb 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -581,14 +581,14 @@ smp_build_cpu_map (void)
ia64_cpu_to_sapicid[0] = boot_cpu_id;
cpus_clear(cpu_present_map);
- cpu_set(0, cpu_present_map);
- cpu_set(0, cpu_possible_map);
+ set_cpu_present(0, true);
+ set_cpu_possible(0, true);
for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
sapicid = smp_boot_data.cpu_phys_id[i];
if (sapicid == boot_cpu_id)
continue;
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
ia64_cpu_to_sapicid[cpu] = sapicid;
cpu++;
}
@@ -626,12 +626,9 @@ smp_prepare_cpus (unsigned int max_cpus)
*/
if (!max_cpus) {
printk(KERN_INFO "SMP mode deactivated.\n");
- cpus_clear(cpu_online_map);
- cpus_clear(cpu_present_map);
- cpus_clear(cpu_possible_map);
- cpu_set(0, cpu_online_map);
- cpu_set(0, cpu_present_map);
- cpu_set(0, cpu_possible_map);
+ init_cpu_online(cpumask_of(0));
+ init_cpu_present(cpumask_of(0));
+ init_cpu_possible(cpumask_of(0));
return;
}
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index f0ebb342409d..641c8b61c4f1 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -20,6 +20,7 @@
#include <linux/efi.h>
#include <linux/timex.h>
#include <linux/clocksource.h>
+#include <linux/platform_device.h>
#include <asm/machvec.h>
#include <asm/delay.h>
@@ -50,6 +51,15 @@ EXPORT_SYMBOL(last_cli_ip);
#endif
#ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
static void
paravirt_clocksource_resume(void)
{
@@ -405,6 +415,21 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
+static struct platform_device rtc_efi_dev = {
+ .name = "rtc-efi",
+ .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+ if (platform_device_register(&rtc_efi_dev) < 0)
+ printk(KERN_ERR "unable to register rtc device...\n");
+
+ /* not necessarily an error */
+ return 0;
+}
+module_init(rtc_init);
+
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 3765efc5f963..4a95e86b9ac2 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -169,6 +169,30 @@ SECTIONS
__end___mckinley_e9_bundles = .;
}
+#if defined(CONFIG_PARAVIRT)
+ . = ALIGN(16);
+ .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET)
+ {
+ __start_paravirt_bundles = .;
+ *(.paravirt_bundles)
+ __stop_paravirt_bundles = .;
+ }
+ . = ALIGN(16);
+ .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET)
+ {
+ __start_paravirt_insts = .;
+ *(.paravirt_insts)
+ __stop_paravirt_insts = .;
+ }
+ . = ALIGN(16);
+ .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET)
+ {
+ __start_paravirt_branches = .;
+ *(.paravirt_branches)
+ __stop_paravirt_branches = .;
+ }
+#endif
+
#if defined(CONFIG_IA64_GENERIC)
/* Machine Vector */
. = ALIGN(16);
@@ -201,6 +225,12 @@ SECTIONS
__start_gate_section = .;
*(.data.gate)
__stop_gate_section = .;
+#ifdef CONFIG_XEN
+ . = ALIGN(PAGE_SIZE);
+ __xen_start_gate_section = .;
+ *(.data.gate.xen)
+ __xen_stop_gate_section = .;
+#endif
}
. = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose
* kernel data
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 076b00d1dbff..28af6a731bb8 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -70,7 +70,7 @@ static void kvm_flush_icache(unsigned long start, unsigned long len)
int l;
for (l = 0; l < (len + 32); l += 32)
- ia64_fc(start + l);
+ ia64_fc((void *)(start + l));
ia64_sync_i();
ia64_srlz_i();
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index d4d280505878..a18ee17b9192 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -386,7 +386,7 @@ void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
else
*rnat_addr = (*rnat_addr) & (~nat_mask);
- ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
+ ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
ia64_setreg(_IA64_REG_AR_RNAT, rnat);
}
local_irq_restore(psr);
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 38232b37668b..2c2501f13159 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -210,6 +210,7 @@ void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
phy_pte &= ~PAGE_FLAGS_RV_MASK;
psr = ia64_clear_ic();
ia64_itc(type, va, phy_pte, itir_ps(itir));
+ paravirt_dv_serialize_data();
ia64_set_psr(psr);
}
@@ -456,6 +457,7 @@ void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
phy_pte &= ~PAGE_FLAGS_RV_MASK;
psr = ia64_clear_ic();
ia64_itc(type, ifa, phy_pte, ps);
+ paravirt_dv_serialize_data();
ia64_set_psr(psr);
}
if (!(pte&VTLB_PTE_IO))
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 56e12903973c..c0f3bee69042 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/mca.h>
+#include <asm/paravirt.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -259,6 +260,7 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
static void __init
setup_gate (void)
{
+ void *gate_section;
struct page *page;
/*
@@ -266,10 +268,11 @@ setup_gate (void)
* headers etc. and once execute-only page to enable
* privilege-promotion via "epc":
*/
- page = virt_to_page(ia64_imva(__start_gate_section));
+ gate_section = paravirt_get_gate_section();
+ page = virt_to_page(ia64_imva(gate_section));
put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
#ifdef HAVE_BUGGY_SEGREL
- page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
+ page = virt_to_page(ia64_imva(gate_section + PAGE_SIZE));
put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
#else
put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
@@ -633,8 +636,7 @@ mem_init (void)
#endif
#ifdef CONFIG_FLATMEM
- if (!mem_map)
- BUG();
+ BUG_ON(!mem_map);
max_mapnr = max_low_pfn;
#endif
@@ -667,8 +669,8 @@ mem_init (void)
* code can tell them apart.
*/
for (i = 0; i < NR_syscalls; ++i) {
- extern unsigned long fsyscall_table[NR_syscalls];
extern unsigned long sys_call_table[NR_syscalls];
+ unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index bd9818a36b47..b9f3d7bbb338 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -309,7 +309,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
preempt_disable();
#ifdef CONFIG_SMP
- if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+ if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) {
platform_global_tlb_purge(mm, start, end, nbits);
preempt_enable();
return;
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
index ba66ac2e4c60..e59809a3fc01 100644
--- a/arch/ia64/scripts/pvcheck.sed
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -17,6 +17,7 @@ s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g
s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g
s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g # avoid ar.fpsr
s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g
+s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g
s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g
s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g
s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 0d4ffa4da1da..57f280dd9def 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -135,8 +135,7 @@ static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device,
}
war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL);
- if (!war_list)
- BUG();
+ BUG_ON(!war_list);
SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
nasid, widget, __pa(war_list), 0, 0, 0 ,0);
@@ -180,23 +179,20 @@ sn_common_hubdev_init(struct hubdev_info *hubdev)
sizeof(struct sn_flush_device_kernel *);
hubdev->hdi_flush_nasid_list.widget_p =
kzalloc(size, GFP_KERNEL);
- if (!hubdev->hdi_flush_nasid_list.widget_p)
- BUG();
+ BUG_ON(!hubdev->hdi_flush_nasid_list.widget_p);
for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
size = DEV_PER_WIDGET *
sizeof(struct sn_flush_device_kernel);
sn_flush_device_kernel = kzalloc(size, GFP_KERNEL);
- if (!sn_flush_device_kernel)
- BUG();
+ BUG_ON(!sn_flush_device_kernel);
dev_entry = sn_flush_device_kernel;
for (device = 0; device < DEV_PER_WIDGET;
device++, dev_entry++) {
size = sizeof(struct sn_flush_device_common);
dev_entry->common = kzalloc(size, GFP_KERNEL);
- if (!dev_entry->common)
- BUG();
+ BUG_ON(!dev_entry->common);
if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST))
status = sal_get_device_dmaflush_list(
hubdev->hdi_nasid, widget, device,
@@ -326,8 +322,7 @@ sn_common_bus_fixup(struct pci_bus *bus,
*/
controller->platform_data = kzalloc(sizeof(struct sn_platform_data),
GFP_KERNEL);
- if (controller->platform_data == NULL)
- BUG();
+ BUG_ON(controller->platform_data == NULL);
sn_platform_data =
(struct sn_platform_data *) controller->platform_data;
sn_platform_data->provider_soft = provider_soft;
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index e2eb2da60f96..ee774c366a06 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -128,8 +128,7 @@ sn_legacy_pci_window_fixup(struct pci_controller *controller,
{
controller->window = kcalloc(2, sizeof(struct pci_window),
GFP_KERNEL);
- if (controller->window == NULL)
- BUG();
+ BUG_ON(controller->window == NULL);
controller->window[0].offset = legacy_io;
controller->window[0].resource.name = "legacy_io";
controller->window[0].resource.flags = IORESOURCE_IO;
@@ -168,8 +167,7 @@ sn_pci_window_fixup(struct pci_dev *dev, unsigned int count,
idx = controller->windows;
new_count = controller->windows + count;
new_window = kcalloc(new_count, sizeof(struct pci_window), GFP_KERNEL);
- if (new_window == NULL)
- BUG();
+ BUG_ON(new_window == NULL);
if (controller->window) {
memcpy(new_window, controller->window,
sizeof(struct pci_window) * controller->windows);
@@ -222,8 +220,7 @@ sn_io_slot_fixup(struct pci_dev *dev)
(u64) __pa(pcidev_info),
(u64) __pa(sn_irq_info));
- if (status)
- BUG(); /* Cannot get platform pci device information */
+ BUG_ON(status); /* Cannot get platform pci device information */
/* Copy over PIO Mapped Addresses */
@@ -307,8 +304,7 @@ sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus)
prom_bussoft_ptr = __va(prom_bussoft_ptr);
controller = kzalloc(sizeof(*controller), GFP_KERNEL);
- if (!controller)
- BUG();
+ BUG_ON(!controller);
controller->segment = segment;
/*
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 02c5b8a9fb60..e456f062f241 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -732,8 +732,7 @@ void __init build_cnode_tables(void)
kl_config_hdr_t *klgraph_header;
nasid = cnodeid_to_nasid(node);
klgraph_header = ia64_sn_get_klconfig_addr(nasid);
- if (klgraph_header == NULL)
- BUG();
+ BUG_ON(klgraph_header == NULL);
brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
while (brd) {
if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
@@ -750,7 +749,7 @@ nasid_slice_to_cpuid(int nasid, int slice)
{
long cpu;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
if (cpuid_to_nasid(cpu) == nasid &&
cpuid_to_slice(cpu) == slice)
return cpu;
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index e585f9a2afb9..1176506b2bae 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -133,7 +133,7 @@ sn2_ipi_flush_all_tlb(struct mm_struct *mm)
unsigned long itc;
itc = ia64_get_itc();
- smp_flush_tlb_cpumask(mm->cpu_vm_mask);
+ smp_flush_tlb_cpumask(*mm_cpumask(mm));
itc = ia64_get_itc() - itc;
__get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc;
__get_cpu_var(ptcstats).shub_ipi_flushes++;
@@ -182,7 +182,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
nodes_clear(nodes_flushed);
i = 0;
- for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
+ for_each_cpu(cpu, mm_cpumask(mm)) {
cnode = cpu_to_node(cpu);
node_set(cnode, nodes_flushed);
lcpu = cpu;
@@ -461,7 +461,7 @@ bool sn_cpu_disable_allowed(int cpu)
static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
{
- if (*offset < NR_CPUS)
+ if (*offset < nr_cpu_ids)
return offset;
return NULL;
}
@@ -469,7 +469,7 @@ static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
{
(*offset)++;
- if (*offset < NR_CPUS)
+ if (*offset < nr_cpu_ids)
return offset;
return NULL;
}
@@ -491,7 +491,7 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data)
seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
}
- if (cpu < NR_CPUS && cpu_online(cpu)) {
+ if (cpu < nr_cpu_ids && cpu_online(cpu)) {
stat = &per_cpu(ptcstats, cpu);
seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
@@ -554,7 +554,7 @@ static int __init sn2_ptc_init(void)
proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
NULL, &proc_sn2_ptc_operations);
- if (!&proc_sn2_ptc_operations) {
+ if (!proc_sn2_ptc) {
printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
return -EINVAL;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index be339477f906..9e6491cf72bd 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -275,8 +275,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
/* get it's interconnect topology */
sz = op->ports * sizeof(struct sn_hwperf_port_info);
- if (sz > sizeof(ptdata))
- BUG();
+ BUG_ON(sz > sizeof(ptdata));
e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
SN_HWPERF_ENUM_PORTS, nodeobj->id, sz,
(u64)&ptdata, 0, 0, NULL);
@@ -310,8 +309,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
if (router && (!found_cpu || !found_mem)) {
/* search for a node connected to the same router */
sz = router->ports * sizeof(struct sn_hwperf_port_info);
- if (sz > sizeof(ptdata))
- BUG();
+ BUG_ON(sz > sizeof(ptdata));
e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
SN_HWPERF_ENUM_PORTS, router->id, sz,
(u64)&ptdata, 0, 0, NULL);
@@ -612,7 +610,7 @@ static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
if (cpu != SN_HWPERF_ARG_ANY_CPU) {
- if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
r = -EINVAL;
goto out;
}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 060df4aa9916..c659ad5613a0 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -256,9 +256,7 @@ void sn_dma_flush(u64 addr)
hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
- if (!hubinfo) {
- BUG();
- }
+ BUG_ON(!hubinfo);
flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
if (flush_nasid_list->widget_p == NULL)
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
index 0ad0224693d9..e6f4a0a74228 100644
--- a/arch/ia64/xen/Makefile
+++ b/arch/ia64/xen/Makefile
@@ -3,14 +3,29 @@
#
obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \
- hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o
+ hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o \
+ gate-data.o
obj-$(CONFIG_IA64_GENERIC) += machvec.o
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+
+# tell compiled for xen
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_XEN
+AFLAGS_gate.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN -D__IA64_GATE_PARAVIRTUALIZED_XEN
+
+# use same file of native.
+$(obj)/gate.o: $(src)/../kernel/gate.S FORCE
+ $(call if_changed_dep,as_o_S)
+$(obj)/gate.lds: $(src)/../kernel/gate.lds.S FORCE
+ $(call if_changed_dep,cpp_lds_S)
+
+
AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
# xen multi compile
-ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S
+ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S
ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
obj-y += $(ASM_PARAVIRT_OBJS)
define paravirtualized_xen
diff --git a/arch/ia64/xen/gate-data.S b/arch/ia64/xen/gate-data.S
new file mode 100644
index 000000000000..7d4830afc91d
--- /dev/null
+++ b/arch/ia64/xen/gate-data.S
@@ -0,0 +1,3 @@
+ .section .data.gate.xen, "aw"
+
+ .incbin "arch/ia64/xen/gate.so"
diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S
index 45e02bb64a92..e32dae444dd6 100644
--- a/arch/ia64/xen/hypercall.S
+++ b/arch/ia64/xen/hypercall.S
@@ -9,6 +9,7 @@
#include <asm/intrinsics.h>
#include <asm/xen/privop.h>
+#ifdef __INTEL_COMPILER
/*
* Hypercalls without parameter.
*/
@@ -72,6 +73,7 @@ GLOBAL_ENTRY(xen_set_rr0_to_rr4)
br.ret.sptk.many rp
;;
END(xen_set_rr0_to_rr4)
+#endif
GLOBAL_ENTRY(xen_send_ipi)
mov r14=r32
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index 68d6204c3f16..fb8332690179 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -175,10 +175,58 @@ static void xen_itc_jitter_data_reset(void)
} while (unlikely(ret != lcycle));
}
+/* based on xen_sched_clock() in arch/x86/xen/time.c. */
+/*
+ * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined,
+ * something similar logic should be implemented here.
+ */
+/*
+ * Xen sched_clock implementation. Returns the number of unstolen
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
+ * states.
+ */
+static unsigned long long xen_sched_clock(void)
+{
+ struct vcpu_runstate_info runstate;
+
+ unsigned long long now;
+ unsigned long long offset;
+ unsigned long long ret;
+
+ /*
+ * Ideally sched_clock should be called on a per-cpu basis
+ * anyway, so preempt should already be disabled, but that's
+ * not current practice at the moment.
+ */
+ preempt_disable();
+
+ /*
+ * both ia64_native_sched_clock() and xen's runstate are
+ * based on mAR.ITC. So difference of them makes sense.
+ */
+ now = ia64_native_sched_clock();
+
+ get_runstate_snapshot(&runstate);
+
+ WARN_ON(runstate.state != RUNSTATE_running);
+
+ offset = 0;
+ if (now > runstate.state_entry_time)
+ offset = now - runstate.state_entry_time;
+ ret = runstate.time[RUNSTATE_blocked] +
+ runstate.time[RUNSTATE_running] +
+ offset;
+
+ preempt_enable();
+
+ return ret;
+}
+
struct pv_time_ops xen_time_ops __initdata = {
.init_missing_ticks_accounting = xen_init_missing_ticks_accounting,
.do_steal_accounting = xen_do_steal_accounting,
.clocksource_resume = xen_itc_jitter_data_reset,
+ .sched_clock = xen_sched_clock,
};
/* Called after suspend, to resume time. */
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 936cff3c96e0..5e2270a999fa 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -24,6 +24,7 @@
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/pm.h>
+#include <linux/unistd.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/xencomm.h>
@@ -153,6 +154,13 @@ xen_post_smp_prepare_boot_cpu(void)
xen_setup_vcpu_info_placement();
}
+#ifdef ASM_SUPPORTED
+static unsigned long __init_or_module
+xen_patch_bundle(void *sbundle, void *ebundle, unsigned long type);
+#endif
+static void __init
+xen_patch_branch(unsigned long tag, unsigned long type);
+
static const struct pv_init_ops xen_init_ops __initconst = {
.banner = xen_banner,
@@ -163,6 +171,53 @@ static const struct pv_init_ops xen_init_ops __initconst = {
.arch_setup_nomca = xen_arch_setup_nomca,
.post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu,
+#ifdef ASM_SUPPORTED
+ .patch_bundle = xen_patch_bundle,
+#endif
+ .patch_branch = xen_patch_branch,
+};
+
+/***************************************************************************
+ * pv_fsys_data
+ * addresses for fsys
+ */
+
+extern unsigned long xen_fsyscall_table[NR_syscalls];
+extern char xen_fsys_bubble_down[];
+struct pv_fsys_data xen_fsys_data __initdata = {
+ .fsyscall_table = (unsigned long *)xen_fsyscall_table,
+ .fsys_bubble_down = (void *)xen_fsys_bubble_down,
+};
+
+/***************************************************************************
+ * pv_patchdata
+ * patchdata addresses
+ */
+
+#define DECLARE(name) \
+ extern unsigned long __xen_start_gate_##name##_patchlist[]; \
+ extern unsigned long __xen_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __xen_start_gate_section[];
+
+#define ASSIGN(name) \
+ .start_##name##_patchlist = \
+ (unsigned long)__xen_start_gate_##name##_patchlist, \
+ .end_##name##_patchlist = \
+ (unsigned long)__xen_end_gate_##name##_patchlist
+
+static struct pv_patchdata xen_patchdata __initdata = {
+ ASSIGN(fsyscall),
+ ASSIGN(brl_fsys_bubble_down),
+ ASSIGN(vtop),
+ ASSIGN(mckinley_e9),
+
+ .gate_section = (void*)__xen_start_gate_section,
};
/***************************************************************************
@@ -170,6 +225,76 @@ static const struct pv_init_ops xen_init_ops __initconst = {
* intrinsics hooks.
*/
+#ifndef ASM_SUPPORTED
+static void
+xen_set_itm_with_offset(unsigned long val)
+{
+ /* ia64_cpu_local_tick() calls this with interrupt enabled. */
+ /* WARN_ON(!irqs_disabled()); */
+ xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+}
+
+static unsigned long
+xen_get_itm_with_offset(void)
+{
+ /* unused at this moment */
+ printk(KERN_DEBUG "%s is called.\n", __func__);
+
+ WARN_ON(!irqs_disabled());
+ return ia64_native_getreg(_IA64_REG_CR_ITM) +
+ XEN_MAPPEDREGS->itc_offset;
+}
+
+/* ia64_set_itc() is only called by
+ * cpu_init() with ia64_set_itc(0) and ia64_sync_itc().
+ * So XEN_MAPPEDRESG->itc_offset cal be considered as almost constant.
+ */
+static void
+xen_set_itc(unsigned long val)
+{
+ unsigned long mitc;
+
+ WARN_ON(!irqs_disabled());
+ mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+ XEN_MAPPEDREGS->itc_offset = val - mitc;
+ XEN_MAPPEDREGS->itc_last = val;
+}
+
+static unsigned long
+xen_get_itc(void)
+{
+ unsigned long res;
+ unsigned long itc_offset;
+ unsigned long itc_last;
+ unsigned long ret_itc_last;
+
+ itc_offset = XEN_MAPPEDREGS->itc_offset;
+ do {
+ itc_last = XEN_MAPPEDREGS->itc_last;
+ res = ia64_native_getreg(_IA64_REG_AR_ITC);
+ res += itc_offset;
+ if (itc_last >= res)
+ res = itc_last + 1;
+ ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+ itc_last, res);
+ } while (unlikely(ret_itc_last != itc_last));
+ return res;
+
+#if 0
+ /* ia64_itc_udelay() calls ia64_get_itc() with interrupt enabled.
+ Should it be paravirtualized instead? */
+ WARN_ON(!irqs_disabled());
+ itc_offset = XEN_MAPPEDREGS->itc_offset;
+ itc_last = XEN_MAPPEDREGS->itc_last;
+ res = ia64_native_getreg(_IA64_REG_AR_ITC);
+ res += itc_offset;
+ if (itc_last >= res)
+ res = itc_last + 1;
+ XEN_MAPPEDREGS->itc_last = res;
+ return res;
+#endif
+}
+
static void xen_setreg(int regnum, unsigned long val)
{
switch (regnum) {
@@ -181,11 +306,14 @@ static void xen_setreg(int regnum, unsigned long val)
xen_set_eflag(val);
break;
#endif
+ case _IA64_REG_AR_ITC:
+ xen_set_itc(val);
+ break;
case _IA64_REG_CR_TPR:
xen_set_tpr(val);
break;
case _IA64_REG_CR_ITM:
- xen_set_itm(val);
+ xen_set_itm_with_offset(val);
break;
case _IA64_REG_CR_EOI:
xen_eoi(val);
@@ -209,6 +337,12 @@ static unsigned long xen_getreg(int regnum)
res = xen_get_eflag();
break;
#endif
+ case _IA64_REG_AR_ITC:
+ res = xen_get_itc();
+ break;
+ case _IA64_REG_CR_ITM:
+ res = xen_get_itm_with_offset();
+ break;
case _IA64_REG_CR_IVR:
res = xen_get_ivr();
break;
@@ -259,8 +393,417 @@ xen_intrin_local_irq_restore(unsigned long mask)
else
xen_rsm_i();
}
+#else
+#define __DEFINE_FUNC(name, code) \
+ extern const char xen_ ## name ## _direct_start[]; \
+ extern const char xen_ ## name ## _direct_end[]; \
+ asm (".align 32\n" \
+ ".proc xen_" #name "\n" \
+ "xen_" #name ":\n" \
+ "xen_" #name "_direct_start:\n" \
+ code \
+ "xen_" #name "_direct_end:\n" \
+ "br.cond.sptk.many b6\n" \
+ ".endp xen_" #name "\n")
+
+#define DEFINE_VOID_FUNC0(name, code) \
+ extern void \
+ xen_ ## name (void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code) \
+ extern void \
+ xen_ ## name (unsigned long arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code) \
+ extern void \
+ xen_ ## name (void *arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code) \
+ extern void \
+ xen_ ## name (unsigned long arg0, \
+ unsigned long arg1); \
+ __DEFINE_FUNC(name, code)
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+#define DEFINE_FUNC0(name, code) \
+ extern unsigned long \
+ xen_ ## name (void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code) \
+ extern unsigned long \
+ xen_ ## name (type arg); \
+ __DEFINE_FUNC(name, code)
+
+#define XEN_PSR_I_ADDR_ADDR (XSI_BASE + XSI_PSR_I_ADDR_OFS)
+
+/*
+ * static void xen_set_itm_with_offset(unsigned long val)
+ * xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itm_with_offset,
+ "mov r2 = " __stringify(XSI_BASE) " + "
+ __stringify(XSI_ITC_OFFSET_OFS) "\n"
+ ";;\n"
+ "ld8 r3 = [r2]\n"
+ ";;\n"
+ "sub r8 = r8, r3\n"
+ "break " __stringify(HYPERPRIVOP_SET_ITM) "\n");
+
+/*
+ * static unsigned long xen_get_itm_with_offset(void)
+ * return ia64_native_getreg(_IA64_REG_CR_ITM) + XEN_MAPPEDREGS->itc_offset;
+ */
+/* 2 bundles */
+DEFINE_FUNC0(get_itm_with_offset,
+ "mov r2 = " __stringify(XSI_BASE) " + "
+ __stringify(XSI_ITC_OFFSET_OFS) "\n"
+ ";;\n"
+ "ld8 r3 = [r2]\n"
+ "mov r8 = cr.itm\n"
+ ";;\n"
+ "add r8 = r8, r2\n");
+
+/*
+ * static void xen_set_itc(unsigned long val)
+ * unsigned long mitc;
+ *
+ * WARN_ON(!irqs_disabled());
+ * mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+ * XEN_MAPPEDREGS->itc_offset = val - mitc;
+ * XEN_MAPPEDREGS->itc_last = val;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC1(set_itc,
+ "mov r2 = " __stringify(XSI_BASE) " + "
+ __stringify(XSI_ITC_LAST_OFS) "\n"
+ "mov r3 = ar.itc\n"
+ ";;\n"
+ "sub r3 = r8, r3\n"
+ "st8 [r2] = r8, "
+ __stringify(XSI_ITC_LAST_OFS) " - "
+ __stringify(XSI_ITC_OFFSET_OFS) "\n"
+ ";;\n"
+ "st8 [r2] = r3\n");
+
+/*
+ * static unsigned long xen_get_itc(void)
+ * unsigned long res;
+ * unsigned long itc_offset;
+ * unsigned long itc_last;
+ * unsigned long ret_itc_last;
+ *
+ * itc_offset = XEN_MAPPEDREGS->itc_offset;
+ * do {
+ * itc_last = XEN_MAPPEDREGS->itc_last;
+ * res = ia64_native_getreg(_IA64_REG_AR_ITC);
+ * res += itc_offset;
+ * if (itc_last >= res)
+ * res = itc_last + 1;
+ * ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+ * itc_last, res);
+ * } while (unlikely(ret_itc_last != itc_last));
+ * return res;
+ */
+/* 5 bundles */
+DEFINE_FUNC0(get_itc,
+ "mov r2 = " __stringify(XSI_BASE) " + "
+ __stringify(XSI_ITC_OFFSET_OFS) "\n"
+ ";;\n"
+ "ld8 r9 = [r2], " __stringify(XSI_ITC_LAST_OFS) " - "
+ __stringify(XSI_ITC_OFFSET_OFS) "\n"
+ /* r9 = itc_offset */
+ /* r2 = XSI_ITC_OFFSET */
+ "888:\n"
+ "mov r8 = ar.itc\n" /* res = ar.itc */
+ ";;\n"
+ "ld8 r3 = [r2]\n" /* r3 = itc_last */
+ "add r8 = r8, r9\n" /* res = ar.itc + itc_offset */
+ ";;\n"
+ "cmp.gtu p6, p0 = r3, r8\n"
+ ";;\n"
+ "(p6) add r8 = 1, r3\n" /* if (itc_last > res) itc_last + 1 */
+ ";;\n"
+ "mov ar.ccv = r8\n"
+ ";;\n"
+ "cmpxchg8.acq r10 = [r2], r8, ar.ccv\n"
+ ";;\n"
+ "cmp.ne p6, p0 = r10, r3\n"
+ "(p6) hint @pause\n"
+ "(p6) br.cond.spnt 888b\n");
+
+DEFINE_VOID_FUNC1_VOID(fc,
+ "break " __stringify(HYPERPRIVOP_FC) "\n");
+
+/*
+ * psr_i_addr_addr = XEN_PSR_I_ADDR_ADDR
+ * masked_addr = *psr_i_addr_addr
+ * pending_intr_addr = masked_addr - 1
+ * if (val & IA64_PSR_I) {
+ * masked = *masked_addr
+ * *masked_addr = 0:xen_set_virtual_psr_i(1)
+ * compiler barrier
+ * if (masked) {
+ * uint8_t pending = *pending_intr_addr;
+ * if (pending)
+ * XEN_HYPER_SSM_I
+ * }
+ * } else {
+ * *masked_addr = 1:xen_set_virtual_psr_i(0)
+ * }
+ */
+/* 6 bundles */
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+ /* r8 = input value: 0 or IA64_PSR_I
+ * p6 = (flags & IA64_PSR_I)
+ * = if clause
+ * p7 = !(flags & IA64_PSR_I)
+ * = else clause
+ */
+ "cmp.ne p6, p7 = r8, r0\n"
+ "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+ ";;\n"
+ /* r9 = XEN_PSR_I_ADDR */
+ "ld8 r9 = [r9]\n"
+ ";;\n"
+
+ /* r10 = masked previous value */
+ "(p6) ld1.acq r10 = [r9]\n"
+ ";;\n"
+
+ /* p8 = !masked interrupt masked previously? */
+ "(p6) cmp.ne.unc p8, p0 = r10, r0\n"
+
+ /* p7 = else clause */
+ "(p7) mov r11 = 1\n"
+ ";;\n"
+ /* masked = 1 */
+ "(p7) st1.rel [r9] = r11\n"
+
+ /* p6 = if clause */
+ /* masked = 0
+ * r9 = masked_addr - 1
+ * = pending_intr_addr
+ */
+ "(p8) st1.rel [r9] = r0, -1\n"
+ ";;\n"
+ /* r8 = pending_intr */
+ "(p8) ld1.acq r11 = [r9]\n"
+ ";;\n"
+ /* p9 = interrupt pending? */
+ "(p8) cmp.ne.unc p9, p10 = r11, r0\n"
+ ";;\n"
+ "(p10) mf\n"
+ /* issue hypercall to trigger interrupt */
+ "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+ "break " __stringify(HYPERPRIVOP_PTC_GA) "\n");
+DEFINE_VOID_FUNC2(set_rr,
+ "break " __stringify(HYPERPRIVOP_SET_RR) "\n");
+
+/*
+ * tmp = XEN_MAPPEDREGS->interrupt_mask_addr = XEN_PSR_I_ADDR_ADDR;
+ * tmp = *tmp
+ * tmp = *tmp;
+ * psr_i = tmp? 0: IA64_PSR_I;
+ */
+/* 4 bundles */
+DEFINE_FUNC0(get_psr_i,
+ "mov r9 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+ ";;\n"
+ "ld8 r9 = [r9]\n" /* r9 = XEN_PSR_I_ADDR */
+ "mov r8 = 0\n" /* psr_i = 0 */
+ ";;\n"
+ "ld1.acq r9 = [r9]\n" /* r9 = XEN_PSR_I */
+ ";;\n"
+ "cmp.eq.unc p6, p0 = r9, r0\n" /* p6 = (XEN_PSR_I != 0) */
+ ";;\n"
+ "(p6) mov r8 = " __stringify(1 << IA64_PSR_I_BIT) "\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+ "break " __stringify(HYPERPRIVOP_THASH) "\n");
+DEFINE_FUNC1(get_cpuid, int,
+ "break " __stringify(HYPERPRIVOP_GET_CPUID) "\n");
+DEFINE_FUNC1(get_pmd, int,
+ "break " __stringify(HYPERPRIVOP_GET_PMD) "\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+ "break " __stringify(HYPERPRIVOP_GET_RR) "\n");
+
+/*
+ * void xen_privop_ssm_i(void)
+ *
+ * int masked = !xen_get_virtual_psr_i();
+ * // masked = *(*XEN_MAPPEDREGS->interrupt_mask_addr)
+ * xen_set_virtual_psr_i(1)
+ * // *(*XEN_MAPPEDREGS->interrupt_mask_addr) = 0
+ * // compiler barrier
+ * if (masked) {
+ * uint8_t* pend_int_addr =
+ * (uint8_t*)(*XEN_MAPPEDREGS->interrupt_mask_addr) - 1;
+ * uint8_t pending = *pend_int_addr;
+ * if (pending)
+ * XEN_HYPER_SSM_I
+ * }
+ */
+/* 4 bundles */
+DEFINE_VOID_FUNC0(ssm_i,
+ "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+ ";;\n"
+ "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I_ADDR */
+ ";;\n"
+ "ld1.acq r9 = [r8]\n" /* r9 = XEN_PSR_I */
+ ";;\n"
+ "st1.rel [r8] = r0, -1\n" /* psr_i = 0. enable interrupt
+ * r8 = XEN_PSR_I_ADDR - 1
+ * = pend_int_addr
+ */
+ "cmp.eq.unc p0, p6 = r9, r0\n"/* p6 = !XEN_PSR_I
+ * previously interrupt
+ * masked?
+ */
+ ";;\n"
+ "(p6) ld1.acq r8 = [r8]\n" /* r8 = xen_pend_int */
+ ";;\n"
+ "(p6) cmp.eq.unc p6, p7 = r8, r0\n" /*interrupt pending?*/
+ ";;\n"
+ /* issue hypercall to get interrupt */
+ "(p7) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+ ";;\n");
+
+/*
+ * psr_i_addr_addr = XEN_MAPPEDREGS->interrupt_mask_addr
+ * = XEN_PSR_I_ADDR_ADDR;
+ * psr_i_addr = *psr_i_addr_addr;
+ * *psr_i_addr = 1;
+ */
+/* 2 bundles */
+DEFINE_VOID_FUNC0(rsm_i,
+ "mov r8 = " __stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+ /* r8 = XEN_PSR_I_ADDR */
+ "mov r9 = 1\n"
+ ";;\n"
+ "ld8 r8 = [r8]\n" /* r8 = XEN_PSR_I */
+ ";;\n"
+ "st1.rel [r8] = r9\n"); /* XEN_PSR_I = 1 */
+
+extern void
+xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1,
+ unsigned long val2, unsigned long val3,
+ unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+ "break " __stringify(HYPERPRIVOP_SET_RR0_TO_RR4) "\n");
+
+
+extern unsigned long xen_getreg(int regnum);
+#define __DEFINE_GET_REG(id, privop) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r8\n" \
+ ";;\n" \
+ "(p6) break " __stringify(HYPERPRIVOP_GET_ ## privop) "\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+
+__DEFINE_FUNC(getreg,
+ __DEFINE_GET_REG(PSR, PSR)
+#ifdef CONFIG_IA32_SUPPORT
+ __DEFINE_GET_REG(AR_EFLAG, EFLAG)
+#endif
+
+ /* get_itc */
+ "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r8\n"
+ ";;\n"
+ "(p6) br.cond.spnt xen_get_itc\n"
+ ";;\n"
+
+ /* get itm */
+ "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r8\n"
+ ";;\n"
+ "(p6) br.cond.spnt xen_get_itm_with_offset\n"
+ ";;\n"
+
+ __DEFINE_GET_REG(CR_IVR, IVR)
+ __DEFINE_GET_REG(CR_TPR, TPR)
+
+ /* fall back */
+ "movl r2 = ia64_native_getreg_func\n"
+ ";;\n"
+ "mov b7 = r2\n"
+ ";;\n"
+ "br.cond.sptk.many b7\n");
+
+extern void xen_setreg(int regnum, unsigned long val);
+#define __DEFINE_SET_REG(id, privop) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r9\n" \
+ ";;\n" \
+ "(p6) break " __stringify(HYPERPRIVOP_ ## privop) "\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+
+__DEFINE_FUNC(setreg,
+ /* kr0 .. kr 7*/
+ /*
+ * if (_IA64_REG_AR_KR0 <= regnum &&
+ * regnum <= _IA64_REG_AR_KR7) {
+ * register __index asm ("r8") = regnum - _IA64_REG_AR_KR0
+ * register __val asm ("r9") = val
+ * "break HYPERPRIVOP_SET_KR"
+ * }
+ */
+ "mov r17 = r9\n"
+ "mov r2 = " __stringify(_IA64_REG_AR_KR0) "\n"
+ ";;\n"
+ "cmp.ge p6, p0 = r9, r2\n"
+ "sub r17 = r17, r2\n"
+ ";;\n"
+ "(p6) cmp.ge.unc p7, p0 = "
+ __stringify(_IA64_REG_AR_KR7) " - " __stringify(_IA64_REG_AR_KR0)
+ ", r17\n"
+ ";;\n"
+ "(p7) mov r9 = r8\n"
+ ";;\n"
+ "(p7) mov r8 = r17\n"
+ "(p7) break " __stringify(HYPERPRIVOP_SET_KR) "\n"
+
+ /* set itm */
+ "mov r2 = " __stringify(_IA64_REG_CR_ITM) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r8\n"
+ ";;\n"
+ "(p6) br.cond.spnt xen_set_itm_with_offset\n"
+
+ /* set itc */
+ "mov r2 = " __stringify(_IA64_REG_AR_ITC) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r8\n"
+ ";;\n"
+ "(p6) br.cond.spnt xen_set_itc\n"
+
+#ifdef CONFIG_IA32_SUPPORT
+ __DEFINE_SET_REG(AR_EFLAG, SET_EFLAG)
+#endif
+ __DEFINE_SET_REG(CR_TPR, SET_TPR)
+ __DEFINE_SET_REG(CR_EOI, EOI)
+
+ /* fall back */
+ "movl r2 = ia64_native_setreg_func\n"
+ ";;\n"
+ "mov b7 = r2\n"
+ ";;\n"
+ "br.cond.sptk.many b7\n");
+#endif
+
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.fc = xen_fc,
.thash = xen_thash,
.get_cpuid = xen_get_cpuid,
@@ -337,7 +880,7 @@ xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
}
-static const struct pv_iosapic_ops xen_iosapic_ops __initconst = {
+static struct pv_iosapic_ops xen_iosapic_ops __initdata = {
.pcat_compat_init = xen_pcat_compat_init,
.__get_irq_chip = xen_iosapic_get_irq_chip,
@@ -355,6 +898,8 @@ xen_setup_pv_ops(void)
xen_info_init();
pv_info = xen_info;
pv_init_ops = xen_init_ops;
+ pv_fsys_data = xen_fsys_data;
+ pv_patchdata = xen_patchdata;
pv_cpu_ops = xen_cpu_ops;
pv_iosapic_ops = xen_iosapic_ops;
pv_irq_ops = xen_irq_ops;
@@ -362,3 +907,252 @@ xen_setup_pv_ops(void)
paravirt_cpu_asm_init(&xen_cpu_asm_switch);
}
+
+#ifdef ASM_SUPPORTED
+/***************************************************************************
+ * binary pacthing
+ * pv_init_ops.patch_bundle
+ */
+
+#define DEFINE_FUNC_GETREG(name, privop) \
+ DEFINE_FUNC0(get_ ## name, \
+ "break "__stringify(HYPERPRIVOP_GET_ ## privop) "\n")
+
+DEFINE_FUNC_GETREG(psr, PSR);
+DEFINE_FUNC_GETREG(eflag, EFLAG);
+DEFINE_FUNC_GETREG(ivr, IVR);
+DEFINE_FUNC_GETREG(tpr, TPR);
+
+#define DEFINE_FUNC_SET_KR(n) \
+ DEFINE_VOID_FUNC0(set_kr ## n, \
+ ";;\n" \
+ "mov r9 = r8\n" \
+ "mov r8 = " #n "\n" \
+ "break " __stringify(HYPERPRIVOP_SET_KR) "\n")
+
+DEFINE_FUNC_SET_KR(0);
+DEFINE_FUNC_SET_KR(1);
+DEFINE_FUNC_SET_KR(2);
+DEFINE_FUNC_SET_KR(3);
+DEFINE_FUNC_SET_KR(4);
+DEFINE_FUNC_SET_KR(5);
+DEFINE_FUNC_SET_KR(6);
+DEFINE_FUNC_SET_KR(7);
+
+#define __DEFINE_FUNC_SETREG(name, privop) \
+ DEFINE_VOID_FUNC0(name, \
+ "break "__stringify(HYPERPRIVOP_ ## privop) "\n")
+
+#define DEFINE_FUNC_SETREG(name, privop) \
+ __DEFINE_FUNC_SETREG(set_ ## name, SET_ ## privop)
+
+DEFINE_FUNC_SETREG(eflag, EFLAG);
+DEFINE_FUNC_SETREG(tpr, TPR);
+__DEFINE_FUNC_SETREG(eoi, EOI);
+
+extern const char xen_check_events[];
+extern const char __xen_intrin_local_irq_restore_direct_start[];
+extern const char __xen_intrin_local_irq_restore_direct_end[];
+extern const unsigned long __xen_intrin_local_irq_restore_direct_reloc;
+
+asm (
+ ".align 32\n"
+ ".proc xen_check_events\n"
+ "xen_check_events:\n"
+ /* masked = 0
+ * r9 = masked_addr - 1
+ * = pending_intr_addr
+ */
+ "st1.rel [r9] = r0, -1\n"
+ ";;\n"
+ /* r8 = pending_intr */
+ "ld1.acq r11 = [r9]\n"
+ ";;\n"
+ /* p9 = interrupt pending? */
+ "cmp.ne p9, p10 = r11, r0\n"
+ ";;\n"
+ "(p10) mf\n"
+ /* issue hypercall to trigger interrupt */
+ "(p9) break " __stringify(HYPERPRIVOP_SSM_I) "\n"
+ "br.cond.sptk.many b6\n"
+ ".endp xen_check_events\n"
+ "\n"
+ ".align 32\n"
+ ".proc __xen_intrin_local_irq_restore_direct\n"
+ "__xen_intrin_local_irq_restore_direct:\n"
+ "__xen_intrin_local_irq_restore_direct_start:\n"
+ "1:\n"
+ "{\n"
+ "cmp.ne p6, p7 = r8, r0\n"
+ "mov r17 = ip\n" /* get ip to calc return address */
+ "mov r9 = "__stringify(XEN_PSR_I_ADDR_ADDR) "\n"
+ ";;\n"
+ "}\n"
+ "{\n"
+ /* r9 = XEN_PSR_I_ADDR */
+ "ld8 r9 = [r9]\n"
+ ";;\n"
+ /* r10 = masked previous value */
+ "(p6) ld1.acq r10 = [r9]\n"
+ "adds r17 = 1f - 1b, r17\n" /* calculate return address */
+ ";;\n"
+ "}\n"
+ "{\n"
+ /* p8 = !masked interrupt masked previously? */
+ "(p6) cmp.ne.unc p8, p0 = r10, r0\n"
+ "\n"