/*
 * Linux-os.c. 
 * From FreeBSD-os.c
 * From osf1-os.c,v 1.1 94/03/27 15:30:51 hallgren Exp $
 *
 * OS-dependent routines.  This file (along with os.h) exports an
 * OS-independent interface to the operating system VM facilities.
 * Surprisingly, this interface looks a lot like the Mach interface
 * (but simpler in some places).  For some operating systems, a subset
 * of these functions will have to be emulated.
 *
 * This is the OSF1 version.  By Sean Hallgren.
 * Much hacked by Paul Werkowski
 * Morfed from the FreeBSD file by Peter Van Eynde (July 1996)
 * GENCGC support by Douglas Crosher, 1996, 1997.
 * Alpha support by Julian Dolby, 1999.
 *
 * $Header: /home/CVS-cmucl/src/lisp/Linux-os.c,v 1.10 2000/01/19 18:15:15 dtc Exp $
 *
 */

#include <stdio.h>
#include <sys/param.h>
#include <sys/file.h>
#include <errno.h>
#include "./signal.h"
#include "os.h"
#include "arch.h"
#include "globals.h"
#include "interrupt.h"
#include "lispregs.h"
#include "internals.h"
#include <sys/socket.h>
#include <sys/utsname.h>

#include <sys/types.h>
#include <signal.h>
/* #include <sys/sysinfo.h> */
#include <sys/time.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <netdb.h>

#include "validate.h"
/* Page size in bytes; assigned from getpagesize() in os_init(). */
size_t os_vm_page_size;

/*
 * DPRINTF(t, a) -- conditional debug print.
 *
 *   t  condition; the message is printed only when it is non-zero.
 *   a  a complete, parenthesised fprintf() argument list,
 *      e.g. DPRINTF(1, (stderr, "x = %d\n", x));
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * a semicolon forms exactly one statement and can be used as the unbraced
 * body of an if/else.
 */
#define DPRINTF(t,a) do { if (t) fprintf a; } while (0)

#if defined GENCGC
#include "gencgc.h"
#endif

#if ((LINUX_VERSION_CODE >= linuxversion(2,1,0)) || (__GNU_LIBRARY__ >= 6))
/*
 * Plain-int copy of errno, refreshed by update_errno().
 * NOTE(review): presumably read from outside this file (e.g. by Lisp code
 * that cannot use the C library's errno directly) -- confirm with callers.
 */
int PVE_stub_errno;

/* Store the current value of errno into PVE_stub_errno. */
void update_errno (void)
{
  PVE_stub_errno = errno;
}
#endif

/*
 * Non-zero when os_init() found a kernel release of 2.1 or later; do_mmap()
 * uses it to choose which allocation-failure message to print.
 */
static int has_mmap_tuning = 0;


/*
 * os_init -- one-time OS-level initialisation.
 *
 * Determines the running kernel release with uname() and sets
 * has_mmap_tuning for releases 2.1 and later, records the page size in
 * os_vm_page_size and, on i386, programs the FPU control word.
 *
 * The release string is parsed numerically ("major.minor") rather than by
 * comparing single characters, so multi-digit components such as "10.0" are
 * classified correctly.  If uname() fails or the string cannot be parsed the
 * release is treated as unknown/old: the warning is printed and
 * has_mmap_tuning stays 0.
 */
void os_init(void)
{
  struct utsname name;
  int major = 0;
  int minor = 0;

  has_mmap_tuning = 0;

  if (uname(&name) == -1)
    perror("uname");
  else if (sscanf(name.release, "%d.%d", &major, &minor) < 1)
    major = minor = 0;

  /* We need this for mmap */

  if (major < 2)
    {
      printf("Linux version should be later than 2.0.0!\n");
      printf("Dazed and confused but trying to continue...\n");
    }
  else if (major > 2 || minor >= 1)
    {
      DPRINTF(0,(stderr,"has mman tuning\n"));
      has_mmap_tuning = 1;
    }

  os_vm_page_size = getpagesize();

#ifdef i386
  setfpucw(0x1372|4|8|16|32); /* No interrupts */
#endif
}

#ifdef i386
/*
 * sc_reg -- fetch a general-purpose register from a saved signal context.
 *
 *   c       the signal context to read from.
 *   offset  even register code 0..14, selecting (in order) eax, ecx, edx,
 *           ebx, esp, ebp, esi, edi.
 *
 * Returns the selected register's value, or 0 for any other offset.
 */
#if (LINUX_VERSION_CODE >= linuxversion(2,1,0)) || (__GNU_LIBRARY__ >= 6)
int sc_reg(struct sigcontext *c, int offset)
#else
int sc_reg(struct sigcontext_struct *c, int offset)
#endif
{
  int value = 0;

  switch (offset)
    {
    case  0: value = c->eax; break;
    case  2: value = c->ecx; break;
    case  4: value = c->edx; break;
    case  6: value = c->ebx; break;
    case  8: value = c->esp; break;
    case 10: value = c->ebp; break;
    case 12: value = c->esi; break;
    case 14: value = c->edi; break;
    default: break;
    }

  return value;
}
#endif

/*
 * os_save_context -- hook with an empty body in this port; calling it has
 * no effect.
 */
void os_save_context(void)
{
  /*
   * Called from interrupt handlers so C stuff knows things set in Lisp.
   */
}

/*
 * os_set_context -- hook with an empty body in this port; calling it has
 * no effect.
 * NOTE(review): presumably the counterpart of os_save_context() -- confirm.
 */
void os_set_context(void)
{
}

/*
 * do_mmap -- map LEN bytes of anonymous memory, insisting on the address.
 *
 *   addr   in:  the requested address (NULL lets the kernel choose);
 *          out: whatever mmap() returned.
 *   len    size of the mapping in bytes.
 *   flags  mmap() flags; the protection is always OS_VM_PROT_ALL.
 *
 * Returns 0 on success.  Returns 1, after printing a diagnostic on stderr,
 * when mmap() fails or when a specific address was requested and the
 * mapping ended up somewhere else.
 *
 * The long help text is built from adjacent string literals with explicit
 * "\n" escapes: a literal containing raw newlines is not valid C.
 */
int do_mmap(os_vm_address_t *addr, os_vm_size_t len, int flags)
{
  /* We _must_ have the memory where we want it... */
  os_vm_address_t wanted = *addr;
  int saved_errno;

  DPRINTF(0,(stderr,"do_mmap: %x->%x %x\n",*addr,(*addr)+len, flags));

  *addr = mmap(*addr, len, OS_VM_PROT_ALL, flags, -1, 0);
  if (*addr != (os_vm_address_t) -1 && (wanted == NULL || *addr == wanted))
    return 0;

  /* The stdio calls below may modify errno; keep mmap()'s value for perror. */
  saved_errno = errno;

  if (has_mmap_tuning)
    fputs("Error in allocating memory\n"
          "\n"
          "CMUCL asks the kernel to make a lot of memory potentially available.\n"
          "Truely a lot of memory, actually it asks for all memory a process\n"
          "can allocate.\n"
          "\n"
          "Now you have two choices:\n"
          " - Accept this and lift the kernel and other limits by doing:\n"
          " as root:\n"
          " echo 1 > /proc/sys/vm/overcommit_memory\n"
          " as the user:\n"
          " ulimit -d unlimited \n"
          " ulimit -v unlimited \n"
          " ulimit -m unlimited \n"
          "\n"
          "  - Try to use the lazy-allocation routines. They are pretty experimental\n"
          " and might interact badly with some kernels. To do this start lisp with the\n"
          " \"-lazy\" flag, like:\n"
          " lisp -lazy\n"
          "\n",
          stderr);
  else
    fprintf(stderr, "Error in allocating memory, do you have more than 16MB of memory+swap?\n");

  errno = saved_errno;
  perror("mmap");
  return 1;
}

/*
 * in_range_p -- does address A lie in the half-open range
 * [SBEG, SBEG + SLEN)?
 *
 *   a     address to test.
 *   sbeg  start of the range, given as an integer-like lispobj.
 *   slen  length of the range in bytes.
 *
 * Returns non-zero when A is inside the range, zero otherwise.
 */
static boolean in_range_p(os_vm_address_t a, lispobj sbeg, size_t slen)
{
  const char *const first = (const char *) sbeg;
  const char *const limit = first + slen;
  const char *const probe = (const char *) a;

  return (first <= probe && probe < limit);
}

/*
 * os_validate -- make LEN bytes of memory at ADDR available.
 *
 *   addr  requested start address, or NULL to let the kernel choose.
 *   len   size of the region in bytes.
 *
 * Returns the address of the region, or NULL if do_mmap() reported failure.
 *
 * When lazy_memory_allocation is 1, requests whose address equals the start
 * of the read-only, static, binding-stack or dynamic-0 space are not mapped
 * here at all: ADDR is returned unchanged.  For dynamic space the
 * PAGE_LAZY_ALLOCATE_MASK bit is additionally cleared on every page-table
 * entry covered by the request.  For the control stack only the topmost
 * 4096-byte page is mapped, with MAP_GROWSDOWN.
 *
 * When lazy allocation is off, MAP_NORESERVE is added to the flags.
 */
os_vm_address_t os_validate(os_vm_address_t addr, os_vm_size_t len)
{
  int flags = MAP_PRIVATE | MAP_ANONYMOUS;

  if (lazy_memory_allocation == 1)
    {
      switch((unsigned long) addr)
        {
        case READ_ONLY_SPACE_START:
          DPRINTF(0,(stderr,
                  "It's readonly space... ignoring request for memory\n"));
          return addr;
        case STATIC_SPACE_START:
          DPRINTF(0,(stderr,
                  "It's static space... ignoring request for memory\n"));
          return addr;
        case BINDING_STACK_START:
          DPRINTF(0,(stderr,
                  "It's the binding stack... ignoring request for memory\n"));
          return addr;
        case CONTROL_STACK_START:
          DPRINTF(0,(stderr,"It's the control stack %x->%x %x\n",addr,
                     (os_vm_address_t) (((unsigned long) addr + len)), flags));
          flags |= MAP_GROWSDOWN;
          addr = (os_vm_address_t) (((unsigned long) addr + len - 4096) & ~0xFFF);
          len = 4096;
          break;
        case DYNAMIC_0_SPACE_START:
          {
            int start_page = find_page_index( (void *) addr);
            /*
             * Number of 4096-byte pages touched by the request, rounded up
             * so that a trailing partial page is still included.  The loop
             * below uses '<': using '<=' with len / 4096 would also clear
             * the entry one past the region, which lies outside the page
             * table when the request covers the whole dynamic space.
             */
            int number_of_pages = (len + 4095) / 4096;
            int page;

            DPRINTF(0,(stderr,
                       "It's dynamic 0 space...ignoring request for memory\n"));

            DPRINTF(0,(stderr,"start: %i number: %i\n",
                       start_page, number_of_pages));
            /* clean the allocate mask */
            for (page = 0; page < number_of_pages; page++)
              page_table[start_page + page].flags
                &= ~PAGE_LAZY_ALLOCATE_MASK;
          }
          return addr;
        default:
          break;
        }
    }
  else
    flags |= MAP_NORESERVE;
  /* Try to avoid turning on overcommit globally */

  if (addr)
    flags |= MAP_FIXED;
  else
    flags |= MAP_VARIABLE;

  DPRINTF(0, (stderr, "os_validate %x ->  %x %x => ", addr, addr+len, flags));
  if (do_mmap(&addr, len, flags))
    return NULL;

  DPRINTF(0, (stderr, "%x\n", addr));
  return addr;
}

/*
 * os_invalidate -- unmap LEN bytes starting at ADDR.
 *
 * A munmap() failure is reported with perror("munmap"); nothing is
 * returned to the caller.
 */
void os_invalidate(os_vm_address_t addr, os_vm_size_t len)
{
  int status;

  DPRINTF(0, (stderr, "os_invalidate %x -> %x\n", addr, addr+len));

  status = munmap(addr, len);
  if (status == -1)
    {
      perror("munmap");
    }
}

/*
 * os_map -- map LEN bytes of file FD, starting at file offset OFFSET, at
 * the fixed address ADDR (private mapping, protection OS_VM_PROT_ALL).
 *
 *   fd      open file descriptor to map from.
 *   offset  byte offset within the file.
 *   addr    address the mapping must be placed at (MAP_FIXED).
 *   len     size of the mapping in bytes.
 *
 * Returns the value returned by mmap(); on failure that is
 * (os_vm_address_t) -1 and perror("mmap") has been called.
 *
 * When lazy_memory_allocation is 1 and ADDR lies in dynamic space, the
 * PAGE_LAZY_ALLOCATE_MASK bit is cleared on every page-table entry covered
 * by the request before mapping.
 */
os_vm_address_t os_map(int fd, int offset, os_vm_address_t addr,
                       os_vm_size_t len)
{
  DPRINTF(0,(stderr,"os map: %i %x %x -> %x\n", fd, offset, addr, addr+len));

  if ((lazy_memory_allocation == 1) &&
      in_range_p((os_vm_address_t) addr,
                 DYNAMIC_0_SPACE_START, DYNAMIC_SPACE_SIZE))
    {
      int start_page = find_page_index( (void *) addr);
      /*
       * Number of 4096-byte pages touched by the request, rounded up so a
       * trailing partial page is still included.  The loop uses '<': using
       * '<=' with len / 4096 would also clear the entry one past the
       * mapped region.
       */
      int number_of_pages = (len + 4095) / 4096;
      int page;

      /* clean the allocate mask */
      for (page = 0; page < number_of_pages; page++)
        page_table[start_page + page].flags
          &= ~PAGE_LAZY_ALLOCATE_MASK;
    }

  addr = mmap(addr, len,
              OS_VM_PROT_ALL,
              MAP_PRIVATE | MAP_FILE | MAP_FIXED,
              fd, (off_t) offset);
  DPRINTF(0,(stderr,"osmap: %x -> %x\n",
          (unsigned long) addr,
          (unsigned long) (addr+len)));
  if (addr == (os_vm_address_t) -1)
    perror("mmap");

  return addr;
}

/*
 * os_flush_icache -- empty in this port: both arguments are ignored and
 * nothing is done.
 */
void os_flush_icache(os_vm_address_t address, os_vm_size_t length)
{
}

/*
 * os_protect -- set the protection of LENGTH bytes at ADDRESS to PROT.
 *
 * The mprotect() call is skipped entirely when lazy_memory_allocation is 1,
 * ADDRESS lies in dynamic space and PAGE_LAZY_ALLOCATE() of its page index
 * is 0.
 * NOTE(review): presumably that combination means the page has not been
 * mapped yet, so there is nothing to protect -- confirm against gencgc.
 *
 * An mprotect() failure is reported with perror("mprotect").
 */
void os_protect(os_vm_address_t address, os_vm_size_t length,
                os_vm_prot_t prot)
{
  /* make certain the page is already mapped! */
  if ((lazy_memory_allocation == 1)
      && in_range_p(address, DYNAMIC_0_SPACE_START, DYNAMIC_SPACE_SIZE)
      && (PAGE_LAZY_ALLOCATE(find_page_index((void *) address)) == 0))
    {
      return;
    }

  if (mprotect(address, length, prot) == -1)
    {
      perror("mprotect");
    }
}




/*
 * valid_addr -- is ADDR inside one of the Lisp memory spaces?
 *
 * Returns TRUE when ADDR falls within the read-only, static, dynamic
 * (dynamic-0, plus dynamic-1 unless GENCGC is defined), control-stack or
 * binding-stack range; FALSE otherwise.
 */
boolean valid_addr(os_vm_address_t addr)
{
  if (   in_range_p(addr, READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE)
      || in_range_p(addr, STATIC_SPACE_START   , STATIC_SPACE_SIZE   )
      || in_range_p(addr, DYNAMIC_0_SPACE_START, DYNAMIC_SPACE_SIZE  )
#ifndef GENCGC
      || in_range_p(addr, DYNAMIC_1_SPACE_START, DYNAMIC_SPACE_SIZE  )
#endif
      || in_range_p(addr, CONTROL_STACK_START  , CONTROL_STACK_SIZE  )
      || in_range_p(addr, BINDING_STACK_START  , BINDING_STACK_SIZE  ))
    return TRUE;
  return FALSE;
}



#if defined GENCGC
/*
 * SIGSEGV handler used when GENCGC is defined.
 *
 * The faulting address is read from the cr2 field of the signal context and
 * the fault is then handled in one of three ways:
 *
 *  1. lazy_memory_allocation is 1 and the address lies in the read-only,
 *     static, binding-stack or control-stack range: one 4096-byte anonymous
 *     page is mapped at the page-truncated fault address with do_mmap()
 *     (MAP_GROWSDOWN is added for the control stack) and the handler
 *     returns.
 *
 *  2. find_page_index() returns something other than -1: either
 *       - lazy allocation is on and PAGE_LAZY_ALLOCATE(page_index) is 0:
 *         the page is mapped, PAGE_LAZY_ALLOCATE_MASK is set in its flags
 *         and, if it is marked write protected, it is re-protected as
 *         read/execute; or
 *       - otherwise: the page is made fully accessible with os_protect(),
 *         its PAGE_WRITE_PROTECTED_MASK bit is cleared and
 *         PAGE_WRITE_PROTECT_CLEARED_MASK is set.
 *     In both cases the handler returns.
 *
 *  3. Anything else is passed on to interrupt_handle_now().
 *
 * NOTE(review): fault_addr is an int masked with 0xFFFFF000L and the page
 * size is hard-coded as 4096, so this presumably assumes 32-bit addresses
 * and 4 KB pages -- confirm before reusing on another architecture.
 */
void sigsegv_handler(HANDLER_ARGS)
{
  GET_CONTEXT

  int  fault_addr = ((struct sigcontext_struct *) (&contextstruct))->cr2;
  /* Looked up before fault_addr is truncated to a page boundary below. */
  int  page_index = find_page_index((void *) fault_addr);

  /* First we see if it is because of the lazy-allocation magic... */

  if (lazy_memory_allocation == 1)
    {
      if (in_range_p((os_vm_address_t) fault_addr, 
		     READ_ONLY_SPACE_START, READ_ONLY_SPACE_SIZE))
	{
	  DPRINTF(0,(stderr,"mapping read-only page in at %x\n",fault_addr));
	  /* Truncate to the start of the 4096-byte page. */
	  fault_addr &= 0xFFFFF000L;
	  if (do_mmap((os_vm_address_t *) &fault_addr, 4096, 
		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED))
	    perror("map failed");	  
	  return;
	}
      else
	if (in_range_p((os_vm_address_t) fault_addr, 
		       STATIC_SPACE_START, STATIC_SPACE_SIZE))
	  {
	    DPRINTF(0,(stderr,"mapping static page in at %x\n",fault_addr));
	    fault_addr &= 0xFFFFF000L;
	    if (do_mmap((os_vm_address_t *) &fault_addr, 4096, 
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED))
	      perror("map failed");	  
	    return;
	  }
      else
	if (in_range_p((os_vm_address_t) fault_addr, 
		       BINDING_STACK_START, BINDING_STACK_SIZE))
	  {
	    DPRINTF(0,(stderr,"mapping binding stack page in at %x\n",
		    fault_addr));
	    fault_addr &= 0xFFFFF000L;
	    if (do_mmap((os_vm_address_t *) &fault_addr, 4096, 
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED))
	      perror("map failed");	  
	    return;
	  }
      else
	if (in_range_p((os_vm_address_t) fault_addr, 
		       CONTROL_STACK_START, CONTROL_STACK_SIZE))
	  {
	    DPRINTF(0,(stderr,"mapping control stack page in at %x\n",
		    fault_addr));
	    fault_addr &= 0xFFFFF000L;
	    /* Same as the cases above, but with MAP_GROWSDOWN added. */
	    if (do_mmap((os_vm_address_t *) &fault_addr, 4096, 
			MAP_GROWSDOWN | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED))
	      perror("map failed");	  
	    return;
	  }
    }
      
  /* Check if the fault is within the dynamic space. */
  if (page_index != -1) {
    /* Un-protect the page */

    /* Lazy allocation: the page-table flag says the lazy-allocate bit is
       still clear for this page, so map it now. */
    if ((lazy_memory_allocation == 1) &&
	(PAGE_LAZY_ALLOCATE(page_index) == 0))
      {
	DPRINTF(0,(stderr,"mapping dynamic space page in at %x %i\n",
		fault_addr,PAGE_WRITE_PROTECTED(page_index)));
	fault_addr &= 0xFFFFF000L;
	if (do_mmap((os_vm_address_t *) &fault_addr, 4096, 
		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED))
	  perror("map failed");	  
        
	/* Record that the page has now been through the lazy-mapping path. */
	page_table[page_index].flags |= PAGE_LAZY_ALLOCATE_MASK;

	/* do_mmap() maps with OS_VM_PROT_ALL; restore read/execute-only
	   access if the page table marks the page write protected. */
	if (PAGE_WRITE_PROTECTED(page_index))
	  os_protect(page_address(page_index), 4096, OS_VM_PROT_READ | OS_VM_PROT_EXECUTE);
	
	return;
      }
    else    
    {
	/* The page should have been marked write protected */
	if (!PAGE_WRITE_PROTECTED(page_index))
	  fprintf(stderr, 
		  "*** Sigsegv in page not marked as write protected: %x %i %x %i\n",
		  fault_addr,lazy_memory_allocation,
		  page_table[page_index].flags,
		  page_table[page_index].flags & PAGE_LAZY_ALLOCATE_MASK);

	/* Open the page up and note in its flags that the write protection
	   was cleared by this handler. */
	os_protect(page_address(page_index), 4096, OS_VM_PROT_ALL);
	page_table[page_index].flags &= ~PAGE_WRITE_PROTECTED_MASK;
	page_table[page_index].flags |= PAGE_WRITE_PROTECT_CLEARED_MASK;
	
	return;
    }
  }

  /* Not handled above: hand the signal to the generic handler. */
  DPRINTF(0,(stderr,"sigsegv: eip: %p\n",context->eip));
  interrupt_handle_now(signal, contextstruct);
}
#else
/*
 * SIGSEGV handler used when GENCGC is not defined.
 *
 * On i386 the signal is simply forwarded to interrupt_handle_now().
 *
 * On other architectures the faulting address is obtained from
 * arch_get_bad_addr() and then:
 *   - if the address is non-NULL and bit 63 of the ALLOC register in the
 *     signal context is set, that bit is cleared and pending interrupts are
 *     handled;
 *   - if the address lies strictly between the top of the control stack and
 *     the start of the binding stack, the control frame pointer is walked
 *     back, one lispobj at a time, until the value it points at lies within
 *     the control stack, and ldb_monitor() is entered;
 *   - otherwise the fault is offered to interrupt_maybe_gc() and, if that
 *     returns zero, to interrupt_handle_now().
 *
 * The bit-63 mask is written as 1UL << 63: shifting a plain int by 63 is
 * undefined behaviour.
 * NOTE(review): this presumes a 64-bit unsigned long on the non-i386
 * targets (the file header mentions Alpha) -- confirm for any other port.
 */
static void sigsegv_handler(HANDLER_ARGS)
{
  os_vm_address_t addr;

#ifdef i386
  GET_CONTEXT
#endif

  DPRINTF(0, (stderr, "sigsegv\n"));
#ifdef i386
  interrupt_handle_now(signal, contextstruct);
#else
#define CONTROL_STACK_TOP (((char*) CONTROL_STACK_START) + CONTROL_STACK_SIZE)

  addr = arch_get_bad_addr(signal,code,context);

  if (addr != NULL && context->sc_regs[reg_ALLOC] & (1UL << 63)) {
    context->sc_regs[reg_ALLOC] -= (1UL << 63);
    interrupt_handle_pending(context);
  } else if (addr > CONTROL_STACK_TOP && addr < BINDING_STACK_START) {
    DPRINTF(0,(stderr, "Possible stack overflow at 0x%08lX!\n", addr));
    /* try to fix control frame pointer */
    while (!(CONTROL_STACK_START <= *current_control_frame_pointer &&
             *current_control_frame_pointer <= CONTROL_STACK_TOP))
      /* Step back by one lispobj.  A cast expression is not an lvalue in C,
         so compute the new address and assign it back; the void * result
         converts implicitly to the pointer's own type. */
      current_control_frame_pointer =
        (void *) ((char *) current_control_frame_pointer - sizeof(lispobj));
    ldb_monitor();
  } else if (!interrupt_maybe_gc(signal, code, context))
    interrupt_handle_now(signal, code, context);
#endif
}
#endif

/*
 * SIGBUS handler: forwards the signal to interrupt_handle_now(), using the
 * i386 calling form (signal, contextstruct) or the generic form
 * (signal, code, context) depending on the target.
 */
static void sigbus_handler(HANDLER_ARGS)
{
#ifdef i386
  GET_CONTEXT

  DPRINTF(0, (stderr, "sigbus:\n")); /* there is no sigbus in linux??? */
  interrupt_handle_now(signal, contextstruct);
#else
  DPRINTF(0, (stderr, "sigbus:\n")); /* there is no sigbus in linux??? */
  interrupt_handle_now(signal, code, context);
#endif
}

/*
 * os_install_interrupt_handlers -- register this file's low-level handlers:
 * sigsegv_handler for SIGSEGV and sigbus_handler for SIGBUS, both through
 * interrupt_install_low_level_handler().
 */
void os_install_interrupt_handlers(void)
{
  interrupt_install_low_level_handler(SIGSEGV, sigsegv_handler);
  interrupt_install_low_level_handler(SIGBUS, sigbus_handler);
}
