/***************************************
  $Header: /home/amb/wwwoffle/RCS/misc.c 2.14 1998/04/03 18:43:08 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.1b.
  Miscellaneous HTTP / HTML functions.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997,98 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

#include "misc.h"
#include "config.h"
#include "errors.h"
#include "proto.h"

#include "md5.h"


/*++++++++++++++++++++++++++++++++++++++
  Split a URL into a protocol, hostname, path name and an argument list.

  URL *SplitURL Returns a malloced URL structure containing the information (FreeURL releases it).

  char *url The name of the url to split (it is copied, the caller's string is not modified).
  ++++++++++++++++++++++++++++++++++++++*/

URL *SplitURL(char *url)
{
 URL *Url=(URL*)malloc(sizeof(URL));
 char *copyurl,*mallocurl=malloc(strlen(url)+2);
 int i=0,n=0;
 char *colon,*slash,*at,*temppath,root[2];

 /* Work on a private copy: copyurl walks along it and the pieces are
    separated by writing terminators into the copy. */

 copyurl=mallocurl;
 strcpy(copyurl,url);

 /* Protocol */

 colon=strchr(copyurl,':');
 slash=strchr(copyurl,'/');
 at   =strchr(copyurl,'@');

 if(slash==copyurl)                     /* /dir/... (local) */
   {
    Url->proto=(char*)malloc(5);
    strcpy(Url->proto,"http");
   }
 else if(colon && slash && (colon+1)==slash) /* http://... */
   {
    *colon=0;
    Url->proto=(char*)malloc(colon-copyurl+1);
    strcpy(Url->proto,copyurl);
    copyurl=slash+1;
    if(*copyurl=='/')
       copyurl++;

    /* Look again, this time only in what follows the "//". */

    colon=strchr(copyurl,':');
    slash=strchr(copyurl,'/');
   }
 else if(colon && !isdigit((unsigned char)*(colon+1)) && /* a digit would mean host:port */
         (!at || (slash && at>slash)))  /* http:www.foo.com/... */
   {
    *colon=0;
    Url->proto=(char*)malloc(colon-copyurl+1);
    strcpy(Url->proto,copyurl);
    copyurl=colon+1;

    colon=strchr(copyurl,':');
   }
 else                                   /* www.foo.com:80/... */
   {
    Url->proto=(char*)malloc(5);
    strcpy(Url->proto,"http");
   }

 /* The casts to unsigned char keep the <ctype.h> calls defined for bytes above 0x7f. */

 for(i=0;Url->proto[i];i++)
    Url->proto[i]=tolower((unsigned char)Url->proto[i]);

 /* Find the matching entry in the protocol table (stays NULL if there is none). */

 Url->Protocol=NULL;

 for(i=0;i<NProtocols;i++)
    if(!strcmp(Protocols[i].name,Url->proto))
       Url->Protocol=&Protocols[i];

 /* Password */

 if(at && (!slash || slash>at))         /* the '@' comes before the path starts */
   {
    if(colon && at>colon)               /* user:pass@www.foo.com... */
      {
       *colon=0;
       Url->user=UrlDecode(copyurl,0);
       copyurl=colon+1;
       *at=0;
       Url->pass=UrlDecode(copyurl,0);
       copyurl=at+1;
      }
    else                                /* user@www.foo.com... */
      {
       *at=0;
       Url->user=UrlDecode(copyurl,0);
       copyurl=at+1;
       Url->pass=NULL;
      }
   }
 else
   {
    Url->user=NULL;
    Url->pass=NULL;
   }

 /* Hostname */

 if(*copyurl=='/')              /* /path/... (local) */
   {
    /* NOTE(review): this string is lower-cased in place below and is freed by
       FreeURL(), which assumes GetLocalHost() returns a fresh malloced
       string - confirm against its definition. */
    Url->host=GetLocalHost(1);
    Url->local=1;
   }
 else                           /* www.foo.com... */
   {
    Url->host=copyurl;          /* still points into the copy, duplicated below */
    Url->local=0;

    if(slash)                   /* www.foo.com/... */
       copyurl=slash;
    else                        /* www.foo.com */
      {root[0]='/';root[1]=0;copyurl=root;}
   }

 /* Arguments */

 Url->args=NULL;

 for(i=0;copyurl[i];i++)
    if(copyurl[i]=='?')         /* cut at the first '?', the rest is the arguments */
      {
       copyurl[i]=0;
       Url->args=(char*)malloc(strlen(copyurl+i+1)+1);
       strcpy(Url->args,copyurl+i+1);
       break;
      }

 /* Pathname */

 /* Decode then re-encode so that equivalent escapings give the same path. */

 temppath=UrlDecode(copyurl,0);
 Url->path=UrlEncode(temppath);
 free(temppath);

 /* Hostname (cont) */

 if(!Url->local)
   {
    *copyurl=0;                 /* terminate the host where the path started */
    copyurl=Url->host;
    Url->host=(char*)malloc(strlen(copyurl)+1);
    strcpy(Url->host,copyurl);
   }

 /* Lower-case the host name; i is left at the ':' of a port number or at the end. */

 for(i=0;Url->host[i] && Url->host[i]!=':';i++)
    Url->host[i]=tolower((unsigned char)Url->host[i]);

 if(!Url->local && !strcmp(Url->host,GetLocalHost(1)))
    Url->local=1;

 /* Remove a port number that is the protocol's default (80 if the protocol is unknown). */

 if(Url->host[i]==':')
    if(atoi(&Url->host[i+1])==(Url->Protocol?Url->Protocol->defport:80))
       Url->host[i]=0;

 /* Canonicalise the URL. */

 /* Room for proto "://" host path "?" args and the terminator. */

 Url->name=(char*)malloc(strlen(Url->proto)+strlen(Url->host)+strlen(Url->path)+(Url->args?strlen(Url->args):1)+8);

 sprintf(Url->name,"%s://",Url->proto);
 n=strlen(Url->proto)+3;

 Url->hostp=Url->name+n;        /* points at the host part inside name */

 sprintf(Url->name+n,"%s",Url->host);
 n+=strlen(Url->host);

 Url->pathp=Url->name+n;        /* points at the path part inside name */

 sprintf(Url->name+n,"%s",Url->path);
 n+=strlen(Url->path);

 if(Url->args && *Url->args)
    sprintf(Url->name+n,"?%s",Url->args);

 /* A protocol that is not proxyable gets a link that goes via the local host,
    otherwise the link is the name itself (same pointer, not a copy). */

 if(Url->Protocol && !Url->Protocol->proxyable)
   {
    char *localhost=GetLocalHost(1);
    Url->link=(char*)malloc(strlen(Url->name)+strlen(localhost)+8);
    sprintf(Url->link,"http://%s/%s/%s",localhost,Url->proto,Url->hostp);
   }
 else
    Url->link=Url->name;

 /* end */

 free(mallocurl);

 return(Url);
}


/*++++++++++++++++++++++++++++++++++++++
  Release a URL structure and all of the strings that it owns.

  URL *Url The URL to free.
  ++++++++++++++++++++++++++++++++++++++*/

void FreeURL(URL *Url)
{
 /* The link may be the very same pointer as the name, in which case it must
    only be released once (through the name). */

 if(Url->link!=Url->name)
    free(Url->link);

 free(Url->name);

 /* free() ignores NULL pointers so the optional parts need no tests. */

 free(Url->proto);
 free(Url->host);
 free(Url->path);
 free(Url->args);

 free(Url->user);
 free(Url->pass);

 free(Url);
}


/*++++++++++++++++++++++++++++++++++++++
  Generate a hash value for a string.

  char *MakeHash Returns a string that can be used as the hashed string (as returned by Base64Encode, so malloced).

  const char *args The arguments.
  ++++++++++++++++++++++++++++++++++++++*/

char *MakeHash(const char *args)
{
 char md5[17];                  /* 16 digest bytes plus a terminator */
 char *hash,*p;
 struct MD5Context ctx;

 /* Initialize the computation context.  */
 MD5Init (&ctx);

 /* Feed in the whole string (without its terminator).  */
 MD5Update (&ctx, args, strlen(args));

 /* Put result in desired memory area.  */
 MD5Final (md5, &ctx);

 /* The last valid index of md5[] is 16 (this used to write to md5[17]). */
 md5[16]=0;

 hash=Base64Encode(md5,16);

 /* Replace each '/' by '-' and cut the string off at the '=' padding. */

 for(p=hash;*p;p++)
    if(*p=='/')
       *p='-';
    else if(*p=='=')
       *p=0;

 return(hash);
}


/*++++++++++++++++++++++++++++++++++++++
  Convert the time into an RFC 822 compliant date.

  char *RFC822Date Returns a pointer to a fixed string containing the date (overwritten by the next call,
                   empty if the time cannot be converted).

  long t The time.

  int utc Set to true to get Universal Time, else localtime.
  ++++++++++++++++++++++++++++++++++++++*/

char *RFC822Date(long t,int utc)
{
 static const char *const week[7]={"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 static const char *const month[12]={"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"};
 static char value[32];
 char zone[6];                  /* the longest zone name that fits into value[] is 5 characters */
 time_t when=(time_t)t;         /* gmtime() and localtime() need a time_t, which need not be a long */
 struct tm *tim=NULL;

 if(!utc)
   {
    tim=localtime(&when);

    /* Fall back to GMT if the local time, its daylight saving state or a
       usable zone name is not available. */

    if(!tim || tim->tm_isdst<0 || strftime(zone,sizeof(zone),"%Z",tim)==0)
       tim=NULL;
   }

 if(!tim)
   {
    tim=gmtime(&when);
    strcpy(zone,"GMT");
   }

 if(!tim)                       /* the time cannot be represented at all */
   {
    value[0]=0;
    return(value);
   }

 /*             Sun, 06 Nov 1994 08:49:37 GMT    ; RFC 822, updated by RFC 1123 */
 snprintf(value,sizeof(value),"%3s, %02d %3s %4d %02d:%02d:%02d %s",
          week[tim->tm_wday],
          tim->tm_mday,
          month[tim->tm_mon],
          tim->tm_year+1900,
          tim->tm_hour,
          tim->tm_min,
          tim->tm_sec,
          zone);

 return(value);
}


/*++++++++++++++++++++++++++++++++++++++
  Convert a string representing a date into a time.

  long DateToTimeT Returns the time (0 if the date cannot be understood).

  const char *date The date string, one of the three forms below or a plain number of seconds.
  ++++++++++++++++++++++++++++++++++++++*/

long DateToTimeT(const char *date)
{
 static const char *const month[12]={"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"};
 int  year,day,hour,min,sec;
 char monthstr[16];
 long retval=0;

 /* Sun, 06 Nov 1994 08:49:37 GMT
    Sunday, 06-Nov-94 08:49:37 GMT
    Sun Nov  6 08:49:37 1994
    Every string conversion has a width so that monthstr[] cannot overflow. */

 if(sscanf(date,"%*s %d %15s %d %d:%d:%d",&day,monthstr,&year,&hour,&min,&sec)==6 ||
    sscanf(date,"%*s %d-%3s-%d %d:%d:%d",&day,monthstr,&year,&hour,&min,&sec)==6 ||
    sscanf(date,"%*s %3s %d %d:%d:%d %d",monthstr,&day,&hour,&min,&sec,&year)==6)
   {
    int mon;

    for(mon=0;mon<12;mon++)
       if(!strcmp(monthstr,month[mon]))
          break;

    if(mon<12)                  /* an unknown month name leaves the result as 0 */
      {
       struct tm tim;

       memset(&tim,0,sizeof(tim));

       tim.tm_sec=sec;
       tim.tm_min=min;
       tim.tm_hour=hour;
       tim.tm_mday=day;
       tim.tm_mon=mon;
       if(year<38)              /* two digit year, 20xx */
          tim.tm_year=year+100;
       else if(year<100)        /* two digit year, 19xx */
          tim.tm_year=year;
       else                     /* four digit year */
          tim.tm_year=year-1900;
       tim.tm_isdst=0;

       /* NOTE(review): mktime() interprets the fields as local time and any
          zone name in the string is ignored - confirm that this is what
          the callers expect. */

       retval=(long)mktime(&tim);

       if(retval==-1)
          retval=0;
      }
   }
 else if(sscanf(date,"%ld %1s",&retval,monthstr)==1)
    ;                           /* NOTE(review): a leading number is stored in retval even when text follows it */

 return(retval);
}


/*++++++++++++++++++++++++++++++++++++++
  Decode a string that has been UrlEncoded.

  char *UrlDecode Returns a malloced copy of the decoded string (NULL if there is no memory).

  const char *str The string to be decoded.

  int isform Set to true if the string being decoded is from a form (a '+' then means a space).
  ++++++++++++++++++++++++++++++++++++++*/

char *UrlDecode(const char *str, int isform)
{
 size_t i,j;
 char *copy=(char*)malloc(strlen(str)+1); /* the decoded string is never longer than the original */

 if(!copy)
    return(NULL);

 for(i=0,j=0;str[i];i++)
    if(str[i]=='+' && isform)
       copy[j++]=' ';
    else if(str[i]=='%' &&
            isxdigit((unsigned char)str[i+1]) && /* false for the terminator so str[i+2] is ... */
            isxdigit((unsigned char)str[i+2]))   /* ... only read when it is inside the string. */
      {
       int hi=tolower((unsigned char)str[i+1]);
       int lo=tolower((unsigned char)str[i+2]);

       hi=(hi>='a')?(hi-'a'+10):(hi-'0');
       lo=(lo>='a')?(lo-'a'+10):(lo-'0');

       copy[j++]=(char)(16*hi+lo);
       i+=2;
      }
    else                        /* including a '%' that is not followed by two hex digits */
       copy[j++]=str[i];

 copy[j]=0;

 return(copy);
}


/*++++++++++++++++++++++++++++++++++++++
  Encode a string using the UrlEncode method as used with the POST method.

  char *UrlEncode Returns a malloced copy of the encoded string (NULL if there is no memory).

  const char *str The string to be encoded.
  ++++++++++++++++++++++++++++++++++++++*/

char *UrlEncode(const char *str)
{
 static const char hexdigits[]="0123456789ABCDEF";
 static const char safe[]="/?:@=$-_.+!*(),'"; /* punctuation that is copied unchanged */
 size_t i,j;
 char *copy=(char*)malloc(3*strlen(str)+1);   /* worst case is every character becoming %XX */

 if(!copy)
    return(NULL);

 for(i=0,j=0;str[i];i++)
   {
    /* Use the value as an unsigned char; <ctype.h> functions are not defined for negative values. */
    unsigned char c=(unsigned char)str[i];

    if(isalnum(c) || strchr(safe,c))
       copy[j++]=(char)c;
    else
      {
       copy[j++]='%';
       copy[j++]=hexdigits[c>>4];
       copy[j++]=hexdigits[c&0x0f];
      }
   }

 copy[j]=0;

 return(copy);
}


/*+ The conversion from a 6 bit value to an ASCII character (exactly 64 characters, so no terminator is stored). +*/
static char base64[64]="ABCDEFGHIJKLMNOP"
                       "QRSTUVWXYZabcdef"
                       "ghijklmnopqrstuv"
                       "wxyz0123456789+/";

/*++++++++++++++++++++++++++++++++++++++
  Give the 6 bit value of a base 64 character.

  int Base64CharValue Returns the value (0-63), or 0 for a character that is not in the base 64 alphabet.

  char c The character.
  ++++++++++++++++++++++++++++++++++++++*/

static int Base64CharValue(char c)
{
 if(c>='A' && c<='Z') return(c-'A');
 if(c>='a' && c<='z') return(c-'a'+26);
 if(c>='0' && c<='9') return(c-'0'+52);
 if(c=='+') return(62);
 if(c=='/') return(63);

 return(0);
}


/*++++++++++++++++++++++++++++++++++++++
  Decode a base 64 string.

  char *Base64Decode Return a malloced string containing the decoded version (NULL if there is no memory);
                     a terminator is added after the decoded bytes.

  const char *str The string to be decoded.

  int *l Returns the length of the decoded string.
  ++++++++++++++++++++++++++++++++++++++*/

char *Base64Decode(const char *str,int *l)
{
 size_t le=strlen(str);
 size_t in,out,outlen;
 char *decoded;

 /* Ignore the padding, without stepping off the front of an empty or all '=' string. */

 while(le>0 && str[le-1]=='=')
    le--;

 /* Each full group of 4 characters gives 3 bytes, a final 2 or 3 characters give 1 or 2 bytes. */

 outlen=3*(le/4)+((le%4)?(le%4-1):0);

 decoded=(char*)malloc(outlen+1);

 if(!decoded)
   {
    *l=0;
    return(NULL);
   }

 for(in=0,out=0;in<le;in+=4)
   {
    unsigned long s=0;
    int k;

    /* Pack up to 4 characters of 6 bits each into 24 bits ... */

    for(k=0;k<4;k++)
       if((in+k)<le)
          s|=(unsigned long)Base64CharValue(str[in+k])<<(18-6*k);

    /* ... and take them out again as up to 3 bytes. */

    for(k=0;k<3;k++)
       if(out<outlen)
          decoded[out++]=(char)((s>>(16-8*k))&0xff);
   }

 decoded[outlen]=0;

 *l=(int)outlen;

 return(decoded);
}


/*++++++++++++++++++++++++++++++++++++++
  Encode a string into base 64.

  char *Base64Encode Return a malloced string containing the encoded version, padded with '=' to a multiple of 4 characters.

  const char *str The string to be encoded.

  int l The length of the string to be encoded.
  ++++++++++++++++++++++++++++++++++++++*/

char *Base64Encode(const char *str,int l)
{
 int nchars=4*(l/3)+(l%3)+!!(l%3);              /* the number of base 64 characters before padding */
 int padded=4*(nchars/4)+4*!!(nchars%4);        /* the same rounded up to a multiple of 4 */
 char *encoded=(char*)malloc(padded+1);
 int in,out;

 for(in=0,out=0;in<l;in+=3)
   {
    unsigned long group=0;
    int k;

    /* Pack up to 3 bytes into 24 bits ... */

    for(k=0;k<3 && (in+k)<l;k++)
       group|=((unsigned long)str[in+k]&0xff)<<(16-8*k);

    /* ... and write them out 6 bits at a time, stopping where the data ends. */

    for(k=0;k<4 && out<nchars;k++)
       encoded[out++]=base64[(group>>(18-6*k))&0x3f];
   }

 while(out%4)
    encoded[out++]='=';

 encoded[out]=0;

 return(encoded);
}


/*++++++++++++++++++++++++++++++++++++++
  Give the HTML entity that must replace a character.

  const char *HTMLEntityFor Returns the entity, or NULL if the character can be output unchanged.

  char ch The character.
  ++++++++++++++++++++++++++++++++++++++*/

static const char *HTMLEntityFor(char ch)
{
 switch(ch)
   {
   case '<': return("&lt;");
   case '>': return("&gt;");
   case '"': return("&quot;");
   case '&': return("&amp;");
   default:  return(NULL);
   }
}


/*++++++++++++++++++++++++++++++++++++++
  Make the input string safe to output as HTML ( not < > & " ).

  char* HTMLString Returns a safe HTML string (malloced, NULL if there is no memory).

  const char* c A non-safe HTML string.
  ++++++++++++++++++++++++++++++++++++++*/

char* HTMLString(const char* c)
{
 size_t i,j,len=0;
 char* ret;

 /* First work out the exact length of the result, so that one allocation of
    the right size is enough whatever mixture of entities is needed. */

 for(i=0;c[i];i++)
   {
    const char *entity=HTMLEntityFor(c[i]);

    len+=entity?strlen(entity):1;
   }

 ret=(char*)malloc(len+1);

 if(!ret)
    return(NULL);

 /* Then copy the string, replacing the unsafe characters. */

 for(i=0,j=0;c[i];i++)
   {
    const char *entity=HTMLEntityFor(c[i]);

    if(entity)
      {
       size_t n=strlen(entity);

       memcpy(ret+j,entity,n);
       j+=n;
      }
    else
       ret[j++]=c[i];
   }

 ret[j]=0;

 return(ret);
}
