#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "udm_utils.h"
#include "udm_common.h"
#include "udm_parseurl.h"
#include "udm_xmalloc.h"

/*
 * UdmParseURL - split the URL string `s` into the members of `url`.
 *
 * url  Output structure. Once the length check has passed, schema,
 *      specific, hostinfo, hostname, anchor, auth, path and filename
 *      are reset to empty strings and port/default_port to 0; the
 *      members found in `s` are then filled in. url->anchor is only
 *      reset here: an anchor ("#...") found in the path is cut off and
 *      discarded.
 * s    NUL-terminated URL, absolute ("http://user:pw@host:81/d/f?q")
 *      or relative ("d/f.html#a"). The ':' after the schema is
 *      overwritten temporarily and restored before returning.
 *
 * Returns 0 on success, UDM_PARSEURL_LONG when strlen(s) is not below
 * UDM_URLSIZE (url is left untouched in that case), and
 * UDM_PARSEURL_BAD for "mailto:" and for host-less URLs whose schema is
 * not one of file, exec, cgi, htdb or news.
 *
 * On success url->path ends with the directory part (passed through
 * UdmRemove2Dot) and url->filename holds the last path component
 * followed by the query string, if any.
 *
 * NOTE(review): all copies below use strcpy/sprintf; this assumes every
 * string member of UDM_URL can hold UDM_URLSIZE bytes -- confirm against
 * the structure definition.
 */
int UdmParseURL(UDM_URL *url,char *s){
	char *schema,*anchor,*file,*query;
	char query_string[UDM_URLSIZE]="";

	if(strlen(s)>=UDM_URLSIZE)
		return(UDM_PARSEURL_LONG);

	*(url->schema)=0;
	*(url->specific)=0;
	*(url->hostinfo)=0;
	*(url->hostname)=0;
	*(url->anchor)=0;
	*(url->auth)=0;
	url->port=0;
	url->default_port=0;
	*(url->path)=0;
	*(url->filename)=0;

	/* Find a possible schema end, then check that   */
	/* it really is a schema: everything before the  */
	/* ':' must be alphanumeric (digits are allowed  */
	/* for names such as oracle8://). The check is   */
	/* needed because the ':' may belong to an       */
	/* anchor instead, e.g. "mod/index.html#a:1".    */

	if((schema=strchr(s,':'))){
		char *ch;
		for(ch=s;ch<schema;ch++){
			/* <ctype.h> functions need an unsigned char value */
			if(!isalnum((unsigned char)*ch)){
				/* Bad character, so it is not a schema */
				schema=NULL;
				break;
			}
		}
	}

	if(schema){
		/* Have schema - absolute URL */
		*schema=0;
		strcpy(url->schema,s);
		strcpy(url->specific,schema+1);
		*schema=':';

		if(!strcasecmp(url->schema,"http")){
			url->default_port=80;
		}else if(!strcasecmp(url->schema,"https")){
			url->default_port=443;
		}else if(!strcasecmp(url->schema,"nntp")){
			url->default_port=119;
		}else if(!strcasecmp(url->schema,"news")){
			url->default_port=119;
		}else if(!strcasecmp(url->schema,"ftp")){
			url->default_port=21;
		}

		if(!strncmp(url->specific,"//",2)){
			char *ss;
			char *hostname;

			/* Have hostinfo */
			if((ss=strchr(url->specific+2,'/'))){
				/* Hostinfo followed by a path */
				*ss=0;
				strcpy(url->hostinfo,url->specific+2);
				*ss='/';
				strcpy(url->path,ss);
			}else{
				/* Hostinfo without a path */
				strcpy(url->hostinfo,url->specific+2);
				strcpy(url->path,"/");
			}

			if((hostname=strchr(url->hostinfo,'@'))){
				/* User name and password are given; */
				/* store the "user:password" string  */
				*hostname=0;
				strcpy(url->auth,url->hostinfo);
				*hostname='@';
				hostname++;
			}else{
				hostname=url->hostinfo;
			}

			if((ss=strchr(hostname,':'))){
				/* Explicit port after the host name */
				*ss=0;
				strcpy(url->hostname,hostname);
				*ss=':';
				url->port=atoi(ss+1);
			}else{
				strcpy(url->hostname,hostname);
				url->port=0;
			}
		}else{
			/* No host but a schema is present.     */
			/* This is possible for:                */
			/* file:  mailto:  htdb:  news:         */

			/* FIXME: rejecting mailto: here is a dirty   */
			/* hack to avoid changes in the "stable"      */
			/* indexer.c; mailto: is not needed for now.  */

			if(!strcasecmp(url->schema,"mailto")){
				return(UDM_PARSEURL_BAD);
			}else if(!strcasecmp(url->schema,"file")||
				 !strcasecmp(url->schema,"exec")||
				 !strcasecmp(url->schema,"cgi")||
				 !strcasecmp(url->schema,"htdb")){
				/* The schema specific part is the path itself */
				strcpy(url->path,url->specific);
			}else if(!strcasecmp(url->schema,"news")){
				/* The URL names no NNTP server, */
				/* so localhost is used          */
				strcpy(url->hostname,"localhost");
				sprintf(url->path,"/%s",url->specific);
				url->default_port=119;
			}else{
				/* Unknown strange schema */
				return(UDM_PARSEURL_BAD);
			}
		}
	}else{
		/* No schema: the whole string is a path */
		strcpy(url->path,s);
	}

	/* Cut the anchor off if it exists */
	if((anchor=strchr(url->path,'#')))
		*anchor=0;

	/* If the path is not absolute, move it to filename, */
	/* i.e. it is neither /usr/local/ nor c:/windows/    */
	/* An empty path has no second character to inspect, */
	/* so it is treated as relative without reading it.  */

	if((url->path[0]!='/')&&((url->path[0]==0)||(url->path[1]!=':'))){
		/* Relative path */
		if(!strncmp(url->path,"./",2))
			strcpy(url->filename,url->path+2);
		else
			strcpy(url->filename,url->path);
		url->path[0]=0;
	}

	/* Truncate path at the query string */
	/* and remember the query string     */

	if((query=strchr(url->path,'?'))){
		strcpy(query_string,query);
		*query=0;
	}

	/* Find the rightmost '/' and move what follows it to filename */

	if((file=strrchr(url->path,'/'))&&(strcmp(file,"/"))){
		strcpy(url->filename,file+1);
		*(file+1)=0;
	}

	/* Append the query string to filename */
	strcat(url->filename,query_string);

	UdmRemove2Dot(url->path);

	return(0);
}


/*********************** Tags ****************************/

/*
 * UdmFreeTag - release the strings held by a parsed tag.
 *
 * Applies UDM_FREE to each string member that UdmParseTag() assigns:
 * tag, href, src, content, value, selected, name, type and lang.
 * The UDM_TAG structure itself is not passed to UDM_FREE.
 *
 * NOTE(review): UDM_FREE is defined outside this file; presumably it
 * frees the pointer and accepts members that are still NULL -- confirm.
 *
 * Always returns 0.
 */
int UdmFreeTag(UDM_TAG *tag){
	UDM_FREE(tag->tag);
	UDM_FREE(tag->href);
	UDM_FREE(tag->src);
	UDM_FREE(tag->content);
	UDM_FREE(tag->value);
	UDM_FREE(tag->selected);
	UDM_FREE(tag->name);
	UDM_FREE(tag->type);
	UDM_FREE(tag->lang);
	return(0);
}

/*
 * UdmParseTag - parse one markup tag and collect its known attributes.
 *
 * tag   Output. Every string member is overwritten without being freed
 *       first. tag->tag receives a copy of the element name; each
 *       recognized attribute receives an allocated copy of its value;
 *       members for attributes that do not occur stay NULL.
 * stag  NUL-terminated, writable tag text. Its first character is
 *       skipped (presumably the '<' -- confirm with callers). The
 *       buffer is modified in place: separators are overwritten with
 *       NUL and attribute names are lower-cased.
 *
 * Only attributes written as name=value are recorded; a bare attribute
 * name without '=' is skipped. Names are compared after lower-casing:
 *   charset     -> name = "Content-Type", content = "charset=" + value
 *   href, src   -> value passed through UdmTrim(value," \r\n\t")
 *   name, http-equiv -> name
 *   content     -> content
 *   value       -> value
 *   selected, checked -> selected
 *   type        -> type
 *   lang        -> lang, with its first two characters lower-cased
 * A value may be enclosed in double or single quotes; it ends at the
 * matching quote or at '>', whichever comes first. An unquoted value
 * ends at whitespace or '>'.
 *
 * NOTE(review): UDM_SKIP / UDM_SKIPN are defined outside this file;
 * presumably they advance the pointer while the current character is /
 * is not in the given set and stop at the terminating NUL -- confirm.
 *
 * Always returns 0.
 */
int UdmParseTag(UDM_TAG *tag,char *stag){
	char *s,*e,*p,*l;
	char *end;

	/* The terminator of the original text: e must never move past it */
	end=stag+strlen(stag);

	/* Skip the first character unless the text is empty */
	s=(*stag)?stag+1:stag;
	e=s;

	/* Element name ends at whitespace, '>' or end of text */
	/* (strchr also matches the terminating NUL)           */
	while(!strchr(" \t\r\n>",*e))e++;
	*e=0;
	tag->tag=strdup(s);
	tag->href=NULL;
	tag->src=NULL;
	tag->content=NULL;
	tag->value=NULL;
	tag->selected=NULL;
	tag->name=NULL;
	tag->type=NULL;
	tag->lang=NULL;

	/* Step over the separator just cleared, but not over the end */
	if(e<end)e++;
	UDM_SKIP(e," \t\r\n>");

	while(e<end){
		/* Attribute name */
		s=e;
		UDM_SKIPN(e," \t\r\n=>");
		if(*e!='='){
			/* Name ran into the end of the text: nothing follows */
			if(e>=end)break;
			*e++=0;
			UDM_SKIP(e," \t\r\n>");
		}
		if(*e=='='){
			/* Terminate the name and locate the value */
			*e=0;e++;
			UDM_SKIP(e," \t\r\n>");
			p=e;
			if(*p=='"'){
				p++;e++;
				UDM_SKIPN(e,"\">");
			}else
			if(*p=='\''){
				p++;e++;
				UDM_SKIPN(e,"'>");
			}else{
				UDM_SKIPN(e," \t\r\n>");
			}
			/* Terminate the value; stay on the end of text */
			/* so that the loop condition stops the parse   */
			*e=0;
			if(e<end)e++;

			/* Make the attribute name lower case */
			for(l=s;*l;l++)
				*l=(char)tolower((unsigned char)*l);

			/* This line to get statistics */
			/*printf("VAR %s\n",s);*/

			/* The trailing numbers are attribute frequencies */
			/* from the statistics run mentioned above        */
			if(!strcmp(s,"charset")){
				UDM_FREE(tag->name);
				UDM_FREE(tag->content);
				tag->name=strdup("Content-Type");
				/* 9 = strlen("charset=") + terminating NUL */
				tag->content=UdmXmalloc(9+strlen(p));
				strcpy(tag->content,"charset=");
				strcat(tag->content,p);
			}else
			if(!strcmp(s,"href")){		/* 50846 */
				UDM_FREE(tag->href);
				tag->href=strdup(UdmTrim(p," \r\n\t"));
			}else
			if(!strcmp(s,"name")){		/* 16987 */
				UDM_FREE(tag->name);
				tag->name=strdup(p);
			}else
			if(!strcmp(s,"content")){	/* 4160 */
				UDM_FREE(tag->content);
				tag->content=strdup(p);
			}else
			if(!strcmp(s,"src")){		/* 2792 */
				UDM_FREE(tag->src);
				tag->src=strdup(UdmTrim(p," \r\n\t"));
			}else
			if(!strcmp(s,"http-equiv")){	/* 699 */
				UDM_FREE(tag->name);
				tag->name=strdup(p);
			}else
			if(!strcmp(s,"value")){		/* 22 */
				UDM_FREE(tag->value);
				tag->value=strdup(p);
			}else
			if(!strcmp(s,"selected")){
				UDM_FREE(tag->selected);
				tag->selected=strdup(p);
			}else
			if(!strcmp(s,"checked")){
				UDM_FREE(tag->selected);
				tag->selected=strdup(p);
			}else
			if(!strcmp(s,"type")){
				UDM_FREE(tag->type);
				tag->type=strdup(p);
			}else
			if(!strcmp(s,"lang")){
				UDM_FREE(tag->lang);
				tag->lang=strdup(p);
				/* An empty value is a one byte allocation, so  */
				/* the second character may only be touched     */
				/* when the first one is not the terminator     */
				if(tag->lang&&tag->lang[0]){
					tag->lang[0]=(char)tolower((unsigned char)tag->lang[0]);
					tag->lang[1]=(char)tolower((unsigned char)tag->lang[1]);
				}
			}
		}
	}
	return(0);
}
