#ifndef _UDM_COMMON_H
#define _UDM_COMMON_H

#include <stdio.h> /* for FILE etc. */

#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif

#include <time.h>

#include "udm_crc32.h"
#include <regex.h>

/*
 * General-purpose limits.
 *
 * UDMSTRSIZ is wrapped in parentheses so it behaves as a single value when
 * used inside a larger expression (for example `n / UDMSTRSIZ` or
 * `UDMSTRSIZ - 1`); its value, 5120, is unchanged.
 */
#define UDMSTRSIZ				(1024*5)
#define UDM_LANGPERDOC				16	/* FIXME */
#define UDM_MAXPARSER				256	/* FIXME */
#define UDM_USER_AGENT				"UdmSearch"
#define UDM_MAXWORDINFO				1024
#define UDM_MAXWORDPERQUERY			32

/*
 * Sizes, timeouts and other default limits.
 *
 * Every macro whose replacement is an arithmetic expression is parenthesized
 * so that it expands as one value inside a larger expression (for example
 * `len / UDM_MAXDOCSIZE`). The numeric values themselves are unchanged.
 */
#define UDM_URLSIZE				127
#define UDM_MAXDOCSIZE				(1024*1024)  /* 1Mb */
#define UDM_MAXWORDSIZE				32
#define UDM_DEFAULT_REINDEX_TIME		(7*24*60*60) /* 604800 = seconds in 7 days */
#define UDM_DEFAULT_MAX_HOPS			256
#define UDM_READ_TIMEOUT			30
#define UDM_DOC_TIMEOUT				90
#define UDM_MAXNETERRORS			16
#define UDM_MAXTITLESIZE			128
#define UDM_MAXKEYWORDSIZE			255
#define UDM_MAXDESCSIZE				100
#define UDM_DEFAULT_NET_ERROR_DELAY_TIME	86400
/* Building with NEWS_EXT defined enlarges the text limit and adds extra size limits. */
#ifdef NEWS_EXT
#define UDM_MAXTEXTSIZE				256000
#define UDM_MAXFROMSIZE				255
#define UDM_MAXDATESIZE				50
#define UDM_MAXSUBJSIZE				1024
#define UDM_MAXGROUPSIZE			255
#define UDM_MAXREFSIZE				1024
#else
#define UDM_MAXTEXTSIZE				255
#endif


/*
 * Locking constants.
 * UDM_LOCK / UDM_UNLOCK form one pair of values; the UDM_LOCK_* names below
 * them form a second numbering, with UDM_LOCK_MAX one greater than the
 * largest of them.
 */
#define UDM_LOCK          1
#define UDM_UNLOCK        2

#define UDM_LOCK_USER     0
#define UDM_LOCK_WIN      15
#define UDM_LOCK_ROBOTS   16
#define UDM_LOCK_QUERY    17
#define UDM_LOCK_TARGET   18
#define UDM_LOCK_RESOLVE  19
#define UDM_LOCK_CACHE    20
#define UDM_LOCK_MAX      21


/* Storage types: the available UDM_DBMODE_* values, numbered 0 through 5. */
#define UDM_DBMODE_SINGLE      0
#define UDM_DBMODE_MULTI       1
#define UDM_DBMODE_SINGLE_CRC  2
#define UDM_DBMODE_WORD2URL    3
#define UDM_DBMODE_MULTI_CRC   4
#define UDM_DBMODE_CACHE       5


/* Database open modes: read or write. */
#define UDM_OPEN_MODE_READ   0
#define UDM_OPEN_MODE_WRITE  1


/*
 * Search modes.
 * Two independent numberings share this section: UDM_ORD_* and UDM_MODE_*.
 */
#define UDM_ORD_RATE     0
#define UDM_ORD_DATE     1

#define UDM_MODE_ALL     0
#define UDM_MODE_ANY     1
#define UDM_MODE_BOOL    2
#define UDM_MODE_PHRASE  3

/* Word match types: whole word, beginning, substring, or end. */
#define UDM_MATCH_WORD    0
#define UDM_MATCH_BEGIN   1
#define UDM_MATCH_SUBSTR  2
#define UDM_MATCH_END     3

/* Search cache switch values. */
#define UDM_CACHE_ENABLED   1
#define UDM_CACHE_DISABLED  0

/* Return codes of the indexer (IND_* values, numbered 0 through 4). */
#define IND_UNKNOWN     0
#define IND_OK          1
#define IND_ERROR       2
#define IND_NO_TARGET   3
#define IND_TERMINATED  4


/*
 * Indexing flags.
 * Each value occupies a distinct bit, so several flags can be OR-ed together.
 * Written in hexadecimal to make the bit positions visible.
 */
#define UDM_FLAG_REINDEX       0x001
#define UDM_FLAG_EXP_FIRST     0x002
#define UDM_FLAG_ADD_SERV      0x004
#define UDM_FLAG_MARK          0x008
#define UDM_FLAG_SPELL         0x010
#define UDM_FLAG_INSERT        0x020
#define UDM_FLAG_SORT_HOPS     0x040
#define UDM_FLAG_INIT          0x080
#define UDM_FLAG_SKIP_LOCKING  0x100

/* Tracking modes: disabled, queries, or words. */
#define UDM_TRACK_DISABLED  0
#define UDM_TRACK_QUERIES   1
#define UDM_TRACK_WORDS     2


/* Actions that can be requested for a URL file. */
#define UDM_URL_FILE_REINDEX  1
#define UDM_URL_FILE_CLEAR    2
#define UDM_URL_FILE_INSERT   3
#define UDM_URL_FILE_PARSE    4

/* Ispell mode: binary flags, one bit each (hexadecimal notation). */
#define UDM_ISPELL_MODE_DB       0x1
#define UDM_ISPELL_USE_PREFIXES  0x2
#define UDM_ISPELL_MODE_SERVER   0x4

/* UdmSearch types */
/* Record of three ints: a weight plus a URL id and a word id. */
typedef struct udm_crcword_struct {
	int weight;
	int url_id;
	int wrd_id;
} UDM_CRCWORD;


/* One stop-list entry: the word and a 3-byte language buffer. */
typedef struct udm_stopword_struct {
	char *word;
	char  lang[3];
} UDM_STOPWORD;


/* One word-list entry: a count together with the word string. */
typedef struct udm_word_struct {
	int   count;
	char *word;
} UDM_WORD;

/* One cross-word list entry: count, word, URL string and a referree id. */
typedef struct {
	int   count;
	char *word;
	char *url;
	int   referree_id;
} UDM_CROSSWORD;

/*
 * Language guesser record for a single language: a counter and a 3-byte
 * language buffer. (As the original author noted, the word-list type could
 * be used instead.)
 */
typedef struct udm_lang_struct {
	int  count;
	char lang[3];
} UDM_LANG;


/*
 * Per-server parameters.
 * Fields are grouped by topic; the member order is part of the layout and
 * must not be rearranged.
 */
typedef struct udm_server_struct {
	/* ---- General items ---- */
	int   rec_id;		/* keeps the order of appearance */
	int   match_type;
	void *regexp;		/* used for realm */
	char *url;
	char *alias;		/* primary alias */
	char *charset;
	char *category;
	char *tag;
	char  lang[3];
	int   period;

	/* ---- Auth ---- */
	char *basic_auth;
	char *user;
	char *passwd;

	/* ---- Proxy stuff ---- */
	char *proxy;
	char *proxy_basic_auth;
	int   proxy_port;

	/* ---- HTDB ---- */
	char *htdb_list;
	char *htdb_doc;

	/* ---- Mirror stuff ---- */
	int   use_mirror;
	char *mirror_root;
	char *mirror_headers;

	/* ---- Network stuff ---- */
	int   net_errors;
	int   max_net_errors;
	int   net_error_delay_time;
	int   read_timeout;
	int   doc_timeout;

	/* ---- Spider & indexer stuff ---- */
	int   index;
	int   follow;
	int   maxhops;
	int   deletebad;
	int   use_robots;
	int   use_clones;
	int   delete_no_server;

	/* ---- Words stuff ---- */
	int   crossweight;
	int   bodyweight;
	int   titleweight;
	int   urlweight;
	int   urlhostweight;
	int   urlpathweight;
	int   urlfileweight;
	int   descweight;
	int   keywordweight;
	int   correct_factor;
	int   incorrect_factor;
	int   number_factor;
	int   alnum_factor;

	/* ---- MP3 extensions ---- */
	int   check_mp3_tag;
	int   check_only_mp3_tag;
} UDM_SERVER;


/*
 * Document record.
 * Every member corresponds to a field of the "url" table.
 */
typedef struct udm_url_struct {
	int         url_id;
	int         status;
	int         size;
	int         rating;
	int         order;		/* used for search results */
	int         referrer;
	int         tag;
	int         hops;
	int         indexed;	/* used by the own backend */
	char       *url;
	char       *content_type;
	char       *title;
	char       *keywords;
	char       *description;
	char       *text;
	char       *category;
	char       *content;
	time_t      last_mod_time;
	time_t      last_index_time;
	time_t      next_index_time;
	udmcrc32_t  crc32;
} UDM_DOCUMENT;

/*
 * Connection state: status/error counters, descriptor and port, host name,
 * stdio streams, socket address and a data buffer with its length fields.
 * `connp` points to another UDM_CONN and `indexer` to a
 * struct udm_indexer_struct.
 */
typedef struct udm_conn_struct {
	int    status;
	int    connected;
	int    err;
	int    retry;
	int    conn_fd;
	int    port;
	int    timeout;
	char  *hostname;
	FILE  *in;
	FILE  *out;
	struct sockaddr_in sin;
	int    buf_len;
	int    buf_len_total;
	int    len;
	char  *buf;
	struct udm_conn_struct    *connp;
	struct udm_indexer_struct *indexer;
} UDM_CONN;

/* One alias item: a `find` string and its `replace` string. */
typedef struct udm_alias_struct {
	char *find;
	char *replace;
} UDM_ALIAS;


/*
 * One cached link. Links are kept in a cache of these records before the
 * actual INSERT into the database.
 */
typedef struct udm_href_struct {
	char *href;
	int   referrer;
	int   hops;
	int   stored;
	char *tag;
	char *cat;
} UDM_HREF;


/* External parser entry: source MIME type, target MIME type and a command string. */
typedef struct udm_parser_struct {
	char *from_mime;
	char *to_mime;
	char *cmd;
} UDM_PARSER;

/*
 * Resolver record: a host name with its address, a network error counter
 * and a last-used timestamp.
 */
typedef struct udm_host_addr_struct {
	char           *hostname;
	struct in_addr  addr;
	int             net_errors;
	time_t          last_used;
} UDM_HOST_ADDR;

/* Spell entry: a word pointer, a 10-byte flag buffer and a 3-byte language buffer. */
typedef struct spell_struct {
	char *word;
	char  flag[10];
	char  lang[3];
} UDM_SPELL;


/*
 * Affix entry: flag and type characters, fixed-size language/mask/find/repl
 * buffers, a regex_t, the `replen` length and a `compile` marker byte.
 */
typedef struct aff_struct {
	char    flag;
	char    type;
	char    lang[3];
	char    mask[33];
	char    find[16];
	char    repl[16];
	regex_t reg;
	size_t  replen;
	char    compile;
} UDM_AFFIX;

/* Two 256-entry int tables (Left and Right) plus a 3-byte language buffer. */
typedef struct Tree_struct {
	int  Left[256];
	int  Right[256];
	char lang[3];
} Tree_struct;


/* One robots entry: a host-info string paired with a path string. */
typedef struct udm_robots_struct {
	char *hostinfo;
	char *path;
} UDM_ROBOT;

/* A counted collection of UDM_ROBOT entries: the count and a pointer to them. */
typedef struct {
	size_t     nrobots;
	UDM_ROBOT *robots;
} UDM_ROBOTS;


/*
 * Configuration / environment.
 * One large aggregate holding settings, the various dynamically sized lists
 * (each pointer accompanied by its n.../m... size_t counters), database
 * parameters, filter strings, logger state, callbacks and spell trees.
 * The member order is part of the layout and is preserved as declared.
 */
typedef struct udm_config_struct {
	char          vardir[1024];
	int           errcode;
	char          errstr[UDMSTRSIZ];
	int           local_charset;
	int           force1251;
	int           min_word_len;
	int           max_word_len;
	size_t        max_doc_size;
	char          user_agent[UDMSTRSIZ];
	char          extra_headers[UDMSTRSIZ];
	char         *url_file_name;

	/* Servers */
	size_t        nservers;
	size_t        mservers;
	UDM_SERVER   *Server;
	UDM_SERVER   *csrv;

	/* Aliases */
	size_t        naliases;
	size_t        maliases;
	UDM_ALIAS    *Alias;
	char         *alias_prog;

	/* Hrefs */
	size_t        mhrefs;
	size_t        nhrefs;
	size_t        shrefs;
	size_t        dhrefs;
	UDM_HREF     *Href;

	int           have_targets;
	int           nrows;
	int           currow;

	/* Filters */
	void         *Filter;
	size_t        nfilters;
	size_t        mfilters;

	/* Mime */
	size_t        nmimes;
	size_t        mmimes;
	void         *Mime;

	/* Parsers */
	size_t        nparsers;
	UDM_PARSER   *parsers;

	/* Host addresses */
	size_t        nhost_addr;
	size_t        mhost_addr;
	UDM_HOST_ADDR *host_addr;

	/* Affixes, spell words, stop list */
	size_t        naffixes;
	size_t        maffixes;
	void         *Affix;
	size_t        nspell;
	size_t        mspell;
	UDM_SPELL    *Spell;
	size_t        nstoplist;
	UDM_STOPWORD *stoplist;
	int           ispell_mode;

	UDM_ROBOTS    Robots;

	/* Database parameters */
	char         *DBHost;
	char         *DBName;
	char         *DBUser;
	char         *DBPass;
	int           DBPort;
	int           DBType;
	int           DBMode;
	int           DBUseLock;

	char          catstr[UDMSTRSIZ];
	char          langstr[UDMSTRSIZ];
	char          tagstr[UDMSTRSIZ];
	char          statusstr[UDMSTRSIZ];
	char          urlstr[UDMSTRSIZ];
	char          timestr[UDMSTRSIZ];

	char          catlimit[UDMSTRSIZ];
	char          taglimit[UDMSTRSIZ];
	int           sitelimit;

	char          srv_tables[UDMSTRSIZ];
	char          stop_tables[UDMSTRSIZ];

	/* Logger */
	int           logFacility;
	int           logLevel;
	int           is_log_open;
	FILE         *logFD;

	/* Built-in database */
	char         *ul;
	char         *ttag;
	int           n_urls;
	UDM_DOCUMENT *URLs;

	/* GetDocInfo cache */
	void         *urlres;

	/* Callbacks */
	void (*ThreadInfo)(int handle, const char *state, const char *str);
	void (*StatInfo)(int handle, int code, int expired, int total, const char *str);
	void (*RefInfo)(int code, const char *url, const char *ref);
	void (*LockProc)(int command, int type);

	/* Cache mode log file descriptors */
	/*int cachelog[257];*/
	int           wrd_fd;
	int           del_fd;
	char         *logd_addr;
	int           logd_fd;

	/* Generate url_id using crc32 */
	int           use_crc32_url_id;
	int           use_remote_cont_type;
	int           nLang;
	Tree_struct   SpellTree[UDM_LANGPERDOC];
	Tree_struct   PrefixTree[UDM_LANGPERDOC];
	Tree_struct   SuffixTree[UDM_LANGPERDOC];
	int           use_phrases;	/* whether to index with phrases */
	int           use_crossword;
	char         *spellhost;
} UDM_ENV;


/*
 * URL split into components. Each string component is a fixed buffer of
 * UDM_URLSIZE bytes; the two port values are ints.
 */
typedef struct udm_url {
	char schema  [UDM_URLSIZE];
	char specific[UDM_URLSIZE];
	char hostinfo[UDM_URLSIZE];
	char auth    [UDM_URLSIZE];
	char hostname[UDM_URLSIZE];
	char path    [UDM_URLSIZE];
	char filename[UDM_URLSIZE];
	char anchor  [UDM_URLSIZE];
	int  port;
	int  default_port;
} UDM_URL;

/* Tag record: the tag string followed by eight further string pointers. */
typedef struct udm_tag {
	char *tag;
	char *href;
	char *src;
	char *content;
	char *value;
	char *selected;
	char *name;
	char *type;
	char *lang;
} UDM_TAG;

/* Search word record: URL id and count as ints, weight and position as shorts. */
typedef struct udm_search_wrd_struct {
	int   url_id;
	int   count;
	short weight;
	short pos;
} UDM_SEARCHWORD;

/* Category record: a record id and three 128-byte buffers (path, link, name). */
typedef struct udm_category_struct {
	int  rec_id;
	char path[128];
	char link[128];
	char name[128];
} UDM_CATEGORY;


/*
 * Boolean search constants.
 * UDM_MAXSTACK is the stack capacity; the UDM_STACK_* values are item codes,
 * with UDM_STACK_WORD set well apart from the others at 200.
 */
#define UDM_MAXSTACK     100

#define UDM_STACK_LEFT   0
#define UDM_STACK_RIGHT  1
#define UDM_STACK_BOT    2
#define UDM_STACK_OR     3
#define UDM_STACK_AND    4
#define UDM_STACK_NOT    5
#define UDM_STACK_WORD   200

/*
 * Pair of fixed-capacity int stacks (`cstack` and `astack`), each with its
 * own element counter and room for UDM_MAXSTACK entries.
 */
typedef struct {
	int ncstack;
	int cstack[UDM_MAXSTACK];

	int nastack;
	int astack[UDM_MAXSTACK];
} UDM_BOOLSTACK;

/* One stack item: a command code and its integer argument. */
typedef struct {
	int cmd;
	int arg;
} UDM_STACK_ITEM;


/*
 * Indexer agent.
 * Holds the word and cross-word lists, the document buffer, database and
 * connection handles, a pointer to the configuration, and per-query search
 * state. The member order is part of the layout and is preserved.
 */
typedef struct udm_indexer_struct {
	int             handle;		/* indexer handle for the multithreaded version */
	size_t          mwords;		/* amount of memory allocated for words */
	size_t          nwords;		/* real number of words in the list */
	size_t          swords;		/* number of words in the sorted list */
	UDM_WORD       *Word;		/* the word list itself */
	size_t          ncrosswords;
	size_t          mcrosswords;
	UDM_CROSSWORD  *CrossWord;
	int             DBError;
	int             action;		/* a callback function may request an action */
	char            state[128];	/* currently unused */
	int             buf_size;	/* currently unused */
	char           *buf;		/* buffer for the document */
	void           *db;		/* database handle */
	UDM_LANG        lang[UDM_LANGPERDOC];	/* language guesser variable */
	UDM_CONN       *connp;
	UDM_ENV        *Conf;
	char            wordinfo[UDM_MAXWORDINFO];
	int             charset;
	int             search_mode;
	int             word_match;
	int             sort_order;
	int             page_number;
	int             page_size;
	unsigned int    group_mask;
	int             track_mode;
	UDM_SPELL       t_Spell;
	int             read_timeout;
	int             doc_timeout;
	int             cache_mode;

	size_t          total_found;
	char           *weight_factor;
	size_t          nlangs;		/* number of languages per doc */
	size_t          curlang;	/* current language for word normalizing */
	int             spellang;
	int             wordpos;	/* for phrases */

	/* Search stuff */
	size_t          words_in_query;
	char           *words[UDM_MAXWORDPERQUERY];
	int             wordorders[UDM_MAXWORDPERQUERY];
	udmcrc32_t      cwords[UDM_MAXWORDPERQUERY];
	size_t          nitems;
	UDM_STACK_ITEM  items[UDM_MAXSTACK];
	int             wf[8];
	char           *glang;		/* language selection from the CGI form */
} UDM_AGENT;

/*
 * Search result: first/last/total/row-count/work-time counters, a word-info
 * buffer of UDM_MAXWORDINFO bytes and a pointer to UDM_DOCUMENT data.
 */
typedef struct {
	size_t        first;
	size_t        last;
	size_t        total_found;
	size_t        num_rows;
	size_t        work_time;
	char          wordinfo[UDM_MAXWORDINFO];
	UDM_DOCUMENT *Doc;
} UDM_RESULT;

/* A URL string paired with an integer status. */
typedef struct {
	char *url;
	int   status;
} UDM_URLSTATE;


#endif
