This is pretty ugly behaviour (test it out on
http://www.vuw.ac.nz/home.html which is searchable -- searching it
searches a WAIS database using the patches I described previously) but
it works. In the future, a smart HTML-aware part of waisindex should
be written to suck out the <TITLE>...</TITLE> text and use that as the
headline, storing the URL in the DocID.
Anyway, add this to irbuild.c, in the section where the various
document types are defined:
---begin
else if(strcmp("URL", next_argument) == 0) {
  /* "URL" document type. It takes two further arguments, consumed in
   * this order: what to trim, then what to add. The s_strdup results
   * are stored in the URL_trim and URL_prefix globals.
   * NOTE(review): the next_arg results are handed to s_strdup
   * unchecked -- confirm what happens when an argument is missing.
   */
  typename = next_argument;
  dataops.type = "URL";
  URL_trim = s_strdup(next_arg(&argc, &argv));
  URL_prefix = s_strdup(next_arg(&argc, &argv));
}
---end
and add this to the help section:
---begin
/* Usage line for the URL document type and its two arguments. */
fprintf(stderr," | URL what-to-trim what-to-add /* URL */\n");
---end
irtfiles.c now has this in index_text_file:
---begin
/* Make the current filename accessible via global variables.
 * Increment current_filecount so routines can efficiently detect
 * changes in the current file.
 * -- Prentiss Riddle, Rice ONCS, riddle@rice.edu, 5/6/92
 *
 * When the URL document type is active (URL_prefix and URL_trim set)
 * and filename begins with the whole of URL_trim, that leading part is
 * replaced by URL_prefix; otherwise filename is stored unchanged.
 * Every write is bounded by MAX_FILENAME_LEN and the buffer is always
 * NUL-terminated, so an over-long name is truncated, not overrun.
 */
if(current_filename == NULL) current_filename = s_malloc(MAX_FILENAME_LEN+1);
/* Compare against the full length of URL_trim: a filename shorter than
 * URL_trim must not match, or filename+strlen(URL_trim) would point
 * past the end of the string.
 */
if (URL_prefix && URL_trim &&
    !strncmp(filename, URL_trim, strlen(URL_trim))) {
  /* trim capable */
  strncpy(current_filename, URL_prefix, MAX_FILENAME_LEN);
  current_filename[MAX_FILENAME_LEN] = '\0';
  /* strncat appends at most the space left and terminates itself */
  strncat(current_filename, filename+strlen(URL_trim),
          MAX_FILENAME_LEN - strlen(current_filename));
} else {
  strncpy(current_filename, filename, MAX_FILENAME_LEN);
  /* strncpy does not terminate when the source fills the bound */
  current_filename[MAX_FILENAME_LEN] = '\0';
}
current_filecount++;
---end
and
---begin
/* A separator has been reached: close out the document that just
 * ended before a new one begins.
 */
if(dataops->finish_header_function != NULL){
  dataops->finish_header_function(header);
}
/* The header is still empty: fall back to one derived from the file. */
if(header[0] == '\0'){
  if (URL_prefix) {
    /* URL mode: the headline is taken from current_filename.
     * NOTE(review): strncpy writes exactly MAX_FILENAME_LEN bytes and
     * leaves no terminator when the source is that long or longer --
     * confirm header is large enough and gets terminated.
     */
    strncpy(header, current_filename, MAX_FILENAME_LEN);
  } else {
    /* Default: "<name> <directory>" built from the resolved path. */
    char full_path[1000];
    char directory[1000];
    truename(filename, full_path);
    sprintf(header, "%s %s", pathname_name(full_path),
            pathname_directory(full_path, directory));
  }
}
---end
ircfiles.c has at the end:
---begin
/* Settings for the URL document type; both start out NULL. */
char *URL_prefix = NULL; /* the "what-to-add" argument */
char *URL_trim = NULL;   /* the "what-to-trim" argument */
---end
and ircfiles.h has at the end:
---begin
/* Declarations of the URL document type globals. */
extern char *URL_prefix;
extern char *URL_trim;
---end
HTWAIS.c has in display_search_response:
---begin
} else { /* Not archie */
docname = WWW_from_WAIS(docid);
if (docname) {
char * dbname = HTEscape(database, URL_XPALPHAS);
sprintf(line, "%s/%s/%d/%s", /* W3 address */
dbname,
head->Types ? head->Types[0] : "TEXT",
(int)(head->DocumentLength),
docname);
HTStartAnchor(target, NULL, head->Types? (!strcmp(head->Types[0]
, "URL") ? headline : line) : line);
---end
Cheers;
Nat.