/* Author: Foobar1329 */
/* Platform: Win32/Linux */
/* Compiler/version: ?/? */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
/* MACROS */
/* Tag delimiters. */
#define LT '<'
#define GT '>'
/* Maximum number of characters kept for one URI (terminator not counted). */
#define URI_MAX_LG 512
/* Internal helper: 1 when ch is white space according to isspace(); that set
   always contains '\n' and '\t', so they need no separate comparison. */
#define URI_IS_BLANK(ch) (isspace((ch)) != 0)
/* Internal helper: 1 when ch equals one of the two given letter forms. */
#define URI_IS_EITHER(ch, lower, upper) ((ch) == (lower) || (ch) == (upper))
/* 1 when a..f spell white space followed by "href=" (letters in any case). */
#define A_HREF(a,b,c,d,e,f) \
    ( URI_IS_BLANK(a) && \
      URI_IS_EITHER((b), 'h', 'H') && \
      URI_IS_EITHER((c), 'r', 'R') && \
      URI_IS_EITHER((d), 'e', 'E') && \
      URI_IS_EITHER((e), 'f', 'F') && \
      (f) == '=' )
/* 1 when a..e spell white space followed by "src=" (letters in any case). */
#define IMG_SRC(a,b,c,d,e) \
    ( URI_IS_BLANK(a) && \
      URI_IS_EITHER((b), 's', 'S') && \
      URI_IS_EITHER((c), 'r', 'R') && \
      URI_IS_EITHER((d), 'c', 'C') && \
      (e) == '=' )
/* 1 when a,b,c spell "<a" (any case) followed by white space. */
#define A_BEG(a,b,c) \
    ( (a) == LT && URI_IS_EITHER((b), 'a', 'A') && URI_IS_BLANK(c) )
/* 1 when a,b spell "a>" (any case). */
#define A_END(a,b) ( URI_IS_EITHER((a), 'a', 'A') && (b) == GT )
/* 1 when a..d spell "<img" (letters in any case). */
#define IMG_BEG(a,b,c,d) \
    ( (a) == LT && \
      URI_IS_EITHER((b), 'i', 'I') && \
      URI_IS_EITHER((c), 'm', 'M') && \
      URI_IS_EITHER((d), 'g', 'G') )
/* 1 when a is the closing '>' of a tag. */
#define IMG_END(a) ( (a) == GT )
/* 1 when a..d spell the comment opener "<!--". */
#define CMT_BEG(a,b,c,d) \
    ( (a) == LT && (b) == '!' && (c) == '-' && (d) == '-' )
/* 1 when a,b,c spell the comment closer "-->". */
#define CMT_END(a,b,c) \
    ( (a) == '-' && (b) == '-' && (c) == GT )
/* Non-zero: URIs are written as they are found instead of being stored. */
#define INLINEDISPLAY 1
/* FUNCTION DECLARATIONS */
/* Reads f_in character by character and extracts the value following "href="
   (after a "<a" + white space opener) or "src=" (after a "<img" opener),
   skipping text inside "<!--" ... "-->".
   When inlineDisp is non-zero each value is written to f_out followed by a
   newline; otherwise the values are stored in an array allocated through
   *pppc (which must be NULL on entry). *pNb receives the number of values. */
void parse(FILE * f_in, FILE * f_out, char *** pppc, int * pNb, int inlineDisp);
/* Frees the nb strings held in the array *pppc, then the array itself, and
   sets *pppc to NULL. */
void release(char *** pppc, int nb);
/* PROGRAM ENTRY POINT */
/*
 * findUri: extracts the URIs found by parse() in an HTML document and writes
 * one URI per line.
 *
 * argv[1] (optional): input HTML file; standard input when absent.
 * argv[2] (optional): output file; standard output when absent.
 *
 * Returns EXIT_SUCCESS on success. Terminates with EXIT_FAILURE on a usage
 * error, when a file cannot be opened, or on a read or write error.
 */
int main(int argc, char ** argv)
{
    FILE * fp_in = NULL;
    FILE * fp_out = NULL;
    char ** URIs = NULL;
    /* Names used in diagnostics; never index argv past what argc guarantees. */
    const char * in_name = "(stdin)";
    const char * out_name = "(stdout)";
    int i;
    int n = 0; /* stays 0 if parse() rejects its arguments */

    if (argc > 3 || argc < 1)
    {
        fprintf(stderr,"usage: findUri [arg1] [arg2]\n");
        fprintf(stderr,"\t[arg1]: valid HTML 4.01 file (stdin if not supplied)\n");
        fprintf(stderr,"\t[arg2]: output file that contains one URI per line (stdout if not supplied)\n");
        exit(EXIT_FAILURE);
    }

    if (argc >= 2) {
        in_name = argv[1];
        fp_in = fopen(argv[1], "r");
    }
    else {
        fp_in = stdin;
    }
    if (!fp_in) {
        fprintf(stderr,"findUri: error while opening file %s\n", in_name);
        exit(EXIT_FAILURE);
    }

    if (argc == 3) {
        out_name = argv[2];
        fp_out = fopen(argv[2], "w");
    }
    else {
        fp_out = stdout;
    }
    if (!fp_out) {
        fprintf(stderr,"findUri: error while opening file %s\n", out_name);
        if (fp_in != stdin) { fclose(fp_in); }
        exit(EXIT_FAILURE);
    }

    parse(fp_in, fp_out, &URIs, &n, INLINEDISPLAY);

    if (ferror(fp_in)) {
        /* Report the INPUT file: it is the stream that failed. */
        fprintf(stderr,"findUri: error while reading file %s\n", in_name);
        if (!INLINEDISPLAY) { release(&URIs, n); }
        exit(EXIT_FAILURE);
    }

    if (!INLINEDISPLAY) {
        /* Deferred mode: the URIs were stored by parse(); print them now. */
        for (i = 0; i < n; ++i) {
            fputs(URIs[i], fp_out);
            fputc('\n', fp_out);
        }
        release(&URIs, n);
    }

    /* Flush before checking so buffered write failures are not missed. */
    if (fflush(fp_out) == EOF || ferror(fp_out)) {
        fprintf(stderr,"findUri: error while writing file %s\n", out_name);
        exit(EXIT_FAILURE);
    }

    if (fp_in != stdin) {
        fclose(fp_in);
        fp_in = NULL;
    }
    if (fp_out != stdout) {
        if (fclose(fp_out) == EOF) {
            fprintf(stderr,"findUri: error while writing file %s\n", out_name);
            exit(EXIT_FAILURE);
        }
        fp_out = NULL;
    }
    return EXIT_SUCCESS;
}
/* FUNCTION DEFINITIONS */
/*
 * Reads f_in one character at a time and extracts the value that follows
 * "href=" once an A element has been entered ("<a" + white space) and "src="
 * once an IMG element has been entered ("<img"). Text between "<!--" and
 * "-->" is not searched for attributes.
 *
 * After the '=' sign, white space is skipped, the next character (expected to
 * be the opening double quote) is dropped, and everything up to the next '"'
 * is taken as the URI.
 *
 * f_in       : input stream.
 * f_out      : stream receiving one URI per line when inlineDisp is non-zero;
 *              not used otherwise.
 * pppc       : address of a char** that must be NULL on entry. When inlineDisp
 *              is zero it receives a newly allocated array of NUL-terminated
 *              strings, to be freed by the caller with release().
 * pNb        : receives the number of URIs found.
 * inlineDisp : non-zero to print URIs as they are found, zero to store them.
 *
 * Nothing is done (and *pNb is left untouched) when f_in, pppc or pNb is NULL
 * or when *pppc is not NULL. Stored URIs are limited to URI_MAX_LG characters.
 * On allocation failure the function releases what was stored, prints a
 * message on stderr and calls exit(EXIT_FAILURE).
 */
void parse(FILE * f_in, FILE * f_out, char *** pppc, int * pNb, int inlineDisp)
{
    int InElementA = 0, InElementIMG = 0, InComment = 0;
    /* Sliding window over the last characters read: c1 is the oldest, prev
       the most recent one before cur. */
    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, prev = 0, cur = 0;
    int cnt = 0; /* number of complete URIs found so far */

    if (!(f_in && pppc && !*pppc && pNb)) { return; }
    if (inlineDisp && !f_out) {
        /* Inline mode cannot work without an output stream. */
        *pNb = 0;
        return;
    }

    while ((cur = fgetc(f_in)) != EOF)
    {
        /* Entering an A element */
        if (!InElementA && A_BEG(c4, prev, cur)) { InElementA = 1; }
        /* Entering a comment */
        else if (!InComment && CMT_BEG(c3, c4, prev, cur)) { InComment = 1; }
        /* Leaving a comment */
        else if (CMT_END(c4, prev, cur)) { InComment = 0; }
        /* Everything below is only evaluated outside comments */
        else if (!InComment) {
            if (!InElementIMG && IMG_BEG(c3, c4, prev, cur)) { InElementIMG = 1; }
            else if (InElementIMG && IMG_END(cur)) { InElementIMG = 0; }
            else if ( (InElementA && A_HREF(c1, c2, c3, c4, prev, cur)) ||
                      (InElementIMG && IMG_SRC(c2, c3, c4, prev, cur)) ) {
                int cnt2 = 0;     /* characters stored for the current URI */
                char * uri = NULL; /* storage for the current URI, NULL in inline mode */

                if (!inlineDisp) {
                    /* Grow the array by one slot; keep the old pointer valid
                       until realloc is known to have succeeded. */
                    char ** p = realloc(*pppc, ((size_t)cnt + 1) * sizeof *p);
                    if (!p) {
                        release(pppc, cnt);
                        fprintf(stderr,"findUri: allocation failure.\n");
                        exit(EXIT_FAILURE);
                    }
                    *pppc = p;
                    p[cnt] = malloc((URI_MAX_LG + 1) * sizeof *p[cnt]);
                    if (!p[cnt]) {
                        release(pppc, cnt);
                        fprintf(stderr,"findUri: allocation failure.\n");
                        exit(EXIT_FAILURE);
                    }
                    uri = p[cnt];
                }

                /* Skip white space between '=' and the value. */
                do {
                    cur = fgetc(f_in);
                } while (cur != EOF && isspace(cur));
                if (cur == EOF) {
                    /* The slot is not counted in cnt, so free it here or it leaks. */
                    if (uri) { free(uri); (*pppc)[cnt] = NULL; }
                    break;
                }
                /* cur is the first non-blank character (the opening quote in
                   well-formed input); it is not part of the URI. */
                c1 = c2; c2 = c3; c3 = c4; c4 = prev; prev = cur;

                /* Note: the character read when the length limit is reached
                   is consumed but not stored. The limit only applies when
                   storing, since cnt2 does not advance in inline mode. */
                while ((cur = fgetc(f_in)) != '"' && cur != EOF && cnt2 < URI_MAX_LG) {
                    if (uri) { uri[cnt2] = (char)cur; ++cnt2; }
                    else { fputc(cur, f_out); }
                    c1 = c2; c2 = c3; c3 = c4; c4 = prev; prev = cur;
                }
                if (cur == EOF) {
                    /* Unterminated value: discard the partial entry. */
                    if (uri) { free(uri); (*pppc)[cnt] = NULL; }
                    break;
                }
                if (uri) { uri[cnt2] = '\0'; }
                else { fputc('\n', f_out); }
                ++cnt;
            }
            else if (A_END(prev, cur)) { InElementA = 0; }
            else if (IMG_END(cur)) { InElementIMG = 0; }
        }
        c1 = c2; c2 = c3; c3 = c4; c4 = prev; prev = cur;
    }
    *pNb = cnt;
}
/*
 * Frees the nb strings held in the array *pppc, then the array itself, and
 * sets *pppc to NULL.
 *
 * pppc : address of the array pointer; a NULL pppc or a NULL *pppc is
 *        accepted and leaves nothing to free.
 * nb   : number of entries of *pppc to free; values <= 0 free only the array.
 */
void release(char *** pppc, int nb)
{
    if (!pppc) { return; }
    if (*pppc) {
        /* Free the strings from the last entry down to the first. */
        while (nb > 0) {
            --nb;
            free((*pppc)[nb]);
            (*pppc)[nb] = NULL;
        }
        free(*pppc);
        *pppc = NULL;
    }
}