/* Author: Foobar1329 */
/* Platform: Win32/Linux */
/* Compiler/version: ?/? */

/*
 * findUri: scans an HTML stream and extracts the value of the href attribute
 * found inside A elements and of the src attribute found inside IMG elements.
 * One URI is produced per line.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

/* MACROS */
#define GT '>'
#define LT '<'

/* Maximum number of characters kept for a URI stored in memory. */
#define URI_MAX_LG 512

/* Number of already-read characters remembered by the scanner. */
#define HIST_LG 5

/* whitespace, then "href=" (case-insensitive); (f) is the current character */
#define A_HREF(a,b,c,d,e,f) \
    ( isspace((a)) && \
      ((b) == 'h' || (b) == 'H') && \
      ((c) == 'r' || (c) == 'R') && \
      ((d) == 'e' || (d) == 'E') && \
      ((e) == 'f' || (e) == 'F') && (f) == '=' )

/* whitespace, then "src=" (case-insensitive); (e) is the current character */
#define IMG_SRC(a,b,c,d,e) \
    ( isspace((a)) && \
      ((b) == 's' || (b) == 'S') && \
      ((c) == 'r' || (c) == 'R') && \
      ((d) == 'c' || (d) == 'C') && \
      (e) == '=' )

/* "<a" followed by whitespace: start tag of an A element */
#define A_BEG(a,b,c) \
    ( (a) == LT && ((b) == 'a' || (b) == 'A') && isspace((c)) )

/* "a>": taken as the end of the closing tag of an A element */
#define A_END(a,b) ( ((a) == 'a' || (a) == 'A') && (b) == GT )

/* "<img" (case-insensitive) */
#define IMG_BEG(a,b,c,d) \
    ( (a) == LT && \
      ((b) == 'i' || (b) == 'I') && \
      ((c) == 'm' || (c) == 'M') && \
      ((d) == 'g' || (d) == 'G') )

/* '>' closes the IMG tag */
#define IMG_END(a) ( (a) == GT )

/* "<!--" */
#define CMT_BEG(a,b,c,d) \
    ( (a) == LT && (b) == '!' && (c) == '-' && (d) == '-' )

/* "-->" */
#define CMT_END(a,b,c) \
    ( (a) == '-' && (b) == '-' && (c) == GT )

/* 1: URIs are written to the output while parsing; 0: URIs are stored first. */
#define INLINEDISPLAY 1

/* FUNCTION DECLARATIONS */
void parse(FILE * f_in, FILE * f_out, char *** pppc, int * pNb, int inlineDisp);
void release(char *** pppc, int nb);
static void hist_push(int hist[HIST_LG], int ch);
static int read_attr_value(FILE * f_in, FILE * f_out, char * buf, int hist[HIST_LG]);

/* PROGRAM ENTRY POINT */
/*
 * Usage: findUri [input [output]]
 * Reads stdin when no input file is given and writes to stdout when no
 * output file is given. Returns EXIT_SUCCESS, or EXIT_FAILURE on a usage,
 * open, read or write error.
 */
int main(int argc, char ** argv)
{
    FILE * fp_in = NULL;
    FILE * fp_out = NULL;
    char ** URIs = NULL;
    const char * in_name = "stdin";
    const char * out_name = "stdout";
    int i;
    int n = 0;
    int status = EXIT_SUCCESS;

    if (argc > 3 || argc < 1)
    {
        fprintf(stderr,"usage: findUri [arg1] [arg2]\n");
        fprintf(stderr,"\t[arg1]: valid HTML 4.01 file (stdin if not supplied)\n");
        fprintf(stderr,"\t[arg2]: output file that contains one URI per line (stdout if not supplied)\n");
        exit(EXIT_FAILURE);
    }

    if (argc >= 2)
    {
        in_name = argv[1];
        fp_in = fopen(in_name, "r");
    }
    else
    {
        fp_in = stdin;
    }
    if (!fp_in)
    {
        fprintf(stderr,"findUri: error while opening file %s\n", in_name);
        exit(EXIT_FAILURE);
    }

    if (argc == 3)
    {
        out_name = argv[2];
        fp_out = fopen(out_name, "w");
    }
    else
    {
        fp_out = stdout;
    }
    if (!fp_out)
    {
        fprintf(stderr,"findUri: error while opening file %s\n", out_name);
        exit(EXIT_FAILURE);
    }

    parse(fp_in, fp_out, &URIs, &n, INLINEDISPLAY);

    if (ferror(fp_in))
    {
        /* Report the INPUT name: it is the stream that failed. */
        fprintf(stderr,"findUri: error while reading file %s\n", in_name);
        if (!INLINEDISPLAY)
        {
            release(&URIs, n);
        }
        exit(EXIT_FAILURE);
    }

    if (!INLINEDISPLAY)
    {
        for (i = 0; i < n; ++i)
        {
            fputs(URIs[i], fp_out);
            fputc('\n', fp_out);
        }
        release(&URIs, n);
    }

    if (fp_in != stdin)
    {
        fclose(fp_in);
        fp_in = NULL;
    }

    /* Buffered write errors may only show up on flush/close. */
    if (ferror(fp_out))
    {
        status = EXIT_FAILURE;
    }
    if (fp_out != stdout)
    {
        if (fclose(fp_out) != 0)
        {
            status = EXIT_FAILURE;
        }
        fp_out = NULL;
    }
    else if (fflush(stdout) != 0)
    {
        status = EXIT_FAILURE;
    }
    if (status != EXIT_SUCCESS)
    {
        fprintf(stderr,"findUri: error while writing file %s\n", out_name);
    }

    return status;
}

/* FUNCTION DEFINITIONS */

/*
 * Appends ch to the sliding window of the HIST_LG most recently read
 * characters; hist[HIST_LG-1] is the newest entry, hist[0] the oldest.
 */
static void hist_push(int hist[HIST_LG], int ch)
{
    int i;

    for (i = 0; i < HIST_LG - 1; ++i)
    {
        hist[i] = hist[i + 1];
    }
    hist[HIST_LG - 1] = ch;
}

/*
 * Reads one attribute value from f_in; the '=' sign has already been consumed.
 *
 * Whitespace before the value is skipped. A value opened by '"' or '\'' ends
 * at the matching quote; any other value is treated as unquoted and ends at
 * the first whitespace character or at '>'.
 *
 * buf != NULL: the value is stored in buf (capacity URI_MAX_LG + 1) and
 *              NUL-terminated; characters beyond URI_MAX_LG are consumed but
 *              dropped, so the tail of a long URI is not parsed as markup.
 * buf == NULL: the value is written to f_out, followed by '\n'.
 *
 * Every consumed character except the returned one is pushed into hist.
 * Returns the terminating character, or EOF when the input ended before the
 * value was complete (in that case no terminator/newline is produced).
 */
static int read_attr_value(FILE * f_in, FILE * f_out, char * buf, int hist[HIST_LG])
{
    int cur;
    int quote = 0;
    int len = 0;

    while ((cur = fgetc(f_in)) != EOF && isspace(cur))
    {
        /* skip whitespace between '=' and the value */
    }
    if (cur == EOF)
    {
        return EOF;
    }

    if (cur == '"' || cur == '\'')
    {
        quote = cur;
        hist_push(hist, cur);
        cur = fgetc(f_in);
    }

    while (cur != EOF)
    {
        if (quote ? (cur == quote) : (isspace(cur) || cur == GT))
        {
            break;
        }
        if (buf)
        {
            if (len < URI_MAX_LG)
            {
                buf[len] = (char)cur;
                ++len;
            }
        }
        else
        {
            fputc(cur, f_out);
        }
        hist_push(hist, cur);
        cur = fgetc(f_in);
    }

    if (cur != EOF)
    {
        if (buf)
        {
            buf[len] = '\0';
        }
        else
        {
            fputc('\n', f_out);
        }
    }
    return cur;
}

/*
 * Scans f_in and extracts the href values of A elements and the src values
 * of IMG elements, ignoring anything found inside <!-- ... --> comments.
 *
 * f_in       input stream (must not be NULL).
 * f_out      output stream, used only when inlineDisp is non-zero.
 * pppc       address of a NULL char** ; when inlineDisp is zero it receives a
 *            heap-allocated array of heap-allocated strings that the caller
 *            frees with release().
 * pNb        receives the number of URIs found.
 * inlineDisp non-zero: write each URI to f_out followed by '\n';
 *            zero: store the URIs through pppc.
 *
 * Does nothing (and leaves *pNb untouched) when the arguments are invalid.
 * On allocation failure the stored URIs are released, a message is printed
 * on stderr and the program exits with EXIT_FAILURE.
 */
void parse(FILE * f_in, FILE * f_out, char *** pppc, int * pNb, int inlineDisp)
{
    int in_a = 0;
    int in_img = 0;
    int in_comment = 0;
    int hist[HIST_LG] = { 0, 0, 0, 0, 0 };
    int cur = 0;
    int cnt = 0;

    if (!f_in || !pppc || *pppc || !pNb || (inlineDisp && !f_out))
    {
        return;
    }

    while ((cur = fgetc(f_in)) != EOF)
    {
        if (!in_comment && CMT_BEG(hist[2], hist[3], hist[4], cur))
        {
            /* entering a comment */
            in_comment = 1;
        }
        else if (CMT_END(hist[3], hist[4], cur))
        {
            /* leaving a comment */
            in_comment = 0;
        }
        else if (!in_comment)
        {
            if (!in_a && A_BEG(hist[3], hist[4], cur))
            {
                /* entering an A element (never from inside a comment) */
                in_a = 1;
            }
            else if (!in_img && IMG_BEG(hist[2], hist[3], hist[4], cur))
            {
                in_img = 1;
            }
            else if (in_img && IMG_END(cur))
            {
                in_img = 0;
            }
            else if ( (in_a && A_HREF(hist[0], hist[1], hist[2], hist[3], hist[4], cur)) ||
                      (in_img && IMG_SRC(hist[1], hist[2], hist[3], hist[4], cur)) )
            {
                char * slot = NULL;

                if (!inlineDisp)
                {
                    char ** p = realloc(*pppc, (cnt + 1) * sizeof(*p));

                    if (!p)
                    {
                        release(pppc, cnt);
                        fprintf(stderr,"findUri: allocation failure.\n");
                        exit(EXIT_FAILURE);
                    }
                    *pppc = p;

                    slot = malloc(URI_MAX_LG + 1);
                    if (!slot)
                    {
                        release(pppc, cnt);
                        fprintf(stderr,"findUri: allocation failure.\n");
                        exit(EXIT_FAILURE);
                    }
                }

                cur = read_attr_value(f_in, inlineDisp ? f_out : NULL, slot, hist);
                if (cur == EOF)
                {
                    /* incomplete value: it is not counted, so free it here */
                    free(slot);
                    break;
                }

                if (!inlineDisp)
                {
                    (*pppc)[cnt] = slot;
                }
                ++cnt;

                /* an unquoted value may be terminated by the '>' of the tag */
                if (IMG_END(cur))
                {
                    in_img = 0;
                }
            }
            else if (A_END(hist[4], cur))
            {
                in_a = 0;
            }
        }
        hist_push(hist, cur);
    }

    *pNb = cnt;
}

/*
 * Frees the nb strings stored in the array *pppc, then the array itself,
 * and resets *pppc to NULL. Safe to call with a NULL or empty array.
 */
void release(char *** pppc, int nb)
{
    int i;

    if (!pppc || !*pppc)
    {
        return;
    }

    for (i = nb - 1; i >= 0; --i)
    {
        free((*pppc)[i]);
        (*pppc)[i] = NULL;
    }
    free(*pppc);
    *pppc = NULL;
}