/* Auteur: Jedai */
/* Plateforme: Win32/Linux */
/* Compilateur/version:gcc/? */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
/* Size in bytes of the raw input buffer filled by read(). */
#define BUFFER_LENGTH 256
/* Maximum stored length (terminator excluded) of an attribute name,
 * a tag name and an attribute value respectively. */
#define ATTR_LENGTH 64
#define TAG_LENGTH 64
#define URL_LENGTH 1024

/* Input buffer: buff[offset .. end-1] holds bytes read but not yet consumed. */
char buff[BUFFER_LENGTH];
int offset = 0;
int end = 0;
/* One-character push-back slot; -1 means "empty". */
int last_char = -1;

/*
 * Returns the next input byte as a value in the range of unsigned char.
 *
 * A character previously handed to my_put_back() is returned first.
 * Otherwise the byte comes from buff[], which is refilled from standard
 * input when exhausted.
 *
 * This function never reports end of input to its caller: it terminates the
 * process with EXIT_SUCCESS at end of input and with EXIT_FAILURE on a read
 * error (after writing a diagnostic to stderr).
 */
int my_get_char(){
    if( last_char > -1 ){
        int pushed = last_char;
        last_char = -1;
        return pushed;
    }
    if( offset >= end ){
        ssize_t got;
        /* A read interrupted by a signal is not an error: just retry. */
        do {
            got = read( STDIN_FILENO, buff, BUFFER_LENGTH );
        } while( got == -1 && errno == EINTR );
        if( got == 0 )
            exit(EXIT_SUCCESS);
        if( got == -1 ){
            /* Save errno: fprintf may overwrite it before perror runs.
             * Diagnostics go to stderr so they never mix with the URLs
             * printed on stdout. */
            int saved = errno;
            fprintf(stderr, "Echec de la lecture sur l'entrée standard %d !\n", saved);
            errno = saved;
            perror("");
            exit(EXIT_FAILURE);
        }
        end = (int)got;
        offset = 0;
    }
    /* Convert through unsigned char so bytes >= 0x80 are never negative:
     * negative values are invalid arguments for the <ctype.h> functions and
     * would be mistaken for the "empty" marker of the push-back slot. */
    return (unsigned char)buff[offset++];
}

/*
 * Pushes one character back so that the next my_get_char() returns it.
 * Only a single character can be pending; a second call overwrites the first.
 */
void my_put_back(char c){
    /* Store the byte value, never a negative number (see last_char). */
    last_char = (unsigned char)c;
}
/* Parser state shared by every parsing routine in this file. */
int cur = 0; /* most recently read input character */
int lineno = 1; /* current input line, advanced by newline(); used in diagnostics */
char last_attr[ATTR_LENGTH+1]; /* attribute name written by _attr_name() */
char last_tag[TAG_LENGTH+1]; /* tag name written by _tag_name() */
char url[URL_LENGTH + 1]; /* attribute value written by _attr_content() */
/* Records that one more line of input has been consumed. */
void newline(){
    lineno += 1;
}
/*
 * Skips whitespace starting at the current character.
 *
 * On return, cur holds the first non-whitespace character (it is left
 * untouched when it was not whitespace to begin with). Every '\n' skipped
 * advances the line counter.
 */
void ignore_ws(){
    /* The cast keeps the argument in the range <ctype.h> accepts even if
     * cur holds a negative value produced from a signed char. */
    while( isspace((unsigned char)cur) ){
        if( cur == '\n' )
            newline();
        cur = my_get_char();
    }
}
/*
 * Reads characters from the input and compares them, one by one, with the
 * string s.
 *
 * Returns 1 when every character of s was read in order (cur is then the
 * last one). Returns 0 at the first mismatch; the mismatching character has
 * been consumed and is left in cur, and it is counted if it is a newline.
 */
int expect(char *s){
    char *wanted = s;
    while( *wanted != '\0' ){
        cur = my_get_char();
        if( cur != *wanted ){
            if( cur == '\n' ){
                newline();
            }
            return 0;
        }
        wanted++;
    }
    return 1;
}
/*
 * Consumes input up to and including the next occurrence of c.
 * On return cur equals c. Newlines skipped on the way are counted.
 */
void go_char(char c){
    for(;;){
        cur = my_get_char();
        if( cur == c ){
            break;
        }
        if( cur == '\n' ){
            newline();
        }
    }
}
// Parsing routines, one per syntactic element of the input:
void _document();
void _out();
void _tag();
void _tag_name(); // sets last_tag
void _tag_content();
void _comment_content();
int _attr_name(); // sets last_attr
void _attr_content();
/*
 * Entry point of the parser.
 *
 * Skips to the first '<'. If it is followed by '!', everything up to the
 * next '>' is discarded; otherwise the character is pushed back and the
 * construct is parsed as a tag. The rest of the input is then handled by
 * _out().
 */
void _document(){
    go_char('<');
    cur = my_get_char();
    if( cur != '!' ){
        my_put_back(cur);
        _tag();
    }
    else {
        go_char('>');
    }
    _out();
}
/*
 * Main loop: repeatedly skips text up to the next '<' and parses the tag
 * that follows. The loop itself has no exit condition.
 */
void _out(){
    while( 1 ){
        go_char('<');
        _tag();
    }
}
/*
 * Parses one tag; the opening '<' has already been consumed.
 *
 * Three forms are recognised from the first character after '<':
 *   '/'   end tag: name, optional whitespace, then '>';
 *   '!'   comment when followed by "--", otherwise skipped up to '>';
 *   else  start tag: name, attributes, optional '/', then '>'.
 *
 * Whenever the expected '>' is not found, a diagnostic is written to stderr
 * and the input is skipped up to the next '>'.
 */
void _tag(){
    cur = my_get_char();
    // end tag
    if( cur == '/' ){
        cur = my_get_char();
        _tag_name();
        ignore_ws();
        // cur already holds the character following the name and the
        // whitespace: test it directly instead of reading one more
        // character, which would swallow the input that follows '>'.
        if( cur != '>' ){
            fprintf(stderr, "Le tag fermant de %s à la ligne %d contient des caractères "
                "innattendus !\n", last_tag, lineno);
            go_char('>');
        }
        return;
    }
    // comment case
    if( cur == '!' ){
        if( ! expect("--") ){
            fprintf(stderr, "Le tag commençant par ! à la ligne %d semble ne pas être "
                "un commentaire, tentative de récupération.\n", lineno);
            // expect() consumed the mismatching character; if that was
            // already the closing '>', there is nothing left to skip.
            if( cur != '>' ){
                go_char('>');
            }
            // Not a comment: do not look for a "-->" terminator.
            return;
        }
        _comment_content();
        return;
    }
    // start tag
    _tag_name();
    _tag_content();
    // start-end tag
    if( cur == '/' ){
        go_char('>');
    }
    if( cur != '>' ){
        fprintf(stderr, "Le tag ouvrant de %s à la ligne %d contient des caractères "
            "innattendus !\n", last_tag, lineno);
        go_char('>');
    }
}
/* Tells whether c may appear in a tag name; `first` is non-zero for the
 * first character, which must be a letter. */
static int is_tag_name_char(int c, int first){
    if( first )
        return isalpha((unsigned char)c) != 0;
    return isalnum((unsigned char)c) || c == '-';
}

/*
 * Reads a tag name starting at the current character and stores it,
 * NUL-terminated, in last_tag.
 *
 * A name is a letter followed by letters, digits or '-' (so "h1" is read as
 * one name). On return cur holds the first character that is not part of the
 * name; last_tag is empty when cur was not a letter.
 *
 * A name longer than TAG_LENGTH is reported once on stderr and truncated;
 * the rest of the name is still consumed so that parsing resumes after it.
 */
void _tag_name(){
    int i = 0;
    int reported = 0;
    while( is_tag_name_char(cur, i == 0) ){
        if( i < TAG_LENGTH ){
            last_tag[i++] = (char)cur;
        }
        else if( !reported ){
            last_tag[i] = 0;
            fprintf(stderr, "Tag trop long à la ligne %d : %s\n",
                lineno, last_tag);
            reported = 1;
        }
        cur = my_get_char();
    }
    last_tag[i] = 0;
}
/*
 * Parses the attribute list of a start tag.
 *
 * Each iteration skips whitespace and reads an attribute name; when the name
 * is followed by '=', its value is read by _attr_content(). The function
 * returns when no attribute name is found, or, after skipping to the next
 * '>', when _attr_name() reports a name that is too long.
 */
void _tag_content(){
    int name_len;
    while( 1 ){
        ignore_ws();
        name_len = _attr_name();
        switch( name_len ){
        case 0:
            return;
        case -1:
            go_char('>');
            return;
        default:
            break;
        }
        ignore_ws();
        if( cur != '=' ){
            continue;
        }
        cur = my_get_char();
        ignore_ws();
        _attr_content();
    }
}
void _comment_content(){
for(;;){
go_char('-');
if( expect("->") ){
return;
}
}
}
/* Tells whether c may appear in an attribute name; `first` is non-zero for
 * the first character, which must be a letter. */
static int is_attr_name_char(int c, int first){
    if( first )
        return isalpha((unsigned char)c) != 0;
    return isalnum((unsigned char)c)
        || c == '-' || c == '_' || c == ':' || c == '.';
}

/*
 * Reads an attribute name starting at the current character and stores it,
 * NUL-terminated, in last_attr.
 *
 * A name is a letter followed by letters, digits, '-', '_', ':' or '.', so
 * names such as "data-id" or "http-equiv" are read as a whole.
 *
 * Returns the length of the name, 0 when the current character cannot start
 * a name (last_attr is then empty), or -1 when the name is longer than
 * ATTR_LENGTH (a diagnostic is written to stderr and last_attr holds the
 * first ATTR_LENGTH characters). Except in the -1 case, cur holds the first
 * character after the name on return.
 */
int _attr_name(){
    int i = 0;
    while( is_attr_name_char(cur, i == 0) ){
        if( i >= ATTR_LENGTH ){
            last_attr[i] = 0;
            fprintf(stderr, "Attr trop long à la ligne %d : %s\n",
                lineno, last_attr);
            return -1;
        }
        last_attr[i++] = (char)cur;
        cur = my_get_char();
    }
    last_attr[i] = 0;
    return i;
}
/* Appends cur to url[] at index *len. On the first character that does not
 * fit, terminates url, reports the overflow on stderr and sets *truncated;
 * once *truncated is set nothing more is stored. */
static void url_store(int *len, int *truncated){
    if( *truncated )
        return;
    if( *len < URL_LENGTH ){
        url[(*len)++] = (char)cur;
        return;
    }
    url[*len] = 0;
    fprintf(stderr, "URL trop longue à la ligne %d : %s\n",
        lineno, url);
    *truncated = 1;
}

/* Compares name with the lower-case string lower, ignoring the case of
 * name. Returns non-zero when they are equal. */
static int name_matches(const char *name, const char *lower){
    while( *name && *lower ){
        if( tolower((unsigned char)*name) != *lower )
            return 0;
        name++;
        lower++;
    }
    return *name == *lower;
}

/*
 * Reads an attribute value starting at the current character into url, and
 * prints it on stdout, followed by a newline, when the attribute is the
 * href of an <a> tag or the src of an <img> tag (names compared without
 * regard to case, using last_tag and last_attr).
 *
 * A value is either enclosed in double or single quotes, or unquoted, in
 * which case it ends at the first whitespace or '>'. A backslash and the
 * character that follows it are both stored, so an escaped delimiter does
 * not end the value.
 *
 * On return cur holds the first character after the value: the closing
 * quote of a quoted value is consumed, so that the attributes following it
 * can be parsed. A value longer than URL_LENGTH is reported on stderr,
 * consumed to its end and not printed.
 */
void _attr_content(){
    int i = 0;
    int truncated = 0;
    if( cur == '"' || cur == '\'' ){
        int quote = cur;
        cur = my_get_char();
        while( cur != quote ){
            if( cur == '\n' )
                newline();
            url_store(&i, &truncated);
            if( cur == '\\' ){
                cur = my_get_char();
                if( cur == '\n' )
                    newline();
                url_store(&i, &truncated);
            }
            cur = my_get_char();
        }
        /* Step past the closing quote. */
        cur = my_get_char();
    }
    else {
        /* Stop at whitespace AND at '>', which closes the tag. */
        while( !isspace((unsigned char)cur) && cur != '>' ){
            url_store(&i, &truncated);
            if( cur == '\\' ){
                cur = my_get_char();
                if( cur == '\n' )
                    newline();
                url_store(&i, &truncated);
            }
            cur = my_get_char();
        }
    }
    if( truncated )
        return;
    url[i] = 0;
    if( ( name_matches(last_tag, "a") && name_matches(last_attr, "href") )
        || ( name_matches(last_tag, "img") && name_matches(last_attr, "src") )
      ) {
        printf("%s\n", url);
    }
}
/*
 * Program entry point: hands control to the parser, which reads standard
 * input. The return statement is reached only if _document() returns.
 */
int main(){
    _document();

    return EXIT_SUCCESS;
}