#!/usr/bin/perl -w use strict; use IO::Socket::INET; use Data::Dumper; #use threads; use Unicode::String; package Parser; require Exporter; $|=1; our @ISA = qw(Exporter); our @EXPORT = qw(get_files $save_old); my $v =1; my $save_old; sub new { my $self = {}; $self->{FEEDS} = undef; $self->{FEEDS_DIR} = "feeds"; $self->{CONFIG_FILE} = "config"; $self->{DATA_DIR} = "data"; $self->{SAVE_OLD} = 20; $self->{UPDATE_TIMER} = undef; $self->{DEFIL_TIMER} = undef; $self->{C_TEXTE} = []; $self->{NEWS_TITLE_REF} = []; $self->{NEWS_URL_REF} = undef; $self->{BROWSER} = "firefox"; bless($self); return $self; } sub set_browser($) { my $self = shift; $self->{BROWSER} = shift; } sub get_browser($) { my $self = shift; return $self->{BROWSER}; } sub set_save_old($) { my $self = shift; $self->{SAVE_OLD} = shift; } sub set_defil_timer($) { my $self = shift; $self->{DEFIL_TIMER} = shift; } sub set_update_timer($) { my $self = shift; $self->{UPDATE_TIMER} = shift; } sub set_config_file($) { my $self = shift; $self->{CONFIG_FILE} = shift; } sub set_data_dir($) { my $self = shift; $self->{DATA_DIR} = shift; } sub set_feeds_dir($) { my $self = shift; $self->{FEEDS_DIR} = shift; } sub get_news_title_ref() { my $self =shift; return $self->{NEWS_TITLE_REF}; } sub get_news_url_ref() { my $self =shift; return $self->{NEWS_URL_REF}; } sub get_config_file() { my $self =shift; return $self->{CONFIG_FILE}; } sub get_update_timer() { my $self =shift; return $self->{UPDATE_TIMER}; } sub get_defil_timer() { my $self =shift; return $self->{DEFIL_TIMER}; } sub feeds() { my $self = shift; return $self->{FEEDS}; } sub read_all() { my $self = shift; foreach my $file(keys %{$self->{FEEDS}}) { for (0 .. scalar @{$self->{FEEDS}{$file}}-1) { if (${$self->{FEEDS}{$file}}[$_]) { ${$self->{FEEDS}{$file}}[$_]{read} = 1; } # print "File : $hash_ref\n"; } } } sub read_none() { my $self = shift; foreach my $file(keys %{$self->{FEEDS}}) { for (0 .. scalar @{$self->{FEEDS}{$file}}) { if (${$self->{FEEDS}{$file}}[$_]) { ${$self->{FEEDS}{$file}}[$_]{read} = 0; } # print "File : $hash_ref\n"; } } } sub parse(@) { my $parse_ref = shift; my @parse_data = @{$parse_ref} ; #print "RECU : ".$parse_ref."\n"; my @titles; my $i=-1; my $elt; my $opened; my $start=0; my $old_m; my $old_d=0; my $content; my $perdu=0; foreach(@parse_data) { # print "$_ \n\touvert: $opened\n" if $opened; # print "baliser pour $_ ? \n" if !$opened; if(/| .*>)/) { $start = 1; $i++; } elsif (/<\/item>/) { $start=0; } elsif($start) { if (!$opened and /^\s*<([\w:]+)(?: .*)?>(?:<\!\[CDATA\[)?(.*?)(\]\]>)?<\/\1>/) { my ($opened,$content) = ($1,$2); $content =~ s/\]\]>$//; # print "<$opened /> pour $_\n"; $titles[$i]{"$opened"}=$content; $opened= ""; } elsif(!$opened and /^\s*<([\w\:]+)(?: [^>]+?)?>(?:$content pour $_\n"; push(@{$titles[$i]{"$opened"}},$content); } elsif($opened && /(.+?)(?:\]\]>)?<\/$opened>/) { # print " pour $_\n"; push(@{$titles[$i]{"$opened"}},$1); $opened=""; } elsif($opened) { push(@{$titles[$i]{"$opened"}},$_); } else { $perdu++; } } } #print "($perdu unused line) " if $perdu; return @titles; } sub start_parse { my $self = shift; my $data_dir = $self->{DATA_DIR}; # print "START PARSE $data_dir !\n"; opendir(DATA,$data_dir) or die $!; my @fichiers = grep { ! /^\./ } readdir(DATA); # print "FICHIERS : @fichiers\n"; foreach my $file(@fichiers) { if (!-f "$data_dir/$file") { next; } # print "etud ede $data_dir/$file\n"; open(INFO,"$data_dir/$file") or die $!; my @data = ; my @resultat = &parse(\@data); $self->{FEEDS}{"$file"} = \@resultat; } } sub update_parse { my $self = shift; # print "UPDATE PARSE: lecture des feeds recus ..." if $v; my $feeds_dir = $self->{FEEDS_DIR}; # print "FEED DIR: $feeds_dir\n"; opendir(DATA,$feeds_dir); my @fichiers = grep { ! /^\./ } readdir(DATA); ID: foreach my $file(@fichiers) { # print "Fichier: $file\n"; my @temp_feeds; open(INFO,"$feeds_dir/$file"); my @data = ; my @resultat = &parse(\@data); for my $i(0..$#resultat) { # print "element en cours: ".(keys %{$self->{FEEDS}{$file}[0]})."\n"; if ($self->{FEEDS}{$file}[0]{'title'} and $resultat[$i]{'title'} eq $self->{FEEDS}{$file}[0]{'title'}) { last ; } else { $resultat[$i]{'read'} = 0; } if (ref(${$self->{FEEDS}{"$file"}}[0]) ne "HASH") { ${$self->{FEEDS}{"$file"}}[0] = $resultat[$i]; } else { push(@temp_feeds,$resultat[$i]); } } unshift(@{$self->{FEEDS}{"$file"}}, @temp_feeds); } foreach my $file(keys %{$self->{FEEDS}}) { while(scalar @{$self->{FEEDS}{$file}} > $self->{SAVE_OLD}) { pop(@{$self->{FEEDS}{$file}}) } } } sub c_texte() { my $self = shift; return $self->{C_TEXTE}; } sub titles { my $self = shift; my @titles=(); foreach my $file(keys %{$self->{FEEDS}}) { foreach my $arr(@{$self->{FEEDS}{$file}}) { push(@titles,${$arr}{'title'}) if ${$arr}{'title'}; } } return @titles; } sub generate_c_texte { my $self = shift; $self->{C_TEXTE} =(); $self->{NEWS_TITLE_REF} = (); my $compteur = 0; foreach my $file(keys %{$self->{FEEDS}}) { my @file = split '',$file; foreach my $arr(@{$self->{FEEDS}{$file}}) { if (${$arr}{'title'} and ${$arr}{'read'} == 0) { push(@{$self->{NEWS_TITLE_REF}},$compteur); ${$self->{NEWS_URL_REF}{"$compteur"}} = ${$arr}{'link'}; # print "URL: ${$arr}{'link'} \n"; ${$arr}{'title'} =~ s/^<\!\[CDATA\[(.*)\]\]>$/$1/g; foreach(@file) { push (@{$self->{C_TEXTE}},"".$_.""); $compteur++; } # print "\n"; push(@{$self->{C_TEXTE}},":"," "); $compteur++; my @special=(); foreach(split '',${$arr}{'title'}) { if ($_ eq "&" and !@special) { @special=("&"); next; } elsif(@special) { push(@special,$_); } if(@special and $_ eq ";") { push(@{$self->{C_TEXTE}},join("",@special)); @special=(); $compteur++;} elsif(!@special) { push(@{$self->{C_TEXTE}},$_); $compteur++; } } push(@{$self->{C_TEXTE}}," ","-","-"," "); } } } if (!$self->{C_TEXTE}) { @{$self->{C_TEXTE}} = ("N","O","T","H","I","N","G"," ","N","E","W"," !"); } # print join(" ",@{$self->{C_TEXTE}}); return $self->{C_TEXTE}; } sub update_file($) { my $self = shift; my $data_dir = $self->{DATA_DIR}; # print "Execution de l update_files\n"; # print "DATA DIR $data_dir\n"; foreach my $file(keys %{$self->{FEEDS}}) { # print "Mise a jour du fichier $data_dir/$file \n"; open(WRITE,">$data_dir/$file") or die("Error $! \n"); my @arr = @{$self->{FEEDS}{$file}}; if ($self->{SAVE_OLD} == 0) { $self->{SAVE_OLD} = scalar @arr; } foreach my $key(@arr[0.. (scalar @arr > $self->{SAVE_OLD} ? $self->{SAVE_OLD} -1 : (scalar @arr)-1)]) { if (!%{$key}) { next; } print WRITE "\n"; foreach(keys %{$key}) { if (ref(${$key}{$_}) eq "ARRAY") { print WRITE "<$_>"; foreach my $t(@{$key}{$_}) { foreach my $j(@{$t}) { if ($j) { print WRITE $j; } } } print WRITE "\n"; } else { print WRITE "<$_>".${$key}{$_}."\n"; # print "<$_>${$key}{$_}\n"; } } print WRITE "\n\n\n"; } close WRITE; } } sub get_files() { # print "
"; my $self = shift; my $file = $self->{CONFIG_FILE}; # print "

Telechargement des nouveaux flux RSS:

\n" if $v; # print "FILE : $file\n"; open(FILES,$file) or die $!; my @files = ; close FILES; my $counter=0; foreach my $data_file(@files) { my ($emplacement,$file) = split(' ',$data_file); # print "File : $file\n"; chomp($file); chomp($emplacement); # print "Etude de $file\n"; $counter++; my ($sock,$contenu); my ($addr,$path,$fichier) = $file =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/; $path = $path ? $path : ""; if (!$addr or !$fichier) { warn("PB avec $data_file avec $addr et $fichier !\n"); next; } print "

\tDownloading from $addr:

" if $v; $sock = IO::Socket::INET->new ( PeerAddr=>"$addr", PeerPort=>'http(80)', Proto=>'tcp' ) or next; $sock -> autoflush; my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nUser-Agent: tfeserver Parser\r\n\r\n"; print $sock $requete; open(FICHIER,">".${$self}{FEEDS_DIR}."/$emplacement") or warn "Impossible d ouvrir la sortie (".${$self}{FEEDS_DIR}."/$emplacement) en ecriture \n"; my @temp = (); my $i=0; my $chunked=0; my $chunk_count=0; my $chunk_reprise=0; my $encoding = 0; my $started=0; my $check=1; while(defined($_ = <$sock>)) { $contenu = $_; if (!$started) { if ($contenu =~ /HTTP\/1.1 404/i) { print "

Erreur 404 : $data_file!

\n\n"; } if ($contenu =~ /Transfer-Encoding: chunked/) { print "

CHUNKED !

" if $v; $chunked=1; } if ($contenu =~ /Content-Length: (\d+)/i) { $chunk_count=$1; } if ($contenu =~ /Connection: close/i) { $check=0; } if ($contenu =~ /Charset=(.*)/i) { $encoding=$1; } if ($contenu =~ /^\r?$/) { $started=1; } } else { if (!$chunked) { $chunk_count-= length($contenu); $contenu =~ s/\r//g; $contenu =~ s/\n//g; $SIG{__WARN__} = sub { print ""; } ; if ($encoding =~ /utf\-(\d+)/i) { $contenu = Unicode::String::utf8("$contenu")->latin1; } push(@temp,$contenu); if ($chunk_count <= 0 and $check) { close $sock; last;} } else { if ($chunk_count<=0) { $contenu =~ s/[^a-f0-9]//g; $chunk_count = hex $contenu; if ($chunk_count <= 0 && length($contenu)>0) { close $sock; last; } $chunk_reprise=1; } else { $chunk_count-= length($contenu); { $SIG{__WARN__} = sub { print ""; } ; no warnings; if ($encoding =~ /utf\-(\d+)/i && $contenu) { $contenu = Unicode::String::utf8("$contenu")->latin1; } } $contenu =~ s/\r//g; $contenu =~ s/\n//g; if ($chunk_reprise) { if( @temp) { $temp[$#temp].=$contenu; } else { push(@temp,$contenu); } $chunk_reprise=0; } else { push(@temp,$contenu); } } } } # $i++; } print FICHIER join("\n",@temp); close FICHIER; close $sock; print "

\tOK

\n" if $v; } print "

Fin Mise a jour

\n\n" if $v; } sub conf() { my $self = shift; my $window = Gtk2::Window->new('toplevel') ; $window->set_title ("Affichage des flux") ; $window->set_default_size(600 ,400 ) ; $window->show(); my $hbox = Gtk2::VBox->new(1,0); $window->add($hbox); $hbox->show(); my @titres; foreach my $elt($self->{FEEDS}) { my %contenu = %{$elt}; foreach my $cle(keys %contenu) { push(@titres, Gtk2::Label->new()); $titres[$#titres]->set_markup("$cle"); foreach my $element(@{$self->{FEEDS}->{$cle}}) { if (!$element) { next; } if (!$element->{title}) { foreach(keys (%{$element})) { print "ERROR: $_\n"; } } else { push(@titres, Gtk2::Label->new(($element->{read} ? "" : "(NEW)")."\t$element->{title}")); # $titres[$#titres]->set_justify('left'); } } } } foreach(@titres) { # $_->set_justify('left'); $hbox->pack_start($_,1,0,0); $_->show(); } } 1;