1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | #!/bin/bash echo Generating new data from log echo "#generated sql file" > data.sql echo "truncate table log_data;" >> data.sql echo "START TRANSACTION;" >> data.sql FIND_PATTERN="\ \(\S*\)\s*\ \(\S*\)\s*\ \(\S*\s*\S*\)\s*\ [\x5B]\([^\x5D]*\)[\x5D]\s*\ \"\([^\"]*\)\"\s*\ \(\S*\)\s*\ \(\S*\)\s*\ \"\([^\"]*\)\"\s*\ \"\([^\"]*\)\"\s*\ "; REPLACE_PATTERN="\ INSERT INTO log_data \ (domain,ip,other, date ,url,status,size,referral,clientstring) \ VALUES \ ( '\1' , '\2' , '\3' , '\4' , '\5' , '\6' , '\7' , '\8' , '\9' );"; APACHE_FILE= "other_vhosts_access.log" ; if [ -f $APACHE_FILE.size ]; then ALREADY_PROCEED=` cat $APACHE_FILE.size` echo "Already proceed size ($APACHE_FILE): " $ALREADY_PROCEED; else ALREADY_PROCEED=0; echo "New log file $APACHE_FILE" ; fi FILTER_PATTERN= "\.css|\.ico|\.png|\.jpg|\.txt|\.js|\.gif|\.swf" #echo $FIND_PATTERN"/"$REPLACE_PATTERN; cat $APACHE_FILE | tail -c +$ALREADY_PROCEED | #preskocime jiz zpracovane radky #head -5000 | #debug #head -1128863 | #tail -2 | #tail -n +1177103 | head -3 | #sed -r "s/(\(.*\))//g" | #debug - odebere z clientstring (...) #sed "s/^[^+]*//g" | #debug - odebere pocatek stringu az do GET #sed -r "s/Mozilla|Gecko|Firefox//g" | #debug odebere tyhle klicovy slova sed -r "s/^(subdomain|www)\.//" | #odebereme prefix subdomain. a www. sed -r "s/^([^:]*)(:[0-9]*)/\1/" | #odebereme port sed -r -n "/^(orm-|inventic).*/p" | #exportujem jen nektere domeny sed "s/\([\x5c][\x22]\)//g" | #odebrani \" retezcu sed "s/\([']\)//g" | #odebrani apostrof ktery delaj pak bordel v$ sed -r "s/ \"GET ([\x2F].*) HTTP[\x2F][0-9.]*/ \"\1/" | #dame pryc GET a HTTP1.1 #sed -r -n "/$FILTER_PATTERN/p" #| debug- vypise filtrovane .css/.png/... sed -r "/$FILTER_PATTERN/d" | #vyfiltrujeme pryc .css, .png atd #sed "/^$FIND_PATTERN$/d" | #debug - toto lze pouzit na vypis radku kte$ sed -n "/^$FIND_PATTERN$/p" | #vypiseme vsechny radky ktere jsou validni sed "s/$FIND_PATTERN/$REPLACE_PATTERN/" >> data.sql echo "COMMIT;" >> data.sql #ulozime velikost zpracovaneho logu stat -c "%s" $APACHE_FILE > $APACHE_FILE.size #vlozime data do Mysql echo Putting data to MySQL mysql \ --user=root \ --password=1234 \ --host=192.168.0.10 \ --database=server_log < data.sql |
Leave a Reply