How to parse Apache Log and insert data to MySQL database

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/bash
 
echo Generating new data from log
echo "#generated sql file" > data.sql
 
echo "truncate table log_data;" >> data.sql
echo "START TRANSACTION;" >> data.sql
 
FIND_PATTERN="\
\(\S*\)\s*\
\(\S*\)\s*\
\(\S*\s*\S*\)\s*\
[\x5B]\([^\x5D]*\)[\x5D]\s*\
\"\([^\"]*\)\"\s*\
\(\S*\)\s*\
\(\S*\)\s*\
\"\([^\"]*\)\"\s*\
\"\([^\"]*\)\"\s*\
";
 
REPLACE_PATTERN="\
INSERT INTO log_data \
(domain,ip,other,date,url,status,size,referral,clientstring) \
VALUES \
('\1','\2','\3','\4','\5','\6','\7','\8','\9');";
 
APACHE_FILE="other_vhosts_access.log";
 
if [ -f $APACHE_FILE.size ];
then
   ALREADY_PROCEED=`cat $APACHE_FILE.size`
   echo "Already proceed size ($APACHE_FILE): "$ALREADY_PROCEED;
else
   ALREADY_PROCEED=0;
   echo "New log file $APACHE_FILE";
fi
 
FILTER_PATTERN="\.css|\.ico|\.png|\.jpg|\.txt|\.js|\.gif|\.swf"
 
#echo $FIND_PATTERN"/"$REPLACE_PATTERN;
 
cat $APACHE_FILE |
tail -c +$ALREADY_PROCEED | #preskocime jiz zpracovane radky
#head -5000 | #debug
#head   -1128863 | #tail -2  |
#tail -n +1177103 | head -3 |
#sed -r "s/(\(.*\))//g" | #debug - odebere z clientstring (...)
#sed  "s/^[^+]*//g" | #debug - odebere pocatek stringu az do GET
#sed -r "s/Mozilla|Gecko|Firefox//g" | #debug odebere tyhle klicovy slova
sed -r "s/^(subdomain|www)\.//"   | #odebereme prefix subdomain. a www.
sed -r "s/^([^:]*)(:[0-9]*)/\1/"  | #odebereme port
sed -r -n "/^(orm-|inventic).*/p" | #exportujem jen nektere domeny
sed "s/\([\x5c][\x22]\)//g"       | #odebrani \" retezcu
sed "s/\([']\)//g"                | #odebrani apostrof ktery delaj pak bordel v$
sed -r "s/ \"GET ([\x2F].*) HTTP[\x2F][0-9.]*/ \"\1/" | #dame pryc GET a HTTP1.1
#sed -r -n  "/$FILTER_PATTERN/p" #| debug- vypise filtrovane .css/.png/...
sed -r "/$FILTER_PATTERN/d"       | #vyfiltrujeme pryc .css, .png atd
#sed  "/^$FIND_PATTERN$/d"        | #debug - toto lze pouzit na vypis radku kte$
sed -n "/^$FIND_PATTERN$/p"       | #vypiseme vsechny radky ktere jsou validni
sed "s/$FIND_PATTERN/$REPLACE_PATTERN/" >> data.sql
 
echo "COMMIT;" >> data.sql
 
#ulozime velikost zpracovaneho logu
stat -c"%s" $APACHE_FILE > $APACHE_FILE.size
#vlozime data do Mysql
echo Putting data to MySQL
mysql \
  --user=root \
  --password=1234 \
  --host=192.168.0.10 \
  --database=server_log < data.sql

Leave a Reply

Your email address will not be published. Required fields are marked *