HTTP 요청 트래픽 파싱

HTTP 요청 정보가 저장되어 있는 파일에서, 윈도우 실행 파일(.exe)의 다운로드를 요청하는 URL만 뽑아내는 소스.

GET /xxxx.exe HTTP/1.1

Host: www.xxxxxx.com

Connection: keep-alive

Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8

User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36

Accept-Encoding: gzip, deflate, sdch

Accept-Language: ko-KR,ko;q=0.8,en-US;q=0.6,en;q=0.4

/*
* ParseGetRequest
* written by franc3sco
* best viewed with tabstop 4
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Request-line markers searched for in each input line. */
#define GET_STRING "GET /"
#define POST_STRING "POST /"

/* A GET line is kept only if it contains GET_INCLUDESTRING and none of the GET_EXCEPT_* markers. */
#define GET_EXCEPT_WINDOWUPDATE "/msdownload/update/"
#define GET_EXCEPT_NONDIRECT "?"
#define GET_INCLUDESTRING ".exe HTTP/1."

/* Host header marker, and host substrings whose requests are dropped. */
#define HOST_HEADER_STRING "Host: "
#define HOST_EXCEPT_GOOGLE ".google.com"
#define HOST_EXCEPT_WINDOWS ".windowsupdate.com"
#define HOST_EXCEPT_MICROSOFT ".microsoft.com"
#define HOST_EXCEPT_FACEBOOK ".facebook.com"
#define HOST_EXCEPT_MSN ".msn.com"
#define HOST_EXCEPT_HP ".hp.com"
#define HOST_EXCEPT_SUN ".sun.com"
#define HOST_EXCEPT_LIVE ".live.com"
#define HOST_EXCEPT_ADOBE ".adobe.com"
#define HOST_EXCEPT_ORACLE ".oracle.com"
#define HOST_EXCEPT_APPLE ".apple.com"
#define HOST_EXCEPT_SKYPE ".skype.com"
#define HOST_EXCEPT_YAHOO ".software.yahoo.com"
#define HOST_EXCEPT_UTORRENT "download.utorrent.com"

/* Size of the line buffer and of the long text fields of a request record. */
#define LINEBUF_LEN 1024
/* Number of request records the program works with. */
#define REQUEST_COUNT_LIMIT 20000

/* Output path prefix; the date string and ".txt" are appended to it. */
#define SAVEFILENAME "/home3/exeurl/exeurl_"

/* Whitespace-trimming helpers; their definitions are at the end of this file. */
char* trim(char *s);
char* rtrim(char *s);
char* ltrim(char *s);

/*
 * One candidate download request.
 *
 * The program keeps an array of these records. ListRawData and
 * ConvertRawDataToHTTPURL stop at the first record whose sz_HOST is empty;
 * SaveToFile stops at the first record whose sz_REQUESTURL is empty.
 */
typedef struct _GETREQUESTURL {
char sz_Date[16];               /* copy of sz_global_date, set by ParseLine */
char sz_HOST[64];               /* the matching "Host: ..." input line, cut to 63 characters */
char sz_GETPARAM[LINEBUF_LEN];  /* the matching "GET /..." input line */
char sz_REQUESTURL[LINEBUF_LEN]; /* URL assembled by ConvertRawDataToHTTPURL */
} GETREQUESTURL;

/*
 * Processing stages, called in this order by main (ListRawData is a debug
 * dump whose call in main is commented out). Each takes a pointer to the
 * first record of the request array.
 */
int ParseLine(char *filename, GETREQUESTURL *);   /* read the input file into the array */
int ListRawData(GETREQUESTURL *);                 /* print the stored Host / GET lines */
int ConvertRawDataToHTTPURL(GETREQUESTURL *);     /* fill sz_REQUESTURL of each record */
int SaveToFile(GETREQUESTURL *);                  /* write date and URL of each record to a file */

/* Number of records ParseLine has stored (GET line plus an accepted Host line). */
int total_download = 0;
/* Number of GET lines ParseLine matched, counted before the Host filter is applied. */
int total_any_download = 0;
/* Date string taken from the second command-line argument; used for records and the output file name. */
char sz_global_date[16] = {0};
/*
 * Program entry point.
 *
 * Usage: <program> <request-file> <date>
 *
 *   request-file  text file holding the captured HTTP request lines
 *   date          label stored with every record and used in the output
 *                 file name; must fit in sz_global_date
 *
 * Runs the pipeline ParseLine -> ConvertRawDataToHTTPURL -> SaveToFile on a
 * zero-initialised record array.
 *
 * Returns 0 on success and EXIT_FAILURE when the arguments are missing or
 * too long, or when the record array cannot be allocated.
 */
int
main(int argc, char *argv[]) {

    GETREQUESTURL *p_GETREQUESTURL = NULL;
    int n;

    if ( argc < 3 ) {
        fprintf(stderr, "Usage: %s <request-file> <date>\n",
                argc > 0 ? argv[0] : "ParseGetRequest");
        return EXIT_FAILURE;
    }

    /* Bounded copy: an over-long date used to overflow the 16-byte global. */
    n = snprintf(sz_global_date, sizeof(sz_global_date), "%s", argv[2]);
    if ( n < 0 || (size_t)n >= sizeof(sz_global_date) ) {
        fprintf(stderr, "[Error] Date argument is too long (max %d characters)\n",
                (int)sizeof(sz_global_date) - 1);
        return EXIT_FAILURE;
    }

    /*
     * One record more than REQUEST_COUNT_LIMIT, all zeroed: the later stages
     * walk the array until they meet an empty record, so a terminator must
     * exist even when every usable slot has been filled.
     */
    p_GETREQUESTURL = calloc((size_t)REQUEST_COUNT_LIMIT + 1, sizeof(*p_GETREQUESTURL));

    if ( ! p_GETREQUESTURL ) {
        printf("[Error] Memory Allocation Failed\n");
        return EXIT_FAILURE;
    }

    /* The file name is passed straight through; no fixed-size copy is needed. */
    ParseLine(argv[1], p_GETREQUESTURL);

    /* For debugging, ListRawData(p_GETREQUESTURL) can be called here. */

    ConvertRawDataToHTTPURL(p_GETREQUESTURL);

    SaveToFile(p_GETREQUESTURL);

    free(p_GETREQUESTURL);

    return 0;
}

/*
 * ParseLine - read the request file and collect candidate download requests.
 *
 * A line is taken as the request line of a candidate when it contains
 * GET_STRING and GET_INCLUDESTRING, does not contain GET_EXCEPT_NONDIRECT,
 * and does not contain GET_EXCEPT_WINDOWUPDATE. The next line containing
 * HOST_HEADER_STRING completes the candidate, unless it names one of the
 * excluded hosts, in which case the candidate is dropped.
 *
 * For every completed candidate the request line, the Host line (cut to the
 * size of sz_HOST) and sz_global_date are stored in the current record, and
 * the record pointer advances.
 *
 * filename         path of the text file to read
 * p_GETREQUESTURL  first record of a zero-initialised array of
 *                  REQUEST_COUNT_LIMIT records
 *
 * Updates the globals total_any_download and total_download.
 * Returns the number of records stored by this call. Terminates the process
 * with EXIT_FAILURE when the file cannot be opened.
 */
int ParseLine(char *filename, GETREQUESTURL * p_GETREQUESTURL) {

    /* Host substrings whose requests are not recorded. */
    static const char *const excluded_hosts[] = {
        HOST_EXCEPT_GOOGLE,  HOST_EXCEPT_WINDOWS, HOST_EXCEPT_MICROSOFT,
        HOST_EXCEPT_FACEBOOK, HOST_EXCEPT_HP,     HOST_EXCEPT_SUN,
        HOST_EXCEPT_MSN,     HOST_EXCEPT_LIVE,    HOST_EXCEPT_ADOBE,
        HOST_EXCEPT_APPLE,   HOST_EXCEPT_SKYPE,   HOST_EXCEPT_ORACLE,
        HOST_EXCEPT_YAHOO,   HOST_EXCEPT_UTORRENT
    };
    const size_t n_excluded = sizeof(excluded_hosts) / sizeof(excluded_hosts[0]);

    int b_get_header_checked = 0;   /* set while a GET line is waiting for its Host line */
    int stored = 0;
    FILE *fp = NULL;
    char buf[LINEBUF_LEN] = {0};

    fp = fopen(filename, "r");

    if ( !fp ) {
        printf("file open failed\n");
        exit(EXIT_FAILURE);
    }

    while ( fgets(buf, sizeof(buf), fp) != NULL ) {
        size_t i;
        int excluded = 0;

        /*
         * Stop one slot early: the final record stays zeroed so that the
         * code walking the array always finds an empty terminator record.
         */
        if (total_download >= REQUEST_COUNT_LIMIT - 1) {
            printf("Request Process Buff size overflow : %d\n", REQUEST_COUNT_LIMIT);
            break;
        }

        trim(buf);

        /*
         * Cut the line at the first CR or LF. strcspn is safe on an empty
         * string (the old strlen(buf)-1 index was not) and keeps a trailing
         * '\r' out of the stored Host line.
         */
        buf[strcspn(buf, "\r\n")] = '\0';

        if ( strstr(buf, GET_STRING) && strstr(buf, GET_INCLUDESTRING)
             && ! strstr(buf, GET_EXCEPT_NONDIRECT) ) { /* GET request parsing */
            b_get_header_checked = 0;
            memset(p_GETREQUESTURL->sz_GETPARAM, '\0', sizeof(p_GETREQUESTURL->sz_GETPARAM));

            /* MS update request */
            if ( strstr(buf, GET_EXCEPT_WINDOWUPDATE) ) {
                continue;
            }

            total_any_download++;
            b_get_header_checked = 1;
            snprintf(p_GETREQUESTURL->sz_GETPARAM, sizeof(p_GETREQUESTURL->sz_GETPARAM), "%s", buf);
        } /* if end - GET request parsing */

        if ( b_get_header_checked && strstr(buf, HOST_HEADER_STRING) ) {
            memset(p_GETREQUESTURL->sz_HOST, '\0', sizeof(p_GETREQUESTURL->sz_HOST));
            b_get_header_checked = 0;

            /* Excluded hosts */
            for (i = 0; i < n_excluded; i++) {
                if ( strstr(buf, excluded_hosts[i]) ) {
                    excluded = 1;
                    break;
                }
            }
            if ( excluded ) {
                continue;
            }

            /* snprintf truncates to the field size and always terminates. */
            snprintf(p_GETREQUESTURL->sz_HOST, sizeof(p_GETREQUESTURL->sz_HOST), "%s", buf);
            snprintf(p_GETREQUESTURL->sz_Date, sizeof(p_GETREQUESTURL->sz_Date), "%s", sz_global_date);
            ++total_download;
            ++stored;
            p_GETREQUESTURL++;
        }

    } /* while loop end */

    fclose(fp);

    return stored;
}

/*
 * ListRawData - debug dump of the collected records.
 *
 * Prints a header line with the global counters, then the stored Host line
 * and GET line of every record up to the first record whose sz_HOST is
 * empty.
 *
 * Returns the number of records printed.
 */
int
ListRawData (GETREQUESTURL * p_GETREQUESTURL) {

    int count = 0;

    printf("--- [DOWNLOAD COUNT (Total : %d / Target : %d) ---- \n", total_any_download, total_download);

    for ( ; p_GETREQUESTURL->sz_HOST[0] != '\0'; p_GETREQUESTURL++) {
        printf("[idx:%d] HOST : %s, GET : %s\n", ++count, p_GETREQUESTURL->sz_HOST, p_GETREQUESTURL->sz_GETPARAM);
    }

    return count;
}

/*
 * Find the second space-separated field of s without modifying s.
 * On success returns a pointer to its first character and stores its length
 * in *len; returns NULL when s has fewer than two fields.
 */
static const char *
url_second_field(const char *s, size_t *len) {
    s += strspn(s, " ");    /* leading spaces */
    s += strcspn(s, " ");   /* first field */
    s += strspn(s, " ");    /* separator */
    *len = strcspn(s, " ");
    return (*len > 0) ? s : NULL;
}

/*
 * ConvertRawDataToHTTPURL - build "http://<host><path>" for every record.
 *
 * <host> is the second space-separated field of sz_HOST and <path> the
 * second space-separated field of sz_GETPARAM. The result is written to
 * sz_REQUESTURL, truncated to the size of that field if necessary.
 *
 * Records are processed up to the first one whose sz_HOST is empty. A
 * record lacking either field is dropped and the following records are
 * moved down, so the array never contains a gap: an empty sz_REQUESTURL in
 * the middle of the array would be taken as the end of the data by code
 * that walks it by that field.
 *
 * Prints a header line with the global counters first.
 * Returns the number of URLs built (0 when p_GETREQUESTURL is NULL).
 */
int
ConvertRawDataToHTTPURL(GETREQUESTURL * p_GETREQUESTURL) {
    GETREQUESTURL *out = p_GETREQUESTURL;   /* next slot to hold a finished record */
    int built = 0;

    printf("--- [DOWNLOAD COUNT (Total : %d / Target : %d) ---- \n", total_any_download, total_download);

    if ( p_GETREQUESTURL == NULL ) {
        return 0;
    }

    for ( ; p_GETREQUESTURL->sz_HOST[0] != '\0'; p_GETREQUESTURL++) {
        size_t host_len = 0;
        size_t path_len = 0;
        const char *host = url_second_field(p_GETREQUESTURL->sz_HOST, &host_len);
        const char *path = url_second_field(p_GETREQUESTURL->sz_GETPARAM, &path_len);

        if ( host == NULL || path == NULL ) {
            continue;   /* incomplete record: drop it */
        }

        /* host and path still point into the source record, which stays intact. */
        if ( out != p_GETREQUESTURL ) {
            *out = *p_GETREQUESTURL;
        }

        snprintf(out->sz_REQUESTURL, sizeof(out->sz_REQUESTURL), "http://%.*s%.*s",
                 (int)host_len, host, (int)path_len, path);

        out++;
        built++;
    } /* for loop end */

    /* If records were dropped, re-terminate the compacted array. */
    if ( out != p_GETREQUESTURL ) {
        memset(out, '\0', sizeof(*out));
    }

    return built;
}
/*
 * SaveToFile - write the extracted URLs to the result file.
 *
 * The file name is SAVEFILENAME followed by sz_global_date and ".txt".
 * One line "<date> <url>" is written for every record up to the first one
 * whose sz_REQUESTURL is empty.
 *
 * Returns 1 on success and 0 when the file cannot be opened or a write or
 * the final close fails.
 */
int
SaveToFile(GETREQUESTURL * p_GETREQUESTURL) {

    FILE *fp = NULL;
    /* Sized from its parts; the old 32-byte buffer overflowed for dates longer than 6 characters. */
    char sz_filename[sizeof(SAVEFILENAME) + sizeof(sz_global_date) + sizeof(".txt")] = {0};
    int ok = 1;
    int n;

    n = snprintf(sz_filename, sizeof(sz_filename), "%s%s.txt", SAVEFILENAME, sz_global_date);
    if ( n < 0 || (size_t)n >= sizeof(sz_filename) ) {
        printf("file open failed\n");
        return 0;
    }

    fp = fopen(sz_filename, "w");

    if ( !fp ) {
        printf("file open failed\n");
        return 0;
    }

    /* Written directly to the stream: no intermediate buffer that a long URL could overflow. */
    for ( ; p_GETREQUESTURL->sz_REQUESTURL[0] != '\0'; p_GETREQUESTURL++) {
        if ( fprintf(fp, "%s %s\n", p_GETREQUESTURL->sz_Date, p_GETREQUESTURL->sz_REQUESTURL) < 0 ) {
            ok = 0;
            break;
        }
    }

    /* fclose flushes buffered data, so its result matters for a file opened for writing. */
    if ( fclose(fp) != 0 ) {
        ok = 0;
    }

    if ( !ok ) {
        printf("file write failed\n");
    }

    return ok;
}
/*
 * rtrim - remove trailing whitespace from s.
 *
 * The string is shortened in place by writing a terminator after its last
 * non-whitespace character; a string consisting only of whitespace becomes
 * empty. Whitespace is whatever isspace() accepts, which includes '\r' and
 * '\n'.
 *
 * Returns s (NULL when s is NULL). The result may be an empty string, so
 * callers must not index its last character without checking the length.
 */
char* rtrim(char* s) {
    size_t len;

    if (s == NULL) {
        return NULL;
    }

    len = strlen(s);

    /* The cast keeps isspace() defined for bytes above 0x7f when char is signed. */
    while (len > 0 && isspace((unsigned char)s[len - 1])) {
        len--;
    }
    s[len] = '\0';

    return s;
}

/*
 * ltrim - skip leading whitespace.
 *
 * Returns a pointer to the first character of s that isspace() does not
 * accept, or to the terminating '\0' when s holds only whitespace. The
 * string itself is not modified and the result points into s. Returns NULL
 * when s is NULL.
 */
char* ltrim(char *s) {
    if (s == NULL) {
        return NULL;
    }

    /* The cast keeps isspace() defined for bytes above 0x7f when char is signed. */
    while (*s != '\0' && isspace((unsigned char)*s)) {
        s++;
    }

    return s;
}

/*
 * trim - pass s through ltrim() and then rtrim().
 *
 * Returns whatever rtrim() returns for the pointer produced by ltrim().
 */
char* trim(char *s) {
    char *start = ltrim(s);

    return rtrim(start);
}

Advertisements

답글 남기기

아래 항목을 채우거나 오른쪽 아이콘 중 하나를 클릭하여 로그 인 하세요:

WordPress.com 로고

WordPress.com의 계정을 사용하여 댓글을 남깁니다. 로그아웃 /  변경 )

Google+ photo

Google+의 계정을 사용하여 댓글을 남깁니다. 로그아웃 /  변경 )

Twitter 사진

Twitter의 계정을 사용하여 댓글을 남깁니다. 로그아웃 /  변경 )

Facebook 사진

Facebook의 계정을 사용하여 댓글을 남깁니다. 로그아웃 /  변경 )

%s에 연결하는 중

%d 블로거가 이것을 좋아합니다: