00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #include <time.h>
00037 #include <stdio.h>
00038 #include <ctype.h>
00039 #include <stdlib.h>
00040 #include <string.h>
00041 #include <errno.h>
00042 #include <unistd.h>
00043 #include <sys/types.h>
00044
00045 #ifndef _WIN32
00046 #include <netdb.h>
00047 #include <arpa/inet.h>
00048 #include <sys/socket.h>
00049 #endif
00050
00051 #include "knetfile.h"
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
/**
 * Wait up to 5 seconds for a descriptor to become readable or writable.
 *
 * @param fd       descriptor to watch
 * @param is_read  non-zero: wait until readable; zero: wait until writable
 * @return the result of select(): a positive value when the descriptor is
 *         ready, 0 on time-out, or a negative/error value on failure (a
 *         diagnostic is written to stderr)
 */
static int socket_wait(int fd, int is_read)
{
    fd_set fds, *fdr = 0, *fdw = 0;
    struct timeval tv;
    int ret;

#ifndef _WIN32
    /* FD_SET() has undefined behaviour for descriptors outside
     * [0, FD_SETSIZE); report them the way select() reports a bad fd. */
    if (fd < 0 || fd >= FD_SETSIZE) {
        errno = EBADF;
        perror("select");
        return -1;
    }
#endif

    tv.tv_sec = 5;
    tv.tv_usec = 0;
    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    if (is_read) fdr = &fds;
    else fdw = &fds;

    ret = select(fd + 1, fdr, fdw, 0, &tv);
#ifndef _WIN32
    if (ret == -1) perror("select");
#else
    if (ret == 0)
        fprintf(stderr, "select time-out\n");
    else if (ret == SOCKET_ERROR)
        fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
    return ret;
}
00086
00087 #ifndef _WIN32
00088
00089
00090
/**
 * Open a TCP connection to host:port (POSIX implementation).
 *
 * The name is resolved with getaddrinfo() and every returned address is
 * tried in order until one connects.  SO_REUSEADDR and a zeroed SO_LINGER
 * are set on the socket before connecting.
 *
 * @param host  host name or numeric address
 * @param port  service name or decimal port number
 * @return a connected socket descriptor, or -1 on failure (a diagnostic
 *         naming the failing call is written to stderr)
 */
static int socket_connect(const char *host, const char *port)
{
    int on = 1, fd = -1, rc, saved_errno = 0;
    const char *failed = "connect";   /* name of the last call that failed */
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = 0, *ai;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    /* getaddrinfo() reports errors through its return value, not errno,
     * and leaves nothing to free when it fails. */
    rc = getaddrinfo(host, port, &hints, &res);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return -1;
    }

    for (ai = res; ai != 0; ai = ai->ai_next) {
        fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (fd == -1) {
            failed = "socket";
            saved_errno = errno;
            continue;
        }
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1
            || setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) {
            failed = "setsockopt";
        } else if (connect(fd, ai->ai_addr, ai->ai_addrlen) == 0) {
            break;                      /* connected: keep fd */
        } else {
            failed = "connect";
        }
        /* Remember why this address failed, then release the socket so it
         * is not leaked before trying the next address. */
        saved_errno = errno;
        close(fd);
        fd = -1;
    }
    freeaddrinfo(res);

    if (fd == -1) {
        errno = saved_errno;
        perror(failed);
    }
    return fd;
}
00114 #else
00115
/**
 * Format a 64-bit signed integer as a decimal string.
 *
 * @param buf  destination; must have room for the digits, an optional
 *             leading '-' and the terminating NUL (21 bytes always suffice)
 * @param x    value to format
 * @return buf
 */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the magnitude as an unsigned value so that negative inputs
     * (including the most negative one) yield valid digits. */
    unsigned long long mag = x < 0 ? 0ULL - (unsigned long long)x
                                   : (unsigned long long)x;
    char *digits = buf;
    int len = 0, lo, hi;

    if (x < 0) *digits++ = '-';

    /* Emit digits least-significant first ... */
    do {
        digits[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag);
    digits[len] = 0;

    /* ... then reverse them in place. */
    for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
        char c = digits[lo];
        digits[lo] = digits[hi];
        digits[hi] = c;
    }
    return buf;
}
00130
/**
 * Parse a non-negative decimal integer from the start of a string.
 *
 * Parsing stops at the first character that is not a decimal digit, so
 * trailing text (for example a line ending) does not corrupt the value.
 * Values too large for int64_t are clamped to the largest int64_t.
 *
 * @param buf  NUL-terminated string
 * @return the parsed value; 0 when buf does not start with a digit
 */
int64_t strtoint64(const char *buf)
{
    /* Largest int64_t, computed without overflowing and without
     * depending on a limits macro. */
    const int64_t max = (((int64_t)1 << 62) - 1) * 2 + 1;
    int64_t x = 0;

    for (; *buf >= '0' && *buf <= '9'; ++buf) {
        int d = *buf - '0';
        if (x > (max - d) / 10) return max;   /* x * 10 + d would overflow */
        x = x * 10 + d;
    }
    return x;
}
00138
00139 int knet_win32_init()
00140 {
00141 WSADATA wsaData;
00142 return WSAStartup(MAKEWORD(2, 2), &wsaData);
00143 }
/**
 * Release Winsock by calling WSACleanup(); its result is ignored.
 */
void knet_win32_destroy()
{
    (void)WSACleanup();
}
00148
00149
00150
00151
00152 static SOCKET socket_connect(const char *host, const char *port)
00153 {
00154 #define __err_connect(func) \
00155 do { \
00156 fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
00157 return -1; \
00158 } while (0)
00159
00160 int on = 1;
00161 SOCKET fd;
00162 struct linger lng = { 0, 0 };
00163 struct sockaddr_in server;
00164 struct hostent *hp = 0;
00165
00166 if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
00167 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
00168 if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
00169
00170 if (isalpha(host[0])) hp = gethostbyname(host);
00171 else {
00172 struct in_addr addr;
00173 addr.s_addr = inet_addr(host);
00174 hp = gethostbyaddr((char*)&addr, 4, AF_INET);
00175 }
00176 if (hp == 0) __err_connect("gethost");
00177
00178 server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
00179 server.sin_family= AF_INET;
00180 server.sin_port = htons(atoi(port));
00181 if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
00182
00183 return fd;
00184 }
00185 #endif
00186
/**
 * Read up to len bytes from a socket, waiting for data before each read.
 *
 * Reading stops early when socket_wait() reports a time-out or error, when
 * netread() reports end of stream (0), or when netread() fails (negative).
 *
 * @param fd   socket to read from
 * @param buf  destination buffer of at least len bytes
 * @param len  number of bytes wanted
 * @return the number of bytes actually stored in buf (possibly fewer than
 *         len, possibly 0)
 */
static off_t my_netread(int fd, void *buf, off_t len)
{
    char *dst = buf;            /* byte pointer: no arithmetic on void* */
    off_t rest = len, curr, l = 0;

    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break;
        curr = netread(fd, dst + l, rest);
        /* 0 is end of stream; a negative value is an error and must not
         * be added to the byte count. */
        if (curr <= 0) break;
        l += curr;
        rest -= curr;
    }
    return l;
}
00204
00205
00206
00207
00208
00209 static int kftp_get_response(knetFile *ftp)
00210 {
00211 #ifndef _WIN32
00212 unsigned char c;
00213 #else
00214 char c;
00215 #endif
00216 int n = 0;
00217 char *p;
00218 if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
00219 while (netread(ftp->ctrl_fd, &c, 1)) {
00220
00221 if (n >= ftp->max_response) {
00222 ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
00223 ftp->response = realloc(ftp->response, ftp->max_response);
00224 }
00225 ftp->response[n++] = c;
00226 if (c == '\n') {
00227 if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
00228 && ftp->response[3] != '-') break;
00229 n = 0;
00230 continue;
00231 }
00232 }
00233 if (n < 2) return -1;
00234 ftp->response[n-2] = 0;
00235 return strtol(ftp->response, &p, 0);
00236 }
00237
00238 static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
00239 {
00240 if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1;
00241 if(netwrite(ftp->ctrl_fd, cmd, strlen(cmd)) != strlen(cmd))
00242 {
00243 fprintf(stderr, "Failed to netwrite entire cmd\n");
00244 }
00245 return is_get? kftp_get_response(ftp) : 0;
00246 }
00247
00248 static int kftp_pasv_prep(knetFile *ftp)
00249 {
00250 char *p;
00251 int v[6];
00252 kftp_send_cmd(ftp, "PASV\r\n", 1);
00253 for (p = ftp->response; *p && *p != '('; ++p);
00254 if (*p != '(') return -1;
00255 ++p;
00256 sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
00257 memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
00258 ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
00259 return 0;
00260 }
00261
00262
00263 static int kftp_pasv_connect(knetFile *ftp)
00264 {
00265 char host[80], port[10];
00266 if (ftp->pasv_port == 0) {
00267 fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
00268 return -1;
00269 }
00270 sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
00271 sprintf(port, "%d", ftp->pasv_port);
00272 ftp->fd = socket_connect(host, port);
00273 if (ftp->fd == -1) return -1;
00274 return 0;
00275 }
00276
00277 int kftp_connect(knetFile *ftp)
00278 {
00279 ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
00280 if (ftp->ctrl_fd == -1) return -1;
00281 kftp_get_response(ftp);
00282 kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
00283 kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
00284 kftp_send_cmd(ftp, "TYPE I\r\n", 1);
00285 return 0;
00286 }
00287
00288 int kftp_reconnect(knetFile *ftp)
00289 {
00290 if (ftp->ctrl_fd != -1) {
00291 netclose(ftp->ctrl_fd);
00292 ftp->ctrl_fd = -1;
00293 }
00294 netclose(ftp->fd);
00295 ftp->fd = -1;
00296 return kftp_connect(ftp);
00297 }
00298
00299
00300 knetFile *kftp_parse_url(const char *fn, const char *mode)
00301 {
00302 knetFile *fp;
00303 char *p;
00304 int l;
00305 if (strstr(fn, "ftp://") != fn) return 0;
00306 for (p = (char*)fn + 6; *p && *p != '/'; ++p);
00307 if (*p != '/') return 0;
00308 l = p - fn - 6;
00309 fp = calloc(1, sizeof(knetFile));
00310 fp->type = KNF_TYPE_FTP;
00311 fp->fd = -1;
00312
00313
00314 fp->port = strdup("21");
00315 fp->host = calloc(l + 1, 1);
00316 if (strchr(mode, 'c')) fp->no_reconnect = 1;
00317 strncpy(fp->host, fn + 6, l);
00318 fp->retr = calloc(strlen(p) + 8, 1);
00319 sprintf(fp->retr, "RETR %s\r\n", p);
00320 fp->size_cmd = calloc(strlen(p) + 8, 1);
00321 sprintf(fp->size_cmd, "SIZE %s\r\n", p);
00322 fp->seek_offset = 0;
00323 return fp;
00324 }
00325
00326 int kftp_connect_file(knetFile *fp)
00327 {
00328 int ret;
00329 long long file_size;
00330 if (fp->fd != -1) {
00331 netclose(fp->fd);
00332 if (fp->no_reconnect) kftp_get_response(fp);
00333 }
00334 kftp_pasv_prep(fp);
00335 kftp_send_cmd(fp, fp->size_cmd, 1);
00336 #ifndef _WIN32
00337 if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
00338 {
00339 fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
00340 return -1;
00341 }
00342 #else
00343 const char *p = fp->response;
00344 while (*p != ' ') ++p;
00345 while (*p < '0' || *p > '9') ++p;
00346 file_size = strtoint64(p);
00347 #endif
00348 fp->file_size = file_size;
00349 if (fp->offset>=0) {
00350 char tmp[32];
00351 #ifndef _WIN32
00352 sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
00353 #else
00354 strcpy(tmp, "REST ");
00355 int64tostr(tmp + 5, fp->offset);
00356 strcat(tmp, "\r\n");
00357 #endif
00358 kftp_send_cmd(fp, tmp, 1);
00359 }
00360 kftp_send_cmd(fp, fp->retr, 0);
00361 kftp_pasv_connect(fp);
00362 ret = kftp_get_response(fp);
00363 if (ret != 150) {
00364 fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
00365 netclose(fp->fd);
00366 fp->fd = -1;
00367 return -1;
00368 }
00369 fp->is_ready = 1;
00370 return 0;
00371 }
00372
00373
00374
00375
00376
00377
00378 knetFile *khttp_parse_url(const char *fn, const char *mode)
00379 {
00380 knetFile *fp;
00381 char *p, *proxy, *q;
00382 int l;
00383 if (strstr(fn, "http://") != fn) return 0;
00384
00385 for (p = (char*)fn + 7; *p && *p != '/'; ++p);
00386 l = p - fn - 7;
00387 fp = calloc(1, sizeof(knetFile));
00388 fp->http_host = calloc(l + 1, 1);
00389 strncpy(fp->http_host, fn + 7, l);
00390 fp->http_host[l] = 0;
00391 for (q = fp->http_host; *q && *q != ':'; ++q);
00392 if (*q == ':') *q++ = 0;
00393
00394 proxy = getenv("http_proxy");
00395
00396 if (proxy == 0) {
00397 fp->host = strdup(fp->http_host);
00398 fp->port = strdup(*q? q : "80");
00399 fp->path = strdup(*p? p : "/");
00400 } else {
00401 fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
00402 for (q = fp->host; *q && *q != ':'; ++q);
00403 if (*q == ':') *q++ = 0;
00404 fp->port = strdup(*q? q : "80");
00405 fp->path = strdup(fn);
00406 }
00407 fp->type = KNF_TYPE_HTTP;
00408 fp->ctrl_fd = fp->fd = -1;
00409 fp->seek_offset = 0;
00410 return fp;
00411 }
00412
00413 int khttp_connect_file(knetFile *fp)
00414 {
00415 int ret, l = 0;
00416 char *buf, *p;
00417 if (fp->fd != -1) netclose(fp->fd);
00418 fp->fd = socket_connect(fp->host, fp->port);
00419 buf = calloc(0x10000, 1);
00420 l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
00421 l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
00422 l += sprintf(buf + l, "\r\n");
00423 if(netwrite(fp->fd, buf, l) != l)
00424 {
00425 fprintf(stderr, "Failed to netwrite entire buf\n");
00426 }
00427 l = 0;
00428 while (netread(fp->fd, buf + l, 1)) {
00429 if (buf[l] == '\n' && l >= 3)
00430 if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
00431 ++l;
00432 }
00433 buf[l] = 0;
00434 if (l < 14) {
00435 netclose(fp->fd);
00436 fp->fd = -1;
00437 return -1;
00438 }
00439 ret = strtol(buf + 8, &p, 0);
00440 if (ret == 200 && fp->offset>0) {
00441 off_t rest = fp->offset;
00442 while (rest) {
00443 off_t l = rest < 0x10000? rest : 0x10000;
00444 rest -= my_netread(fp->fd, buf, l);
00445 }
00446 } else if (ret != 206 && ret != 200) {
00447 free(buf);
00448 fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
00449 netclose(fp->fd);
00450 fp->fd = -1;
00451 return -1;
00452 }
00453 free(buf);
00454 fp->is_ready = 1;
00455 return 0;
00456 }
00457
00458
00459
00460
00461
00462 knetFile *knet_open(const char *fn, const char *mode)
00463 {
00464 knetFile *fp = 0;
00465 if (mode[0] != 'r') {
00466 fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
00467 return 0;
00468 }
00469 if (strstr(fn, "ftp://") == fn) {
00470 fp = kftp_parse_url(fn, mode);
00471 if (fp == 0) return 0;
00472 if (kftp_connect(fp) == -1) {
00473 knet_close(fp);
00474 return 0;
00475 }
00476 kftp_connect_file(fp);
00477 } else if (strstr(fn, "http://") == fn) {
00478 fp = khttp_parse_url(fn, mode);
00479 if (fp == 0) return 0;
00480 khttp_connect_file(fp);
00481 } else {
00482 #ifdef _WIN32
00483
00484
00485
00486 int fd = open(fn, O_RDONLY | O_BINARY);
00487 #else
00488 int fd = open(fn, O_RDONLY);
00489 #endif
00490 if (fd == -1) {
00491 perror("open");
00492 return 0;
00493 }
00494 fp = (knetFile*)calloc(1, sizeof(knetFile));
00495 fp->type = KNF_TYPE_LOCAL;
00496 fp->fd = fd;
00497 fp->ctrl_fd = -1;
00498 }
00499 if (fp && fp->fd == -1) {
00500 knet_close(fp);
00501 return 0;
00502 }
00503 return fp;
00504 }
00505
00506 knetFile *knet_dopen(int fd, const char *mode)
00507 {
00508 knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
00509 fp->type = KNF_TYPE_LOCAL;
00510 fp->fd = fd;
00511 return fp;
00512 }
00513
00514 off_t knet_read(knetFile *fp, void *buf, off_t len)
00515 {
00516 off_t l = 0;
00517 if (fp->fd == -1) return 0;
00518 if (fp->type == KNF_TYPE_FTP) {
00519 if (fp->is_ready == 0) {
00520 if (!fp->no_reconnect) kftp_reconnect(fp);
00521 kftp_connect_file(fp);
00522 }
00523 } else if (fp->type == KNF_TYPE_HTTP) {
00524 if (fp->is_ready == 0)
00525 khttp_connect_file(fp);
00526 }
00527 if (fp->type == KNF_TYPE_LOCAL) {
00528 off_t rest = len, curr;
00529 while (rest) {
00530 do {
00531 curr = read(fp->fd, buf + l, rest);
00532 } while (curr < 0 && EINTR == errno);
00533 if (curr < 0) return -1;
00534 if (curr == 0) break;
00535 l += curr; rest -= curr;
00536 }
00537 } else l = my_netread(fp->fd, buf, len);
00538 fp->offset += l;
00539 return l;
00540 }
00541
00542 off_t knet_seek(knetFile *fp, int64_t off, int whence)
00543 {
00544 if (whence == SEEK_SET && off == fp->offset) return 0;
00545 if (fp->type == KNF_TYPE_LOCAL) {
00546
00547
00548 off_t offset = lseek(fp->fd, off, whence);
00549 if (offset == -1) {
00550
00551
00552 return -1;
00553 }
00554 fp->offset = offset;
00555 return 0;
00556 }
00557 else if (fp->type == KNF_TYPE_FTP)
00558 {
00559 if (whence==SEEK_CUR)
00560 fp->offset += off;
00561 else if (whence==SEEK_SET)
00562 fp->offset = off;
00563 else if ( whence==SEEK_END)
00564 fp->offset = fp->file_size+off;
00565 fp->is_ready = 0;
00566 return 0;
00567 }
00568 else if (fp->type == KNF_TYPE_HTTP)
00569 {
00570 if (whence == SEEK_END) {
00571 fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
00572 errno = ESPIPE;
00573 return -1;
00574 }
00575 if (whence==SEEK_CUR)
00576 fp->offset += off;
00577 else if (whence==SEEK_SET)
00578 fp->offset = off;
00579 fp->is_ready = 0;
00580 return 0;
00581 }
00582 errno = EINVAL;
00583 fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
00584 return -1;
00585 }
00586
00587 int knet_close(knetFile *fp)
00588 {
00589 if (fp == 0) return 0;
00590 if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd);
00591 if (fp->fd != -1) {
00592
00593
00594 if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
00595 else netclose(fp->fd);
00596 }
00597 free(fp->host); free(fp->port);
00598 free(fp->response); free(fp->retr);
00599 free(fp->path); free(fp->http_host);
00600 free(fp);
00601 return 0;
00602 }
00603
#ifdef KNETFILE_MAIN
/**
 * Manual test driver: opens one of several hard-coded local/FTP/HTTP
 * sources (selected by the local variable `type`), seeks, reads and writes
 * what was read to standard output.
 *
 * @return 0 on success, 1 if the buffer or the source could not be opened
 */
int main(void)
{
    enum { BUF_SIZE = 0x100000 };
    char *buf;
    knetFile *fp = 0;
    int type = 4, l = 0;
#ifdef _WIN32
    knet_win32_init();
#endif
    buf = calloc(BUF_SIZE, 1);
    if (buf == 0) {
        perror("calloc");
        return 1;
    }

    if (type == 0)
        fp = knet_open("knetfile.c", "r");
    else if (type == 1)
        fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
    else if (type == 2)
        fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
    else if (type == 3)
        fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
    else if (type == 4)
        fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");

    if (fp == 0) {
        fprintf(stderr, "[main] fail to open the test source.\n");
        free(buf);
        return 1;
    }

    if (type == 1) {
        knet_seek(fp, 2500000000ll, SEEK_SET);
        l = knet_read(fp, buf, 255);
    } else if (type == 4) {
        off_t got;
        knet_read(fp, buf, 10000);
        knet_seek(fp, 20000, SEEK_SET);
        knet_seek(fp, 10000, SEEK_SET);
        /* Never ask for more than what is left of the buffer. */
        got = knet_read(fp, buf + 10000, BUF_SIZE - 10000);
        l = (int)(got > 0 ? got : 0) + 10000;
    } else {
        knet_seek(fp, 1000, SEEK_SET);
    }

    if (type != 4 && type != 1) {
        knet_read(fp, buf, 255);
        buf[255] = 0;
        printf("%s\n", buf);
    } else if (l > 0 && write(fileno(stdout), buf, l) != l) {
        perror("write");
    }

    knet_close(fp);
    free(buf);
    return 0;
}
#endif