1
0

wget.cpp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /*
  2. * @build: make examples
  3. * @server bin/httpd -s restart -d
  4. * @client bin/wget http://127.0.0.1:8080/
  5. */
  6. #include "HttpClient.h"
  7. #include "htime.h"
  8. using namespace hv;
  9. typedef std::function<void(size_t received_bytes, size_t total_bytes)> wget_progress_cb;
  10. static int wget(const char* url, const char* filepath, wget_progress_cb progress_cb = NULL, bool use_range = true) {
  11. int ret = 0;
  12. HttpClient cli;
  13. HttpRequest req;
  14. HttpResponse resp;
  15. // HEAD
  16. req.method = HTTP_HEAD;
  17. req.url = url;
  18. ret = cli.send(&req, &resp);
  19. if (ret != 0) {
  20. fprintf(stderr, "request error: %d\n", ret);
  21. return ret;
  22. }
  23. printd("%s", resp.Dump(true, false).c_str());
  24. if (resp.status_code == HTTP_STATUS_NOT_FOUND) {
  25. fprintf(stderr, "404 Not Found\n");
  26. return 404;
  27. }
  28. // use Range?
  29. int range_bytes = 1 << 20; // 1M
  30. long from = 0, to = 0;
  31. size_t content_length = hv::from_string<size_t>(resp.GetHeader("Content-Length"));
  32. if (use_range) {
  33. use_range = false;
  34. std::string accept_ranges = resp.GetHeader("Accept-Ranges");
  35. // use Range if server accept_ranges and content_length > 1M
  36. if (resp.status_code == 200 &&
  37. accept_ranges == "bytes" &&
  38. content_length > range_bytes) {
  39. use_range = true;
  40. }
  41. }
  42. // open file
  43. std::string filepath_download(filepath);
  44. filepath_download += ".download";
  45. HFile file;
  46. if (use_range) {
  47. ret = file.open(filepath_download.c_str(), "ab");
  48. from = file.size();
  49. } else {
  50. ret = file.open(filepath_download.c_str(), "wb");
  51. }
  52. if (ret != 0) {
  53. fprintf(stderr, "Failed to open file %s\n", filepath_download.c_str());
  54. return ret;
  55. }
  56. printf("Save file to %s ...\n", filepath);
  57. // GET
  58. req.method = HTTP_GET;
  59. req.timeout = 3600; // 1h
  60. if (!use_range) {
  61. size_t received_bytes = 0;
  62. req.http_cb = [&file, &content_length, &received_bytes, &progress_cb]
  63. (HttpMessage* resp, http_parser_state state, const char* data, size_t size) {
  64. if (!resp->headers["Location"].empty()) return;
  65. if (state == HP_HEADERS_COMPLETE) {
  66. content_length = hv::from_string<size_t>(resp->GetHeader("Content-Length"));
  67. printd("%s", resp->Dump(true, false).c_str());
  68. } else if (state == HP_BODY) {
  69. if (data && size) {
  70. file.write(data, size);
  71. received_bytes += size;
  72. if (progress_cb) {
  73. progress_cb(received_bytes, content_length);
  74. }
  75. }
  76. }
  77. };
  78. ret = cli.send(&req, &resp);
  79. if (ret != 0) {
  80. fprintf(stderr, "request error: %d\n", ret);
  81. goto error;
  82. }
  83. goto success;
  84. }
  85. // Range: bytes=from-to
  86. while (from < content_length) {
  87. to = from + range_bytes - 1;
  88. if (to >= content_length) to = content_length - 1;
  89. req.SetRange(from, to);
  90. printd("%s", req.Dump(true, false).c_str());
  91. ret = cli.send(&req, &resp);
  92. if (ret != 0) {
  93. fprintf(stderr, "request error: %d\n", ret);
  94. goto error;
  95. }
  96. printd("%s", resp.Dump(true, false).c_str());
  97. file.write(resp.body.data(), resp.body.size());
  98. // fix: resp.body.size != range_bytes on some server
  99. // from = to + 1;
  100. from += resp.body.size();
  101. if (progress_cb) {
  102. progress_cb(from, content_length);
  103. }
  104. }
  105. success:
  106. file.close();
  107. ret = file.rename(filepath);
  108. if (ret != 0) {
  109. fprintf(stderr, "mv %s => %s failed: %s:%d\n", filepath_download.c_str(), filepath, strerror(ret), ret);
  110. }
  111. return ret;
  112. error:
  113. file.close();
  114. // file.remove();
  115. return ret;
  116. }
  117. int main(int argc, char** argv) {
  118. if (argc < 2) {
  119. printf("Usage: %s [--use_range] url [filepath]\n", argv[0]);
  120. return -10;
  121. }
  122. int idx = 1;
  123. bool use_range = false;
  124. if (strcmp(argv[idx], "--use_range") == 0) {
  125. use_range = true;
  126. ++idx;
  127. }
  128. const char* url = argv[idx++];
  129. const char* filepath = "index.html";
  130. if (argv[idx]) {
  131. filepath = argv[idx];
  132. } else {
  133. const char* path = strrchr(url, '/');
  134. if (path && path[1]) {
  135. filepath = path + 1;
  136. }
  137. }
  138. unsigned int start_time = gettick_ms();
  139. int last_progress = 0;
  140. wget(url, filepath, [&last_progress](size_t received_bytes, size_t total_bytes) {
  141. // print progress
  142. if (total_bytes == 0) {
  143. printf("\rprogress: %lu/? = ?", (unsigned long)received_bytes);
  144. } else {
  145. int cur_progress = received_bytes * 100 / total_bytes;
  146. if (cur_progress > last_progress) {
  147. printf("\rprogress: %lu/%lu = %d%%", (unsigned long)received_bytes, (unsigned long)total_bytes, (int)cur_progress);
  148. last_progress = cur_progress;
  149. }
  150. }
  151. fflush(stdout);
  152. }, use_range);
  153. unsigned int end_time = gettick_ms();
  154. unsigned int cost_time = end_time - start_time;
  155. printf("\ncost time %u ms\n", cost_time);
  156. // 1B/ms = 1KB/s = 8Kbps
  157. printf("download rate = %lu KB/s\n", (unsigned long)hv_filesize(filepath) / cost_time);
  158. return 0;
  159. }