1
0

multipart_parser.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. /* Based on node-formidable by Felix Geisendörfer
  2. * Igor Afonov - afonov@gmail.com - 2012
  3. * MIT License - http://www.opensource.org/licenses/mit-license.php
  4. */
  5. #include "multipart_parser.h"
  6. #include <stdio.h>
  7. #include <stdarg.h>
  8. #include <string.h>
  9. static void multipart_log(const char * format, ...)
  10. {
  11. #ifdef DEBUG_MULTIPART
  12. va_list args;
  13. va_start(args, format);
  14. fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__);
  15. vfprintf(stderr, format, args);
  16. fprintf(stderr, "\n");
  17. va_end(args);
  18. #endif
  19. }
  20. #define NOTIFY_CB(FOR) \
  21. do { \
  22. if (p->settings->on_##FOR) { \
  23. if (p->settings->on_##FOR(p) != 0) { \
  24. return i; \
  25. } \
  26. } \
  27. } while (0)
  28. #define EMIT_DATA_CB(FOR, ptr, len) \
  29. do { \
  30. if (p->settings->on_##FOR) { \
  31. if (p->settings->on_##FOR(p, ptr, len) != 0) { \
  32. return i; \
  33. } \
  34. } \
  35. } while (0)
  36. #define LF 10
  37. #define CR 13
  38. struct multipart_parser {
  39. void * data;
  40. size_t index;
  41. size_t boundary_length;
  42. unsigned char state;
  43. const multipart_parser_settings* settings;
  44. char* lookbehind;
  45. char multipart_boundary[1];
  46. };
  47. enum state {
  48. s_uninitialized = 1,
  49. s_start,
  50. s_start_boundary,
  51. s_header_field_start,
  52. s_header_field,
  53. s_headers_almost_done,
  54. s_header_value_start,
  55. s_header_value,
  56. s_header_value_almost_done,
  57. s_part_data_start,
  58. s_part_data,
  59. s_part_data_almost_boundary,
  60. s_part_data_boundary,
  61. s_part_data_almost_end,
  62. s_part_data_end,
  63. s_part_data_final_hyphen,
  64. s_end
  65. };
  66. multipart_parser* multipart_parser_init
  67. (const char *boundary, const multipart_parser_settings* settings) {
  68. multipart_parser* p = (multipart_parser*)malloc(sizeof(multipart_parser) +
  69. strlen(boundary) +
  70. strlen(boundary) + 9);
  71. strcpy(p->multipart_boundary, boundary);
  72. p->boundary_length = strlen(boundary);
  73. p->lookbehind = (p->multipart_boundary + p->boundary_length + 1);
  74. p->index = 0;
  75. p->state = s_start;
  76. p->settings = settings;
  77. return p;
  78. }
  79. void multipart_parser_free(multipart_parser* p) {
  80. free(p);
  81. }
  82. void multipart_parser_set_data(multipart_parser *p, void *data) {
  83. p->data = data;
  84. }
  85. void *multipart_parser_get_data(multipart_parser *p) {
  86. return p->data;
  87. }
  88. size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  89. size_t i = 0;
  90. size_t mark = 0;
  91. char c, cl;
  92. int is_last = 0;
  93. while(i < len) {
  94. c = buf[i];
  95. is_last = (i == (len - 1));
  96. switch (p->state) {
  97. case s_start:
  98. multipart_log("s_start");
  99. p->index = 0;
  100. p->state = s_start_boundary;
  101. /* fallthrough */
  102. case s_start_boundary:
  103. multipart_log("s_start_boundary");
  104. if (p->index == p->boundary_length) {
  105. if (c != CR) {
  106. return i;
  107. }
  108. p->index++;
  109. break;
  110. } else if (p->index == (p->boundary_length + 1)) {
  111. if (c != LF) {
  112. return i;
  113. }
  114. p->index = 0;
  115. NOTIFY_CB(part_data_begin);
  116. p->state = s_header_field_start;
  117. break;
  118. }
  119. if (c != p->multipart_boundary[p->index]) {
  120. return i;
  121. }
  122. p->index++;
  123. break;
  124. case s_header_field_start:
  125. multipart_log("s_header_field_start");
  126. mark = i;
  127. p->state = s_header_field;
  128. /* fallthrough */
  129. case s_header_field:
  130. multipart_log("s_header_field");
  131. if (c == CR) {
  132. p->state = s_headers_almost_done;
  133. break;
  134. }
  135. if (c == ':') {
  136. EMIT_DATA_CB(header_field, buf + mark, i - mark);
  137. p->state = s_header_value_start;
  138. break;
  139. }
  140. cl = tolower(c);
  141. if ((c != '-') && (cl < 'a' || cl > 'z')) {
  142. multipart_log("invalid character in header name");
  143. return i;
  144. }
  145. if (is_last)
  146. EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
  147. break;
  148. case s_headers_almost_done:
  149. multipart_log("s_headers_almost_done");
  150. if (c != LF) {
  151. return i;
  152. }
  153. p->state = s_part_data_start;
  154. break;
  155. case s_header_value_start:
  156. multipart_log("s_header_value_start");
  157. if (c == ' ') {
  158. break;
  159. }
  160. mark = i;
  161. p->state = s_header_value;
  162. /* fallthrough */
  163. case s_header_value:
  164. multipart_log("s_header_value");
  165. if (c == CR) {
  166. EMIT_DATA_CB(header_value, buf + mark, i - mark);
  167. p->state = s_header_value_almost_done;
  168. break;
  169. }
  170. if (is_last)
  171. EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
  172. break;
  173. case s_header_value_almost_done:
  174. multipart_log("s_header_value_almost_done");
  175. if (c != LF) {
  176. return i;
  177. }
  178. p->state = s_header_field_start;
  179. break;
  180. case s_part_data_start:
  181. multipart_log("s_part_data_start");
  182. NOTIFY_CB(headers_complete);
  183. mark = i;
  184. p->state = s_part_data;
  185. /* fallthrough */
  186. case s_part_data:
  187. multipart_log("s_part_data");
  188. if (c == CR) {
  189. EMIT_DATA_CB(part_data, buf + mark, i - mark);
  190. mark = i;
  191. p->state = s_part_data_almost_boundary;
  192. p->lookbehind[0] = CR;
  193. break;
  194. }
  195. if (is_last)
  196. EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
  197. break;
  198. case s_part_data_almost_boundary:
  199. multipart_log("s_part_data_almost_boundary");
  200. if (c == LF) {
  201. p->state = s_part_data_boundary;
  202. p->lookbehind[1] = LF;
  203. p->index = 0;
  204. break;
  205. }
  206. EMIT_DATA_CB(part_data, p->lookbehind, 1);
  207. p->state = s_part_data;
  208. mark = i --;
  209. break;
  210. case s_part_data_boundary:
  211. multipart_log("s_part_data_boundary");
  212. if (p->multipart_boundary[p->index] != c) {
  213. EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
  214. p->state = s_part_data;
  215. mark = i --;
  216. break;
  217. }
  218. p->lookbehind[2 + p->index] = c;
  219. if ((++ p->index) == p->boundary_length) {
  220. NOTIFY_CB(part_data_end);
  221. p->state = s_part_data_almost_end;
  222. }
  223. break;
  224. case s_part_data_almost_end:
  225. multipart_log("s_part_data_almost_end");
  226. if (c == '-') {
  227. p->state = s_part_data_final_hyphen;
  228. break;
  229. }
  230. if (c == CR) {
  231. p->state = s_part_data_end;
  232. break;
  233. }
  234. return i;
  235. case s_part_data_final_hyphen:
  236. multipart_log("s_part_data_final_hyphen");
  237. if (c == '-') {
  238. NOTIFY_CB(body_end);
  239. p->state = s_end;
  240. break;
  241. }
  242. return i;
  243. case s_part_data_end:
  244. multipart_log("s_part_data_end");
  245. if (c == LF) {
  246. p->state = s_header_field_start;
  247. NOTIFY_CB(part_data_begin);
  248. break;
  249. }
  250. return i;
  251. case s_end:
  252. multipart_log("s_end: %02X", (int) c);
  253. break;
  254. default:
  255. multipart_log("Multipart parser unrecoverable error");
  256. return 0;
  257. }
  258. ++ i;
  259. }
  260. return len;
  261. }