wepoll.c

/*
 * wepoll - epoll for Windows
 * https://github.com/piscisaureus/wepoll
 *
 * Copyright 2012-2020, Bert Belder <bertbelder@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef WEPOLL_EXPORT
#define WEPOLL_EXPORT
#endif

#include <stdint.h>

enum EPOLL_EVENTS {
  EPOLLIN = (int) (1U << 0),
  EPOLLPRI = (int) (1U << 1),
  EPOLLOUT = (int) (1U << 2),
  EPOLLERR = (int) (1U << 3),
  EPOLLHUP = (int) (1U << 4),
  EPOLLRDNORM = (int) (1U << 6),
  EPOLLRDBAND = (int) (1U << 7),
  EPOLLWRNORM = (int) (1U << 8),
  EPOLLWRBAND = (int) (1U << 9),
  EPOLLMSG = (int) (1U << 10), /* Never reported. */
  EPOLLRDHUP = (int) (1U << 13),
  EPOLLONESHOT = (int) (1U << 31)
};

#define EPOLLIN (1U << 0)
#define EPOLLPRI (1U << 1)
#define EPOLLOUT (1U << 2)
#define EPOLLERR (1U << 3)
#define EPOLLHUP (1U << 4)
#define EPOLLRDNORM (1U << 6)
#define EPOLLRDBAND (1U << 7)
#define EPOLLWRNORM (1U << 8)
#define EPOLLWRBAND (1U << 9)
#define EPOLLMSG (1U << 10)
#define EPOLLRDHUP (1U << 13)
#define EPOLLONESHOT (1U << 31)

#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_MOD 2
#define EPOLL_CTL_DEL 3

typedef void* HANDLE;
typedef uintptr_t SOCKET;

typedef union epoll_data {
  void* ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
  SOCKET sock; /* Windows specific */
  HANDLE hnd;  /* Windows specific */
} epoll_data_t;

struct epoll_event {
  uint32_t events;   /* Epoll events and flags */
  epoll_data_t data; /* User data variable */
};

#ifdef __cplusplus
extern "C" {
#endif

WEPOLL_EXPORT HANDLE epoll_create(int size);
WEPOLL_EXPORT HANDLE epoll_create1(int flags);

WEPOLL_EXPORT int epoll_close(HANDLE ephnd);

WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd,
                            int op,
                            SOCKET sock,
                            struct epoll_event* event);

WEPOLL_EXPORT int epoll_wait(HANDLE ephnd,
                             struct epoll_event* events,
                             int maxevents,
                             int timeout);

#ifdef __cplusplus
} /* extern "C" */
#endif
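
/* Illustrative usage sketch of the public API above (not part of wepoll
 * itself). It assumes a valid SOCKET named `sock` has been created elsewhere,
 * and error handling is reduced to early returns for brevity:
 *
 *   HANDLE ephnd = epoll_create1(0);
 *   if (ephnd == NULL)
 *     return -1;
 *
 *   struct epoll_event ev;
 *   ev.events = EPOLLIN | EPOLLOUT;
 *   ev.data.sock = sock;
 *   if (epoll_ctl(ephnd, EPOLL_CTL_ADD, sock, &ev) < 0)
 *     return -1;
 *
 *   struct epoll_event ready[8];
 *   int n = epoll_wait(ephnd, ready, 8, 1000);  (wait up to 1000 ms)
 *   for (int i = 0; i < n; i++) {
 *     ready[i].data.sock identifies the socket; ready[i].events says why it
 *     became ready. n == 0 means the call timed out, n < 0 means an error.
 *   }
 *
 *   epoll_close(ephnd);
 */
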
#include <assert.h>
#include <stdlib.h>

#define WEPOLL_INTERNAL static
#define WEPOLL_INTERNAL_EXTERN static

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnonportable-system-include-path"
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#elif defined(_MSC_VER)
#pragma warning(push, 1)
#endif

#undef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN

#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0600

#include <winsock2.h>
#include <ws2tcpip.h>
#include <windows.h>

#if defined(__clang__)
#pragma clang diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif

WEPOLL_INTERNAL int nt_global_init(void);

typedef LONG NTSTATUS;
typedef NTSTATUS* PNTSTATUS;

#ifndef NT_SUCCESS
#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0)
#endif

#ifndef STATUS_SUCCESS
#define STATUS_SUCCESS ((NTSTATUS) 0x00000000L)
#endif

#ifndef STATUS_PENDING
#define STATUS_PENDING ((NTSTATUS) 0x00000103L)
#endif

#ifndef STATUS_CANCELLED
#define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L)
#endif

#ifndef STATUS_NOT_FOUND
#define STATUS_NOT_FOUND ((NTSTATUS) 0xC0000225L)
#endif

typedef struct _IO_STATUS_BLOCK {
  NTSTATUS Status;
  ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext,
                                     PIO_STATUS_BLOCK IoStatusBlock,
                                     ULONG Reserved);

typedef struct _UNICODE_STRING {
  USHORT Length;
  USHORT MaximumLength;
  PWSTR Buffer;
} UNICODE_STRING, *PUNICODE_STRING;

#define RTL_CONSTANT_STRING(s) \
  { sizeof(s) - sizeof((s)[0]), sizeof(s), s }

typedef struct _OBJECT_ATTRIBUTES {
  ULONG Length;
  HANDLE RootDirectory;
  PUNICODE_STRING ObjectName;
  ULONG Attributes;
  PVOID SecurityDescriptor;
  PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;

#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \
  { sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL }

#ifndef FILE_OPEN
#define FILE_OPEN 0x00000001UL
#endif

#define KEYEDEVENT_WAIT 0x00000001UL
#define KEYEDEVENT_WAKE 0x00000002UL
#define KEYEDEVENT_ALL_ACCESS \
  (STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE)

#define NT_NTDLL_IMPORT_LIST(X) \
  X(NTSTATUS, \
    NTAPI, \
    NtCancelIoFileEx, \
    (HANDLE FileHandle, \
     PIO_STATUS_BLOCK IoRequestToCancel, \
     PIO_STATUS_BLOCK IoStatusBlock)) \
  \
  X(NTSTATUS, \
    NTAPI, \
    NtCreateFile, \
    (PHANDLE FileHandle, \
     ACCESS_MASK DesiredAccess, \
     POBJECT_ATTRIBUTES ObjectAttributes, \
     PIO_STATUS_BLOCK IoStatusBlock, \
     PLARGE_INTEGER AllocationSize, \
     ULONG FileAttributes, \
     ULONG ShareAccess, \
     ULONG CreateDisposition, \
     ULONG CreateOptions, \
     PVOID EaBuffer, \
     ULONG EaLength)) \
  \
  X(NTSTATUS, \
    NTAPI, \
    NtCreateKeyedEvent, \
    (PHANDLE KeyedEventHandle, \
     ACCESS_MASK DesiredAccess, \
     POBJECT_ATTRIBUTES ObjectAttributes, \
     ULONG Flags)) \
  \
  X(NTSTATUS, \
    NTAPI, \
    NtDeviceIoControlFile, \
    (HANDLE FileHandle, \
     HANDLE Event, \
     PIO_APC_ROUTINE ApcRoutine, \
     PVOID ApcContext, \
     PIO_STATUS_BLOCK IoStatusBlock, \
     ULONG IoControlCode, \
     PVOID InputBuffer, \
     ULONG InputBufferLength, \
     PVOID OutputBuffer, \
     ULONG OutputBufferLength)) \
  \
  X(NTSTATUS, \
    NTAPI, \
    NtReleaseKeyedEvent, \
    (HANDLE KeyedEventHandle, \
     PVOID KeyValue, \
     BOOLEAN Alertable, \
     PLARGE_INTEGER Timeout)) \
  \
  X(NTSTATUS, \
    NTAPI, \
    NtWaitForKeyedEvent, \
    (HANDLE KeyedEventHandle, \
     PVOID KeyValue, \
     BOOLEAN Alertable, \
     PLARGE_INTEGER Timeout)) \
  \
  X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status))

#define X(return_type, attributes, name, parameters) \
  WEPOLL_INTERNAL_EXTERN return_type(attributes* name) parameters;
NT_NTDLL_IMPORT_LIST(X)
#undef X
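
/* A minimal sketch of what the X-macro above expands to for one entry (the
 * actual declarations are generated by NT_NTDLL_IMPORT_LIST(X), with
 * WEPOLL_INTERNAL_EXTERN defined as `static`):
 *
 *   static NTSTATUS(NTAPI* NtCancelIoFileEx)(HANDLE FileHandle,
 *                                            PIO_STATUS_BLOCK IoRequestToCancel,
 *                                            PIO_STATUS_BLOCK IoStatusBlock);
 *
 * Each name therefore becomes a function pointer that nt_global_init() later
 * resolves from ntdll.dll with GetProcAddress(). */
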
#define AFD_POLL_RECEIVE 0x0001
#define AFD_POLL_RECEIVE_EXPEDITED 0x0002
#define AFD_POLL_SEND 0x0004
#define AFD_POLL_DISCONNECT 0x0008
#define AFD_POLL_ABORT 0x0010
#define AFD_POLL_LOCAL_CLOSE 0x0020
#define AFD_POLL_ACCEPT 0x0080
#define AFD_POLL_CONNECT_FAIL 0x0100

typedef struct _AFD_POLL_HANDLE_INFO {
  HANDLE Handle;
  ULONG Events;
  NTSTATUS Status;
} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO;

typedef struct _AFD_POLL_INFO {
  LARGE_INTEGER Timeout;
  ULONG NumberOfHandles;
  ULONG Exclusive;
  AFD_POLL_HANDLE_INFO Handles[1];
} AFD_POLL_INFO, *PAFD_POLL_INFO;

WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle,
                                             HANDLE* afd_device_handle_out);

WEPOLL_INTERNAL int afd_poll(HANDLE afd_device_handle,
                             AFD_POLL_INFO* poll_info,
                             IO_STATUS_BLOCK* io_status_block);
WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle,
                                    IO_STATUS_BLOCK* io_status_block);

#define return_map_error(value) \
  do {                          \
    err_map_win_error();        \
    return (value);             \
  } while (0)

#define return_set_error(value, error) \
  do {                                 \
    err_set_win_error(error);          \
    return (value);                    \
  } while (0)

WEPOLL_INTERNAL void err_map_win_error(void);
WEPOLL_INTERNAL void err_set_win_error(DWORD error);
WEPOLL_INTERNAL int err_check_handle(HANDLE handle);

#define IOCTL_AFD_POLL 0x00012024

static UNICODE_STRING afd__device_name =
    RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll");

static OBJECT_ATTRIBUTES afd__device_attributes =
    RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0);

int afd_create_device_handle(HANDLE iocp_handle,
                             HANDLE* afd_device_handle_out) {
  HANDLE afd_device_handle;
  IO_STATUS_BLOCK iosb;
  NTSTATUS status;

  /* By opening \Device\Afd without specifying any extended attributes, we'll
   * get a handle that lets us talk to the AFD driver, but that doesn't have an
   * associated endpoint (so it's not a socket). */
  status = NtCreateFile(&afd_device_handle,
                        SYNCHRONIZE,
                        &afd__device_attributes,
                        &iosb,
                        NULL,
                        0,
                        FILE_SHARE_READ | FILE_SHARE_WRITE,
                        FILE_OPEN,
                        0,
                        NULL,
                        0);
  if (status != STATUS_SUCCESS)
    return_set_error(-1, RtlNtStatusToDosError(status));

  if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL)
    goto error;

  if (!SetFileCompletionNotificationModes(afd_device_handle,
                                          FILE_SKIP_SET_EVENT_ON_HANDLE))
    goto error;

  *afd_device_handle_out = afd_device_handle;
  return 0;

error:
  CloseHandle(afd_device_handle);
  return_map_error(-1);
}

int afd_poll(HANDLE afd_device_handle,
             AFD_POLL_INFO* poll_info,
             IO_STATUS_BLOCK* io_status_block) {
  NTSTATUS status;

  /* Blocking operation is not supported. */
  assert(io_status_block != NULL);

  io_status_block->Status = STATUS_PENDING;
  status = NtDeviceIoControlFile(afd_device_handle,
                                 NULL,
                                 NULL,
                                 io_status_block,
                                 io_status_block,
                                 IOCTL_AFD_POLL,
                                 poll_info,
                                 sizeof *poll_info,
                                 poll_info,
                                 sizeof *poll_info);

  if (status == STATUS_SUCCESS)
    return 0;
  else if (status == STATUS_PENDING)
    return_set_error(-1, ERROR_IO_PENDING);
  else
    return_set_error(-1, RtlNtStatusToDosError(status));
}

int afd_cancel_poll(HANDLE afd_device_handle,
                    IO_STATUS_BLOCK* io_status_block) {
  NTSTATUS cancel_status;
  IO_STATUS_BLOCK cancel_iosb;

  /* If the poll operation has already completed or has been cancelled earlier,
   * there's nothing left for us to do. */
  if (io_status_block->Status != STATUS_PENDING)
    return 0;

  cancel_status =
      NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb);

  /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed
   * just before calling NtCancelIoFileEx(). This is not an error. */
  if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND)
    return 0;
  else
    return_set_error(-1, RtlNtStatusToDosError(cancel_status));
}

WEPOLL_INTERNAL int epoll_global_init(void);

WEPOLL_INTERNAL int init(void);

typedef struct port_state port_state_t;
typedef struct queue queue_t;
typedef struct sock_state sock_state_t;
typedef struct ts_tree_node ts_tree_node_t;

WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_handle_out);
WEPOLL_INTERNAL int port_close(port_state_t* port_state);
WEPOLL_INTERNAL int port_delete(port_state_t* port_state);

WEPOLL_INTERNAL int port_wait(port_state_t* port_state,
                              struct epoll_event* events,
                              int maxevents,
                              int timeout);

WEPOLL_INTERNAL int port_ctl(port_state_t* port_state,
                             int op,
                             SOCKET sock,
                             struct epoll_event* ev);

WEPOLL_INTERNAL int port_register_socket(port_state_t* port_state,
                                         sock_state_t* sock_state,
                                         SOCKET socket);
WEPOLL_INTERNAL void port_unregister_socket(port_state_t* port_state,
                                            sock_state_t* sock_state);
WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state,
                                               SOCKET socket);

WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state,
                                                sock_state_t* sock_state);
WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state,
                                               sock_state_t* sock_state);

WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state,
                                             sock_state_t* sock_state);
WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state,
                                                sock_state_t* sock_state);

WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state);
WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* port_state);

WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node(
    ts_tree_node_t* tree_node);
WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node(
    port_state_t* port_state);
/* The reflock is a special kind of lock that normally prevents a chunk of
 * memory from being freed, but does allow the chunk of memory to eventually be
 * released in a coordinated fashion.
 *
 * Under normal operation, threads increase and decrease the reference count,
 * both of which are wait-free operations.
 *
 * Exactly once during the reflock's lifecycle, a thread holding a reference to
 * the lock may "destroy" the lock; this operation blocks until all other
 * threads holding a reference to the lock have dereferenced it. After
 * "destroy" returns, the calling thread may assume that no other threads have
 * a reference to the lock.
 *
 * Attempting to reference or destroy a lock after reflock_unref_and_destroy()
 * has been called is invalid and results in undefined behavior. Therefore the
 * user should use another lock to guarantee that this can't happen.
 */

typedef struct reflock {
  volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */
} reflock_t;

WEPOLL_INTERNAL int reflock_global_init(void);

WEPOLL_INTERNAL void reflock_init(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock);
WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock);
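
/* A minimal usage sketch (hypothetical caller, not wepoll code): every thread
 * that wants to touch the protected memory brackets its access with
 * reflock_ref()/reflock_unref(), and exactly one thread eventually replaces
 * its final unref with the destroy call:
 *
 *   reflock_ref(&obj->reflock);
 *   ... use obj ...
 *   reflock_unref(&obj->reflock);
 *
 *   The single destroying thread, while still holding its own reference:
 *   reflock_unref_and_destroy(&obj->reflock);  (blocks until other refs drop)
 *   free(obj);
 */
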
#include <stdbool.h>

/* N.b.: the tree functions do not set errno or LastError when they fail. Each
 * of the API functions has at most one failure mode. It is up to the caller to
 * set an appropriate error code when necessary. */

typedef struct tree tree_t;
typedef struct tree_node tree_node_t;

typedef struct tree {
  tree_node_t* root;
} tree_t;

typedef struct tree_node {
  tree_node_t* left;
  tree_node_t* right;
  tree_node_t* parent;
  uintptr_t key;
  bool red;
} tree_node_t;

WEPOLL_INTERNAL void tree_init(tree_t* tree);
WEPOLL_INTERNAL void tree_node_init(tree_node_t* node);

WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key);
WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node);

WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key);
WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree);

typedef struct ts_tree {
  tree_t tree;
  SRWLOCK lock;
} ts_tree_t;

typedef struct ts_tree_node {
  tree_node_t tree_node;
  reflock_t reflock;
} ts_tree_node_t;

WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl);
WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node);

WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree,
                                ts_tree_node_t* node,
                                uintptr_t key);

WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree,
                                                    uintptr_t key);
WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree,
                                                     uintptr_t key);

WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node);
WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node);

static ts_tree_t epoll__handle_tree;

int epoll_global_init(void) {
  ts_tree_init(&epoll__handle_tree);
  return 0;
}

static HANDLE epoll__create(void) {
  port_state_t* port_state;
  HANDLE ephnd;
  ts_tree_node_t* tree_node;

  if (init() < 0)
    return NULL;

  port_state = port_new(&ephnd);
  if (port_state == NULL)
    return NULL;

  tree_node = port_state_to_handle_tree_node(port_state);
  if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) {
    /* This should never happen. */
    port_delete(port_state);
    return_set_error(NULL, ERROR_ALREADY_EXISTS);
  }

  return ephnd;
}

HANDLE epoll_create(int size) {
  if (size <= 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);

  return epoll__create();
}

HANDLE epoll_create1(int flags) {
  if (flags != 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);

  return epoll__create();
}

int epoll_close(HANDLE ephnd) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;

  if (init() < 0)
    return -1;

  tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = port_state_from_handle_tree_node(tree_node);
  port_close(port_state);

  ts_tree_node_unref_and_destroy(tree_node);

  return port_delete(port_state);

err:
  err_check_handle(ephnd);
  return -1;
}

int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;
  int r;

  if (init() < 0)
    return -1;

  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = port_state_from_handle_tree_node(tree_node);
  r = port_ctl(port_state, op, sock, ev);

  ts_tree_node_unref(tree_node);

  if (r < 0)
    goto err;

  return 0;

err:
  /* On Linux, in the case of epoll_ctl(), EBADF takes priority over other
   * errors. Wepoll mimics this behavior. */
  err_check_handle(ephnd);
  err_check_handle((HANDLE) sock);
  return -1;
}

int epoll_wait(HANDLE ephnd,
               struct epoll_event* events,
               int maxevents,
               int timeout) {
  ts_tree_node_t* tree_node;
  port_state_t* port_state;
  int num_events;

  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);

  if (init() < 0)
    return -1;

  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }

  port_state = port_state_from_handle_tree_node(tree_node);
  num_events = port_wait(port_state, events, maxevents, timeout);

  ts_tree_node_unref(tree_node);

  if (num_events < 0)
    goto err;

  return num_events;

err:
  err_check_handle(ephnd);
  return -1;
}

#include <errno.h>

#define ERR__ERRNO_MAPPINGS(X) \
  X(ERROR_ACCESS_DENIED, EACCES) \
  X(ERROR_ALREADY_EXISTS, EEXIST) \
  X(ERROR_BAD_COMMAND, EACCES) \
  X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \
  X(ERROR_BAD_LENGTH, EACCES) \
  X(ERROR_BAD_NETPATH, ENOENT) \
  X(ERROR_BAD_NET_NAME, ENOENT) \
  X(ERROR_BAD_NET_RESP, ENETDOWN) \
  X(ERROR_BAD_PATHNAME, ENOENT) \
  X(ERROR_BROKEN_PIPE, EPIPE) \
  X(ERROR_CANNOT_MAKE, EACCES) \
  X(ERROR_COMMITMENT_LIMIT, ENOMEM) \
  X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \
  X(ERROR_CONNECTION_ACTIVE, EISCONN) \
  X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \
  X(ERROR_CRC, EACCES) \
  X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \
  X(ERROR_DISK_FULL, ENOSPC) \
  X(ERROR_DUP_NAME, EADDRINUSE) \
  X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \
  X(ERROR_FILE_NOT_FOUND, ENOENT) \
  X(ERROR_GEN_FAILURE, EACCES) \
  X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \
  X(ERROR_HOST_DOWN, EHOSTUNREACH) \
  X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \
  X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \
  X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \
  X(ERROR_INVALID_FUNCTION, EINVAL) \
  X(ERROR_INVALID_HANDLE, EBADF) \
  X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \
  X(ERROR_INVALID_PARAMETER, EINVAL) \
  X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \
  X(ERROR_IO_PENDING, EINPROGRESS) \
  X(ERROR_LOCK_VIOLATION, EACCES) \
  X(ERROR_MORE_DATA, EMSGSIZE) \
  X(ERROR_NETNAME_DELETED, ECONNABORTED) \
  X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \
  X(ERROR_NETWORK_BUSY, ENETDOWN) \
  X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \
  X(ERROR_NOACCESS, EFAULT) \
  X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \
  X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \
  X(ERROR_NOT_FOUND, ENOENT) \
  X(ERROR_NOT_LOCKED, EACCES) \
  X(ERROR_NOT_READY, EACCES) \
  X(ERROR_NOT_SAME_DEVICE, EXDEV) \
  X(ERROR_NOT_SUPPORTED, ENOTSUP) \
  X(ERROR_NO_MORE_FILES, ENOENT) \
  X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_OPERATION_ABORTED, EINTR) \
  X(ERROR_OUT_OF_PAPER, EACCES) \
  X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_PAGEFILE_QUOTA, ENOMEM) \
  X(ERROR_PATH_NOT_FOUND, ENOENT) \
  X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \
  X(ERROR_PORT_UNREACHABLE, ECONNRESET) \
  X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \
  X(ERROR_REM_NOT_LIST, ECONNREFUSED) \
  X(ERROR_REQUEST_ABORTED, EINTR) \
  X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \
  X(ERROR_SECTOR_NOT_FOUND, EACCES) \
  X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \
  X(ERROR_SHARING_VIOLATION, EACCES) \
  X(ERROR_TOO_MANY_NAMES, ENOMEM) \
  X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \
  X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \
  X(ERROR_WAIT_NO_CHILDREN, ECHILD) \
  X(ERROR_WORKING_SET_QUOTA, ENOMEM) \
  X(ERROR_WRITE_PROTECT, EACCES) \
  X(ERROR_WRONG_DISK, EACCES) \
  X(WSAEACCES, EACCES) \
  X(WSAEADDRINUSE, EADDRINUSE) \
  X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \
  X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \
  X(WSAECONNABORTED, ECONNABORTED) \
  X(WSAECONNREFUSED, ECONNREFUSED) \
  X(WSAECONNRESET, ECONNRESET) \
  X(WSAEDISCON, EPIPE) \
  X(WSAEFAULT, EFAULT) \
  X(WSAEHOSTDOWN, EHOSTUNREACH) \
  X(WSAEHOSTUNREACH, EHOSTUNREACH) \
  X(WSAEINPROGRESS, EBUSY) \
  X(WSAEINTR, EINTR) \
  X(WSAEINVAL, EINVAL) \
  X(WSAEISCONN, EISCONN) \
  X(WSAEMSGSIZE, EMSGSIZE) \
  X(WSAENETDOWN, ENETDOWN) \
  X(WSAENETRESET, EHOSTUNREACH) \
  X(WSAENETUNREACH, ENETUNREACH) \
  X(WSAENOBUFS, ENOMEM) \
  X(WSAENOTCONN, ENOTCONN) \
  X(WSAENOTSOCK, ENOTSOCK) \
  X(WSAEOPNOTSUPP, EOPNOTSUPP) \
  X(WSAEPROCLIM, ENOMEM) \
  X(WSAESHUTDOWN, EPIPE) \
  X(WSAETIMEDOUT, ETIMEDOUT) \
  X(WSAEWOULDBLOCK, EWOULDBLOCK) \
  X(WSANOTINITIALISED, ENETDOWN) \
  X(WSASYSNOTREADY, ENETDOWN) \
  X(WSAVERNOTSUPPORTED, ENOSYS)

static errno_t err__map_win_error_to_errno(DWORD error) {
  switch (error) {
#define X(error_sym, errno_sym) \
  case error_sym:               \
    return errno_sym;
    ERR__ERRNO_MAPPINGS(X)
#undef X
  }
  return EINVAL;
}

void err_map_win_error(void) {
  errno = err__map_win_error_to_errno(GetLastError());
}

void err_set_win_error(DWORD error) {
  SetLastError(error);
  errno = err__map_win_error_to_errno(error);
}

int err_check_handle(HANDLE handle) {
  DWORD flags;

  /* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check
   * for this condition explicitly. */
  if (handle == INVALID_HANDLE_VALUE)
    return_set_error(-1, ERROR_INVALID_HANDLE);

  if (!GetHandleInformation(handle, &flags))
    return_map_error(-1);

  return 0;
}

#include <stddef.h>

#define array_count(a) (sizeof(a) / (sizeof((a)[0])))

#define container_of(ptr, type, member) \
  ((type*) ((uintptr_t) (ptr) - offsetof(type, member)))
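
/* For example (hypothetical illustration): given a `sock_state_t s` that
 * embeds a `queue_node_t queue_node` member,
 *
 *   container_of(&s.queue_node, sock_state_t, queue_node) == &s
 *
 * i.e. the macro recovers the address of the enclosing struct from a pointer
 * to one of its members. */
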
#define unused_var(v) ((void) (v))

/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */
#if defined(_MSC_VER) && _MSC_VER < 1900
#define inline __inline
#endif

WEPOLL_INTERNAL int ws_global_init(void);
WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket);
static bool init__done = false;
static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT;

static BOOL CALLBACK init__once_callback(INIT_ONCE* once,
                                         void* parameter,
                                         void** context) {
  unused_var(once);
  unused_var(parameter);
  unused_var(context);

  /* N.b. that initialization order matters here. */
  if (ws_global_init() < 0 || nt_global_init() < 0 ||
      reflock_global_init() < 0 || epoll_global_init() < 0)
    return FALSE;

  init__done = true;
  return TRUE;
}

int init(void) {
  if (!init__done &&
      !InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL))
    /* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any
     * error code when the once-callback returns FALSE. We return -1 here to
     * indicate that global initialization failed; the failing init function is
     * responsible for setting `errno` and calling `SetLastError()`. */
    return -1;

  return 0;
}
/* Set up a workaround for the following problem:
 *   FARPROC addr = GetProcAddress(...);
 *   MY_FUNC func = (MY_FUNC) addr;          <-- GCC 8 warning/error.
 *   MY_FUNC func = (MY_FUNC) (void*) addr;  <-- MSVC warning/error.
 * To compile cleanly with either compiler, do casts with this "bridge" type:
 *   MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */
#ifdef __GNUC__
typedef void* nt__fn_ptr_cast_t;
#else
typedef FARPROC nt__fn_ptr_cast_t;
#endif

#define X(return_type, attributes, name, parameters) \
  WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL;
NT_NTDLL_IMPORT_LIST(X)
#undef X

int nt_global_init(void) {
  HMODULE ntdll;
  FARPROC fn_ptr;

  ntdll = GetModuleHandleW(L"ntdll.dll");
  if (ntdll == NULL)
    return -1;

#define X(return_type, attributes, name, parameters) \
  fn_ptr = GetProcAddress(ntdll, #name);             \
  if (fn_ptr == NULL)                                \
    return -1;                                       \
  name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr;
  NT_NTDLL_IMPORT_LIST(X)
#undef X

  return 0;
}

#include <string.h>

typedef struct poll_group poll_group_t;

typedef struct queue_node queue_node_t;

WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port);
WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group);

WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group);

WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node(
    queue_node_t* queue_node);
WEPOLL_INTERNAL HANDLE
poll_group_get_afd_device_handle(poll_group_t* poll_group);

typedef struct queue_node {
  queue_node_t* prev;
  queue_node_t* next;
} queue_node_t;

typedef struct queue {
  queue_node_t head;
} queue_t;

WEPOLL_INTERNAL void queue_init(queue_t* queue);
WEPOLL_INTERNAL void queue_node_init(queue_node_t* node);

WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue);
WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* queue);

WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_move_to_start(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_move_to_end(queue_t* queue, queue_node_t* node);
WEPOLL_INTERNAL void queue_remove(queue_node_t* node);

WEPOLL_INTERNAL bool queue_is_empty(const queue_t* queue);
WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t* node);
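
/* A short note on this data structure (added here for clarity, not part of the
 * original comments): the queue is an intrusive, circular, doubly-linked list.
 * `queue_t` holds only a sentinel `head` node; an empty queue is one where
 * `head.prev == head.next == &head`, and a detached node points at itself,
 * which is exactly what queue_is_enqueued() checks. Containers embed a
 * `queue_node_t` and use container_of() to get back to the enclosing struct,
 * as poll_group_from_queue_node() above does. */
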
#define POLL_GROUP__MAX_GROUP_SIZE 32

typedef struct poll_group {
  port_state_t* port_state;
  queue_node_t queue_node;
  HANDLE afd_device_handle;
  size_t group_size;
} poll_group_t;

static poll_group_t* poll_group__new(port_state_t* port_state) {
  HANDLE iocp_handle = port_get_iocp_handle(port_state);
  queue_t* poll_group_queue = port_get_poll_group_queue(port_state);

  poll_group_t* poll_group = malloc(sizeof *poll_group);
  if (poll_group == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);

  memset(poll_group, 0, sizeof *poll_group);

  queue_node_init(&poll_group->queue_node);
  poll_group->port_state = port_state;

  if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) <
      0) {
    free(poll_group);
    return NULL;
  }

  queue_append(poll_group_queue, &poll_group->queue_node);

  return poll_group;
}

void poll_group_delete(poll_group_t* poll_group) {
  assert(poll_group->group_size == 0);
  CloseHandle(poll_group->afd_device_handle);
  queue_remove(&poll_group->queue_node);
  free(poll_group);
}

poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) {
  return container_of(queue_node, poll_group_t, queue_node);
}

HANDLE poll_group_get_afd_device_handle(poll_group_t* poll_group) {
  return poll_group->afd_device_handle;
}

poll_group_t* poll_group_acquire(port_state_t* port_state) {
  queue_t* poll_group_queue = port_get_poll_group_queue(port_state);
  poll_group_t* poll_group =
      !queue_is_empty(poll_group_queue)
          ? container_of(
                queue_last(poll_group_queue), poll_group_t, queue_node)
          : NULL;

  if (poll_group == NULL ||
      poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE)
    poll_group = poll_group__new(port_state);
  if (poll_group == NULL)
    return NULL;

  if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE)
    queue_move_to_start(poll_group_queue, &poll_group->queue_node);

  return poll_group;
}

void poll_group_release(poll_group_t* poll_group) {
  port_state_t* port_state = poll_group->port_state;
  queue_t* poll_group_queue = port_get_poll_group_queue(port_state);

  poll_group->group_size--;
  assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE);

  queue_move_to_end(poll_group_queue, &poll_group->queue_node);

  /* Poll groups are currently only freed when the epoll port is closed. */
}

WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state,
                                       SOCKET socket);
WEPOLL_INTERNAL void sock_delete(port_state_t* port_state,
                                 sock_state_t* sock_state);
WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state,
                                       sock_state_t* sock_state);

WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state,
                                   sock_state_t* sock_state,
                                   const struct epoll_event* ev);

WEPOLL_INTERNAL int sock_update(port_state_t* port_state,
                                sock_state_t* sock_state);
WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state,
                                    IO_STATUS_BLOCK* io_status_block,
                                    struct epoll_event* ev);

WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node(
    queue_node_t* queue_node);
WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node(
    sock_state_t* sock_state);
WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node(
    tree_node_t* tree_node);
WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state);

#define PORT__MAX_ON_STACK_COMPLETIONS 256

typedef struct port_state {
  HANDLE iocp_handle;
  tree_t sock_tree;
  queue_t sock_update_queue;
  queue_t sock_deleted_queue;
  queue_t poll_group_queue;
  ts_tree_node_t handle_tree_node;
  CRITICAL_SECTION lock;
  size_t active_poll_count;
} port_state_t;
static inline port_state_t* port__alloc(void) {
  port_state_t* port_state = malloc(sizeof *port_state);
  if (port_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);

  return port_state;
}

static inline void port__free(port_state_t* port) {
  assert(port != NULL);
  free(port);
}

static inline HANDLE port__create_iocp(void) {
  HANDLE iocp_handle =
      CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
  if (iocp_handle == NULL)
    return_map_error(NULL);

  return iocp_handle;
}

port_state_t* port_new(HANDLE* iocp_handle_out) {
  port_state_t* port_state;
  HANDLE iocp_handle;

  port_state = port__alloc();
  if (port_state == NULL)
    goto err1;

  iocp_handle = port__create_iocp();
  if (iocp_handle == NULL)
    goto err2;

  memset(port_state, 0, sizeof *port_state);

  port_state->iocp_handle = iocp_handle;
  tree_init(&port_state->sock_tree);
  queue_init(&port_state->sock_update_queue);
  queue_init(&port_state->sock_deleted_queue);
  queue_init(&port_state->poll_group_queue);
  ts_tree_node_init(&port_state->handle_tree_node);
  InitializeCriticalSection(&port_state->lock);

  *iocp_handle_out = iocp_handle;
  return port_state;

err2:
  port__free(port_state);
err1:
  return NULL;
}

static inline int port__close_iocp(port_state_t* port_state) {
  HANDLE iocp_handle = port_state->iocp_handle;
  port_state->iocp_handle = NULL;

  if (!CloseHandle(iocp_handle))
    return_map_error(-1);

  return 0;
}

int port_close(port_state_t* port_state) {
  int result;

  EnterCriticalSection(&port_state->lock);
  result = port__close_iocp(port_state);
  LeaveCriticalSection(&port_state->lock);

  return result;
}

int port_delete(port_state_t* port_state) {
  tree_node_t* tree_node;
  queue_node_t* queue_node;

  /* At this point the IOCP port should have been closed. */
  assert(port_state->iocp_handle == NULL);

  while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) {
    sock_state_t* sock_state = sock_state_from_tree_node(tree_node);
    sock_force_delete(port_state, sock_state);
  }

  while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) {
    sock_state_t* sock_state = sock_state_from_queue_node(queue_node);
    sock_force_delete(port_state, sock_state);
  }

  while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) {
    poll_group_t* poll_group = poll_group_from_queue_node(queue_node);
    poll_group_delete(poll_group);
  }

  assert(queue_is_empty(&port_state->sock_update_queue));

  DeleteCriticalSection(&port_state->lock);

  port__free(port_state);

  return 0;
}

static int port__update_events(port_state_t* port_state) {
  queue_t* sock_update_queue = &port_state->sock_update_queue;

  /* Walk the queue, submitting new poll requests for every socket that needs
   * it. */
  while (!queue_is_empty(sock_update_queue)) {
    queue_node_t* queue_node = queue_first(sock_update_queue);
    sock_state_t* sock_state = sock_state_from_queue_node(queue_node);

    if (sock_update(port_state, sock_state) < 0)
      return -1;

    /* sock_update() removes the socket from the update queue. */
  }

  return 0;
}

static inline void port__update_events_if_polling(port_state_t* port_state) {
  if (port_state->active_poll_count > 0)
    port__update_events(port_state);
}

static inline int port__feed_events(port_state_t* port_state,
                                    struct epoll_event* epoll_events,
                                    OVERLAPPED_ENTRY* iocp_events,
                                    DWORD iocp_event_count) {
  int epoll_event_count = 0;
  DWORD i;

  for (i = 0; i < iocp_event_count; i++) {
    IO_STATUS_BLOCK* io_status_block =
        (IO_STATUS_BLOCK*) iocp_events[i].lpOverlapped;
    struct epoll_event* ev = &epoll_events[epoll_event_count];

    epoll_event_count += sock_feed_event(port_state, io_status_block, ev);
  }

  return epoll_event_count;
}

static inline int port__poll(port_state_t* port_state,
                             struct epoll_event* epoll_events,
                             OVERLAPPED_ENTRY* iocp_events,
                             DWORD maxevents,
                             DWORD timeout) {
  DWORD completion_count;

  if (port__update_events(port_state) < 0)
    return -1;

  port_state->active_poll_count++;

  LeaveCriticalSection(&port_state->lock);

  BOOL r = GetQueuedCompletionStatusEx(port_state->iocp_handle,
                                       iocp_events,
                                       maxevents,
                                       &completion_count,
                                       timeout,
                                       FALSE);

  EnterCriticalSection(&port_state->lock);

  port_state->active_poll_count--;

  if (!r)
    return_map_error(-1);

  return port__feed_events(
      port_state, epoll_events, iocp_events, completion_count);
}
int port_wait(port_state_t* port_state,
              struct epoll_event* events,
              int maxevents,
              int timeout) {
  OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS];
  OVERLAPPED_ENTRY* iocp_events;
  uint64_t due = 0;
  DWORD gqcs_timeout;
  int result;

  /* Check whether `maxevents` is in range. */
  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);

  /* Decide whether the IOCP completion list can live on the stack, or allocate
   * memory for it on the heap. */
  if ((size_t) maxevents <= array_count(stack_iocp_events)) {
    iocp_events = stack_iocp_events;
  } else if ((iocp_events =
                  malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) {
    iocp_events = stack_iocp_events;
    maxevents = array_count(stack_iocp_events);
  }

  /* Compute the timeout for GetQueuedCompletionStatus, and the wait end
   * time, if the user specified a timeout other than zero or infinite. */
  if (timeout > 0) {
    due = GetTickCount64() + (uint64_t) timeout;
    gqcs_timeout = (DWORD) timeout;
  } else if (timeout == 0) {
    gqcs_timeout = 0;
  } else {
    gqcs_timeout = INFINITE;
  }

  EnterCriticalSection(&port_state->lock);

  /* Dequeue completion packets until either at least one interesting event
   * has been discovered, or the timeout is reached. */
  for (;;) {
    uint64_t now;

    result = port__poll(
        port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout);
    if (result < 0 || result > 0)
      break; /* Result, error, or time-out. */

    if (timeout < 0)
      continue; /* When timeout is negative, never time out. */

    /* Update time. */
    now = GetTickCount64();

    /* Do not allow the due time to be in the past. */
    if (now >= due) {
      SetLastError(WAIT_TIMEOUT);
      break;
    }

    /* Recompute time-out argument for GetQueuedCompletionStatus. */
    gqcs_timeout = (DWORD)(due - now);
  }

  port__update_events_if_polling(port_state);

  LeaveCriticalSection(&port_state->lock);

  if (iocp_events != stack_iocp_events)
    free(iocp_events);

  if (result >= 0)
    return result;
  else if (GetLastError() == WAIT_TIMEOUT)
    return 0;
  else
    return -1;
}
static inline int port__ctl_add(port_state_t* port_state,
                                SOCKET sock,
                                struct epoll_event* ev) {
  sock_state_t* sock_state = sock_new(port_state, sock);
  if (sock_state == NULL)
    return -1;

  if (sock_set_event(port_state, sock_state, ev) < 0) {
    sock_delete(port_state, sock_state);
    return -1;
  }

  port__update_events_if_polling(port_state);

  return 0;
}

static inline int port__ctl_mod(port_state_t* port_state,
                                SOCKET sock,
                                struct epoll_event* ev) {
  sock_state_t* sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;

  if (sock_set_event(port_state, sock_state, ev) < 0)
    return -1;

  port__update_events_if_polling(port_state);

  return 0;
}

static inline int port__ctl_del(port_state_t* port_state, SOCKET sock) {
  sock_state_t* sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;

  sock_delete(port_state, sock_state);

  return 0;
}

static inline int port__ctl_op(port_state_t* port_state,
                               int op,
                               SOCKET sock,
                               struct epoll_event* ev) {
  switch (op) {
    case EPOLL_CTL_ADD:
      return port__ctl_add(port_state, sock, ev);
    case EPOLL_CTL_MOD:
      return port__ctl_mod(port_state, sock, ev);
    case EPOLL_CTL_DEL:
      return port__ctl_del(port_state, sock);
    default:
      return_set_error(-1, ERROR_INVALID_PARAMETER);
  }
}

int port_ctl(port_state_t* port_state,
             int op,
             SOCKET sock,
             struct epoll_event* ev) {
  int result;

  EnterCriticalSection(&port_state->lock);
  result = port__ctl_op(port_state, op, sock, ev);
  LeaveCriticalSection(&port_state->lock);

  return result;
}

int port_register_socket(port_state_t* port_state,
                         sock_state_t* sock_state,
                         SOCKET socket) {
  if (tree_add(&port_state->sock_tree,
               sock_state_to_tree_node(sock_state),
               socket) < 0)
    return_set_error(-1, ERROR_ALREADY_EXISTS);
  return 0;
}

void port_unregister_socket(port_state_t* port_state,
                            sock_state_t* sock_state) {
  tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state));
}

sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) {
  tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket);
  if (tree_node == NULL)
    return_set_error(NULL, ERROR_NOT_FOUND);
  return sock_state_from_tree_node(tree_node);
}

void port_request_socket_update(port_state_t* port_state,
                                sock_state_t* sock_state) {
  if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_update_queue,
               sock_state_to_queue_node(sock_state));
}

void port_cancel_socket_update(port_state_t* port_state,
                               sock_state_t* sock_state) {
  unused_var(port_state);
  if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}

void port_add_deleted_socket(port_state_t* port_state,
                             sock_state_t* sock_state) {
  if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_deleted_queue,
               sock_state_to_queue_node(sock_state));
}

void port_remove_deleted_socket(port_state_t* port_state,
                                sock_state_t* sock_state) {
  unused_var(port_state);
  if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}

HANDLE port_get_iocp_handle(port_state_t* port_state) {
  assert(port_state->iocp_handle != NULL);
  return port_state->iocp_handle;
}

queue_t* port_get_poll_group_queue(port_state_t* port_state) {
  return &port_state->poll_group_queue;
}

port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) {
  return container_of(tree_node, port_state_t, handle_tree_node);
}

ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) {
  return &port_state->handle_tree_node;
}
void queue_init(queue_t* queue) {
  queue_node_init(&queue->head);
}

void queue_node_init(queue_node_t* node) {
  node->prev = node;
  node->next = node;
}

static inline void queue__detach_node(queue_node_t* node) {
  node->prev->next = node->next;
  node->next->prev = node->prev;
}

queue_node_t* queue_first(const queue_t* queue) {
  return !queue_is_empty(queue) ? queue->head.next : NULL;
}

queue_node_t* queue_last(const queue_t* queue) {
  return !queue_is_empty(queue) ? queue->head.prev : NULL;
}

void queue_prepend(queue_t* queue, queue_node_t* node) {
  node->next = queue->head.next;
  node->prev = &queue->head;
  node->next->prev = node;
  queue->head.next = node;
}

void queue_append(queue_t* queue, queue_node_t* node) {
  node->next = &queue->head;
  node->prev = queue->head.prev;
  node->prev->next = node;
  queue->head.prev = node;
}

void queue_move_to_start(queue_t* queue, queue_node_t* node) {
  queue__detach_node(node);
  queue_prepend(queue, node);
}

void queue_move_to_end(queue_t* queue, queue_node_t* node) {
  queue__detach_node(node);
  queue_append(queue, node);
}

void queue_remove(queue_node_t* node) {
  queue__detach_node(node);
  queue_node_init(node);
}

bool queue_is_empty(const queue_t* queue) {
  return !queue_is_enqueued(&queue->head);
}

bool queue_is_enqueued(const queue_node_t* node) {
  return node->prev != node;
}
#define REFLOCK__REF          ((long) 0x00000001UL)
#define REFLOCK__REF_MASK     ((long) 0x0fffffffUL)
#define REFLOCK__DESTROY      ((long) 0x10000000UL)
#define REFLOCK__DESTROY_MASK ((long) 0xf0000000UL)
#define REFLOCK__POISON       ((long) 0x300dead0UL)
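
/* Descriptive note (added for clarity): the low 28 bits of `state` hold the
 * reference count (each reflock_ref() adds REFLOCK__REF), bit 28 is set
 * exactly once by reflock_unref_and_destroy(), and REFLOCK__POISON is the
 * value the state is left with after destruction, so that any later ref or
 * unref trips the asserts below. */
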
  1232. static HANDLE reflock__keyed_event = NULL;
  1233. int reflock_global_init(void) {
  1234. NTSTATUS status = NtCreateKeyedEvent(
  1235. &reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0);
  1236. if (status != STATUS_SUCCESS)
  1237. return_set_error(-1, RtlNtStatusToDosError(status));
  1238. return 0;
  1239. }
  1240. void reflock_init(reflock_t* reflock) {
  1241. reflock->state = 0;
  1242. }
  1243. static void reflock__signal_event(void* address) {
  1244. NTSTATUS status =
  1245. NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  1246. if (status != STATUS_SUCCESS)
  1247. abort();
  1248. }
  1249. static void reflock__await_event(void* address) {
  1250. NTSTATUS status =
  1251. NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  1252. if (status != STATUS_SUCCESS)
  1253. abort();
  1254. }
  1255. void reflock_ref(reflock_t* reflock) {
  1256. long state = InterlockedAdd(&reflock->state, REFLOCK__REF);
  1257. /* Verify that the counter didn't overflow and the lock isn't destroyed. */
  1258. assert((state & REFLOCK__DESTROY_MASK) == 0);
  1259. unused_var(state);
  1260. }
  1261. void reflock_unref(reflock_t* reflock) {
  1262. long state = InterlockedAdd(&reflock->state, -REFLOCK__REF);
  1263. /* Verify that the lock was referenced and not already destroyed. */
  1264. assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0);
  1265. if (state == REFLOCK__DESTROY)
  1266. reflock__signal_event(reflock);
  1267. }
  1268. void reflock_unref_and_destroy(reflock_t* reflock) {
  1269. long state =
  1270. InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF);
  1271. long ref_count = state & REFLOCK__REF_MASK;
  1272. /* Verify that the lock was referenced and not already destroyed. */
  1273. assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY);
  1274. if (ref_count != 0)
  1275. reflock__await_event(reflock);
  1276. state = InterlockedExchange(&reflock->state, REFLOCK__POISON);
  1277. assert(state == REFLOCK__DESTROY);
  1278. }
#define SOCK__KNOWN_EPOLL_EVENTS                                       \
  (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \
   EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP)

typedef enum sock__poll_status {
  SOCK__POLL_IDLE = 0,
  SOCK__POLL_PENDING,
  SOCK__POLL_CANCELLED
} sock__poll_status_t;

typedef struct sock_state {
  IO_STATUS_BLOCK io_status_block;
  AFD_POLL_INFO poll_info;
  queue_node_t queue_node;
  tree_node_t tree_node;
  poll_group_t* poll_group;
  SOCKET base_socket;
  epoll_data_t user_data;
  uint32_t user_events;
  uint32_t pending_events;
  sock__poll_status_t poll_status;
  bool delete_pending;
} sock_state_t;
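/* Note on the layout above: `io_status_block` and `poll_info` are handed to
 * the AFD driver for the duration of a poll operation, which is why a
 * sock_state can't be freed while a poll is still pending (see sock__delete()
 * below); sock_feed_event() later recovers the sock_state_t from the
 * IO_STATUS_BLOCK pointer with container_of(). `queue_node` links the socket
 * into the port's update/deleted queues, and `tree_node` indexes it in the
 * port's socket lookup structure via port_register_socket(). */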
static inline sock_state_t* sock__alloc(void) {
  sock_state_t* sock_state = malloc(sizeof *sock_state);
  if (sock_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
  return sock_state;
}

static inline void sock__free(sock_state_t* sock_state) {
  assert(sock_state != NULL);
  free(sock_state);
}

static inline int sock__cancel_poll(sock_state_t* sock_state) {
  assert(sock_state->poll_status == SOCK__POLL_PENDING);

  if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group),
                      &sock_state->io_status_block) < 0)
    return -1;

  sock_state->poll_status = SOCK__POLL_CANCELLED;
  sock_state->pending_events = 0;
  return 0;
}
sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) {
  SOCKET base_socket;
  poll_group_t* poll_group;
  sock_state_t* sock_state;

  if (socket == 0 || socket == INVALID_SOCKET)
    return_set_error(NULL, ERROR_INVALID_HANDLE);

  base_socket = ws_get_base_socket(socket);
  if (base_socket == INVALID_SOCKET)
    return NULL;

  poll_group = poll_group_acquire(port_state);
  if (poll_group == NULL)
    return NULL;

  sock_state = sock__alloc();
  if (sock_state == NULL)
    goto err1;

  memset(sock_state, 0, sizeof *sock_state);

  sock_state->base_socket = base_socket;
  sock_state->poll_group = poll_group;

  tree_node_init(&sock_state->tree_node);
  queue_node_init(&sock_state->queue_node);

  if (port_register_socket(port_state, sock_state, socket) < 0)
    goto err2;

  return sock_state;

err2:
  sock__free(sock_state);
err1:
  poll_group_release(poll_group);
  return NULL;
}
static int sock__delete(port_state_t* port_state,
                        sock_state_t* sock_state,
                        bool force) {
  if (!sock_state->delete_pending) {
    if (sock_state->poll_status == SOCK__POLL_PENDING)
      sock__cancel_poll(sock_state);

    port_cancel_socket_update(port_state, sock_state);
    port_unregister_socket(port_state, sock_state);

    sock_state->delete_pending = true;
  }

  /* If the poll request still needs to complete, the sock_state object can't
   * be free()d yet. `sock_feed_event()` or `port_close()` will take care
   * of this later. */
  if (force || sock_state->poll_status == SOCK__POLL_IDLE) {
    /* Free the sock_state now. */
    port_remove_deleted_socket(port_state, sock_state);
    poll_group_release(sock_state->poll_group);
    sock__free(sock_state);
  } else {
    /* Free the socket later. */
    port_add_deleted_socket(port_state, sock_state);
  }

  return 0;
}

void sock_delete(port_state_t* port_state, sock_state_t* sock_state) {
  sock__delete(port_state, sock_state, false);
}

void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) {
  sock__delete(port_state, sock_state, true);
}
int sock_set_event(port_state_t* port_state,
                   sock_state_t* sock_state,
                   const struct epoll_event* ev) {
  /* EPOLLERR and EPOLLHUP are always reported, even when not requested by the
   * caller. However, they are disabled after an event has been reported for a
   * socket for which the EPOLLONESHOT flag was set. */
  uint32_t events = ev->events | EPOLLERR | EPOLLHUP;

  sock_state->user_events = events;
  sock_state->user_data = ev->data;

  if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0)
    port_request_socket_update(port_state, sock_state);

  return 0;
}
static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) {
  /* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the
   * socket is closed with closesocket() or CloseHandle(). */
  DWORD afd_events = AFD_POLL_LOCAL_CLOSE;

  if (epoll_events & (EPOLLIN | EPOLLRDNORM))
    afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT;
  if (epoll_events & (EPOLLPRI | EPOLLRDBAND))
    afd_events |= AFD_POLL_RECEIVE_EXPEDITED;
  if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND))
    afd_events |= AFD_POLL_SEND;
  if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP))
    afd_events |= AFD_POLL_DISCONNECT;
  if (epoll_events & EPOLLHUP)
    afd_events |= AFD_POLL_ABORT;
  if (epoll_events & EPOLLERR)
    afd_events |= AFD_POLL_CONNECT_FAIL;

  return afd_events;
}

static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) {
  uint32_t epoll_events = 0;

  if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT))
    epoll_events |= EPOLLIN | EPOLLRDNORM;
  if (afd_events & AFD_POLL_RECEIVE_EXPEDITED)
    epoll_events |= EPOLLPRI | EPOLLRDBAND;
  if (afd_events & AFD_POLL_SEND)
    epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  if (afd_events & AFD_POLL_DISCONNECT)
    epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
  if (afd_events & AFD_POLL_ABORT)
    epoll_events |= EPOLLHUP;
  if (afd_events & AFD_POLL_CONNECT_FAIL)
    /* Linux reports all these events after connect() has failed. */
    epoll_events |=
        EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP;

  return epoll_events;
}
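/* Worked example (illustrative): with EPOLLIN | EPOLLOUT requested,
 * sock__epoll_events_to_afd_events() returns AFD_POLL_LOCAL_CLOSE |
 * AFD_POLL_RECEIVE | AFD_POLL_ACCEPT | AFD_POLL_SEND | AFD_POLL_DISCONNECT.
 * In practice sock_set_event() always adds EPOLLERR and EPOLLHUP, so
 * AFD_POLL_ABORT and AFD_POLL_CONNECT_FAIL end up in the mask as well. The
 * reverse mapping is deliberately not a strict inverse: AFD_POLL_DISCONNECT,
 * for instance, surfaces as EPOLLIN | EPOLLRDNORM | EPOLLRDHUP, roughly
 * matching how Linux reports a peer shutdown. */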
int sock_update(port_state_t* port_state, sock_state_t* sock_state) {
  assert(!sock_state->delete_pending);

  if ((sock_state->poll_status == SOCK__POLL_PENDING) &&
      (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS &
       ~sock_state->pending_events) == 0) {
    /* All the events the user is interested in are already being monitored by
     * the pending poll operation. It might spuriously complete because of an
     * event that we're no longer interested in; when that happens we'll submit
     * a new poll operation with the updated event mask. */

  } else if (sock_state->poll_status == SOCK__POLL_PENDING) {
    /* A poll operation is already pending, but it's not monitoring for all the
     * events that the user is interested in. Therefore, cancel the pending
     * poll operation; when we receive its completion packet, a new poll
     * operation will be submitted with the correct event mask. */
    if (sock__cancel_poll(sock_state) < 0)
      return -1;

  } else if (sock_state->poll_status == SOCK__POLL_CANCELLED) {
    /* The poll operation has already been cancelled; we're still waiting for
     * it to return. For now, there's nothing that needs to be done. */

  } else if (sock_state->poll_status == SOCK__POLL_IDLE) {
    /* No poll operation is pending; start one. */
    sock_state->poll_info.Exclusive = FALSE;
    sock_state->poll_info.NumberOfHandles = 1;
    sock_state->poll_info.Timeout.QuadPart = INT64_MAX;
    sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket;
    sock_state->poll_info.Handles[0].Status = 0;
    sock_state->poll_info.Handles[0].Events =
        sock__epoll_events_to_afd_events(sock_state->user_events);

    if (afd_poll(poll_group_get_afd_device_handle(sock_state->poll_group),
                 &sock_state->poll_info,
                 &sock_state->io_status_block) < 0) {
      switch (GetLastError()) {
        case ERROR_IO_PENDING:
          /* Overlapped poll operation in progress; this is expected. */
          break;
        case ERROR_INVALID_HANDLE:
          /* Socket closed; it'll be dropped from the epoll set. */
          return sock__delete(port_state, sock_state, false);
        default:
          /* Other errors are propagated to the caller. */
          return_map_error(-1);
      }
    }

    /* The poll request was successfully submitted. */
    sock_state->poll_status = SOCK__POLL_PENDING;
    sock_state->pending_events = sock_state->user_events;

  } else {
    /* Unreachable. */
    assert(false);
  }

  port_cancel_socket_update(port_state, sock_state);
  return 0;
}
int sock_feed_event(port_state_t* port_state,
                    IO_STATUS_BLOCK* io_status_block,
                    struct epoll_event* ev) {
  sock_state_t* sock_state =
      container_of(io_status_block, sock_state_t, io_status_block);
  AFD_POLL_INFO* poll_info = &sock_state->poll_info;
  uint32_t epoll_events = 0;

  sock_state->poll_status = SOCK__POLL_IDLE;
  sock_state->pending_events = 0;

  if (sock_state->delete_pending) {
    /* Socket has been deleted earlier and can now be freed. */
    return sock__delete(port_state, sock_state, false);

  } else if (io_status_block->Status == STATUS_CANCELLED) {
    /* The poll request was cancelled by CancelIoEx. */

  } else if (!NT_SUCCESS(io_status_block->Status)) {
    /* The overlapped request itself failed in an unexpected way. */
    epoll_events = EPOLLERR;

  } else if (poll_info->NumberOfHandles < 1) {
    /* This poll operation succeeded but didn't report any socket events. */

  } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
    /* The poll operation reported that the socket was closed. */
    return sock__delete(port_state, sock_state, false);

  } else {
    /* Events related to our socket were reported. */
    epoll_events =
        sock__afd_events_to_epoll_events(poll_info->Handles[0].Events);
  }

  /* Requeue the socket so a new poll request will be submitted. */
  port_request_socket_update(port_state, sock_state);

  /* Filter out events that the user didn't ask for. */
  epoll_events &= sock_state->user_events;

  /* Return if there are no epoll events to report. */
  if (epoll_events == 0)
    return 0;

  /* If the socket has the EPOLLONESHOT flag set, unmonitor all events,
   * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */
  if (sock_state->user_events & EPOLLONESHOT)
    sock_state->user_events = 0;

  ev->data = sock_state->user_data;
  ev->events = epoll_events;
  return 1;
}
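/* A few observations about sock_feed_event(), hedged since the dispatching
 * code lives elsewhere in this file: it is driven by the port's wait loop,
 * which passes in the IO_STATUS_BLOCK recovered from a dequeued completion,
 * and its return value is the number of epoll events written to `ev` (0 or
 * 1). Returning 0 covers cancellations, deletions, and completions that carry
 * no user-visible events; on every non-delete path the socket is re-queued
 * with port_request_socket_update() so a fresh AFD poll is submitted on the
 * next update pass. */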
sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) {
  return container_of(queue_node, sock_state_t, queue_node);
}

queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) {
  return &sock_state->queue_node;
}

sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) {
  return container_of(tree_node, sock_state_t, tree_node);
}

tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) {
  return &sock_state->tree_node;
}
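/* ts_tree: a thread-safe wrapper around the red-black tree further below. All
 * mutations and lookups happen under an SRW lock, and every node carries a
 * reflock so that a node returned by ts_tree_find_and_ref() or
 * ts_tree_del_and_ref() stays valid after the lock has been released.
 *
 * Hedged usage sketch (the real caller is wepoll's epoll-handle bookkeeping;
 * see port_state_to_handle_tree_node() above):
 *
 *   ts_tree_node_t* node = ts_tree_find_and_ref(&tree, key);
 *   if (node != NULL) {
 *     // ... use the object containing `node` ...
 *     ts_tree_node_unref(node);
 *   }
 *
 *   // Teardown: unlink the node, then wait for concurrent users to drop
 *   // their references before the containing object is freed.
 *   node = ts_tree_del_and_ref(&tree, key);
 *   if (node != NULL)
 *     ts_tree_node_unref_and_destroy(node);
 */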
void ts_tree_init(ts_tree_t* ts_tree) {
  tree_init(&ts_tree->tree);
  InitializeSRWLock(&ts_tree->lock);
}

void ts_tree_node_init(ts_tree_node_t* node) {
  tree_node_init(&node->tree_node);
  reflock_init(&node->reflock);
}

int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) {
  int r;

  AcquireSRWLockExclusive(&ts_tree->lock);
  r = tree_add(&ts_tree->tree, &node->tree_node, key);
  ReleaseSRWLockExclusive(&ts_tree->lock);

  return r;
}

static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree,
                                                 uintptr_t key) {
  tree_node_t* tree_node = tree_find(&ts_tree->tree, key);
  if (tree_node == NULL)
    return NULL;

  return container_of(tree_node, ts_tree_node_t, tree_node);
}

ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockExclusive(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL) {
    tree_del(&ts_tree->tree, &ts_tree_node->tree_node);
    reflock_ref(&ts_tree_node->reflock);
  }

  ReleaseSRWLockExclusive(&ts_tree->lock);

  return ts_tree_node;
}

ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockShared(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL)
    reflock_ref(&ts_tree_node->reflock);

  ReleaseSRWLockShared(&ts_tree->lock);

  return ts_tree_node;
}

void ts_tree_node_unref(ts_tree_node_t* node) {
  reflock_unref(&node->reflock);
}

void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) {
  reflock_unref_and_destroy(&node->reflock);
}
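/* The tree below is a conventional red-black tree keyed by uintptr_t, with
 * the rebalancing steps factored into the TREE__* macros so the same logic
 * serves both the left- and right-leaning cases. tree_add() rejects duplicate
 * keys by returning -1; tree_find() is a plain ordered descent. wepoll keys
 * these trees with handle values: socket handles for sock_state_t (via
 * sock_state_to_tree_node() and port_register_socket()) and, judging by
 * port_state_to_handle_tree_node() above, epoll port handles through the
 * ts_tree wrapper. */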
void tree_init(tree_t* tree) {
  memset(tree, 0, sizeof *tree);
}

void tree_node_init(tree_node_t* node) {
  memset(node, 0, sizeof *node);
}
#define TREE__ROTATE(cis, trans)   \
  tree_node_t* p = node;           \
  tree_node_t* q = node->trans;    \
  tree_node_t* parent = p->parent; \
                                   \
  if (parent) {                    \
    if (parent->left == p)         \
      parent->left = q;            \
    else                           \
      parent->right = q;           \
  } else {                         \
    tree->root = q;                \
  }                                \
                                   \
  q->parent = parent;              \
  p->parent = q;                   \
  p->trans = q->cis;               \
  if (p->trans)                    \
    p->trans->parent = p;          \
  q->cis = p;

static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(left, right)
}

static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(right, left)
}
#define TREE__INSERT_OR_DESCEND(side) \
  if (parent->side) {                 \
    parent = parent->side;            \
  } else {                            \
    parent->side = node;              \
    break;                            \
  }

#define TREE__REBALANCE_AFTER_INSERT(cis, trans) \
  tree_node_t* grandparent = parent->parent;     \
  tree_node_t* uncle = grandparent->trans;       \
                                                 \
  if (uncle && uncle->red) {                     \
    parent->red = uncle->red = false;            \
    grandparent->red = true;                     \
    node = grandparent;                          \
  } else {                                       \
    if (node == parent->trans) {                 \
      tree__rotate_##cis(tree, parent);          \
      node = parent;                             \
      parent = node->parent;                     \
    }                                            \
    parent->red = false;                         \
    grandparent->red = true;                     \
    tree__rotate_##trans(tree, grandparent);     \
  }
int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) {
  tree_node_t* parent;

  parent = tree->root;
  if (parent) {
    for (;;) {
      if (key < parent->key) {
        TREE__INSERT_OR_DESCEND(left)
      } else if (key > parent->key) {
        TREE__INSERT_OR_DESCEND(right)
      } else {
        return -1;
      }
    }
  } else {
    tree->root = node;
  }

  node->key = key;
  node->left = node->right = NULL;
  node->parent = parent;
  node->red = true;

  for (; parent && parent->red; parent = node->parent) {
    if (parent == parent->parent->left) {
      TREE__REBALANCE_AFTER_INSERT(left, right)
    } else {
      TREE__REBALANCE_AFTER_INSERT(right, left)
    }
  }
  tree->root->red = false;

  return 0;
}
#define TREE__REBALANCE_AFTER_REMOVE(cis, trans)   \
  tree_node_t* sibling = parent->trans;            \
                                                   \
  if (sibling->red) {                              \
    sibling->red = false;                          \
    parent->red = true;                            \
    tree__rotate_##cis(tree, parent);              \
    sibling = parent->trans;                       \
  }                                                \
  if ((sibling->left && sibling->left->red) ||     \
      (sibling->right && sibling->right->red)) {   \
    if (!sibling->trans || !sibling->trans->red) { \
      sibling->cis->red = false;                   \
      sibling->red = true;                         \
      tree__rotate_##trans(tree, sibling);         \
      sibling = parent->trans;                     \
    }                                              \
    sibling->red = parent->red;                    \
    parent->red = sibling->trans->red = false;     \
    tree__rotate_##cis(tree, parent);              \
    node = tree->root;                             \
    break;                                         \
  }                                                \
  sibling->red = true;
void tree_del(tree_t* tree, tree_node_t* node) {
  tree_node_t* parent = node->parent;
  tree_node_t* left = node->left;
  tree_node_t* right = node->right;
  tree_node_t* next;
  bool red;

  if (!left) {
    next = right;
  } else if (!right) {
    next = left;
  } else {
    next = right;
    while (next->left)
      next = next->left;
  }

  if (parent) {
    if (parent->left == node)
      parent->left = next;
    else
      parent->right = next;
  } else {
    tree->root = next;
  }

  if (left && right) {
    red = next->red;
    next->red = node->red;
    next->left = left;
    left->parent = next;
    if (next != right) {
      parent = next->parent;
      next->parent = node->parent;
      node = next->right;
      parent->left = node;
      next->right = right;
      right->parent = next;
    } else {
      next->parent = parent;
      parent = next;
      node = next->right;
    }
  } else {
    red = node->red;
    node = next;
  }

  if (node)
    node->parent = parent;
  if (red)
    return;
  if (node && node->red) {
    node->red = false;
    return;
  }

  do {
    if (node == tree->root)
      break;
    if (node == parent->left) {
      TREE__REBALANCE_AFTER_REMOVE(left, right)
    } else {
      TREE__REBALANCE_AFTER_REMOVE(right, left)
    }
    node = parent;
    parent = parent->parent;
  } while (!node->red);

  if (node)
    node->red = false;
}
tree_node_t* tree_find(const tree_t* tree, uintptr_t key) {
  tree_node_t* node = tree->root;
  while (node) {
    if (key < node->key)
      node = node->left;
    else if (key > node->key)
      node = node->right;
    else
      return node;
  }
  return NULL;
}

tree_node_t* tree_root(const tree_t* tree) {
  return tree->root;
}
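/* Illustrative sketch of the tree API above, using a hypothetical struct with
 * an embedded tree_node_t (the same intrusive pattern sock_state_t uses):
 *
 *   typedef struct my_entry {
 *     tree_node_t node;
 *     int payload;
 *   } my_entry_t;
 *
 *   tree_t t;
 *   my_entry_t e;
 *   tree_init(&t);
 *   tree_node_init(&e.node);
 *   if (tree_add(&t, &e.node, (uintptr_t) some_handle) < 0)
 *     ; // key already present
 *   tree_node_t* found = tree_find(&t, (uintptr_t) some_handle);
 *   if (found != NULL)
 *     container_of(found, my_entry_t, node)->payload = 42;
 *   tree_del(&t, &e.node);
 *
 * `my_entry_t`, `payload`, and `some_handle` are made-up names for the sake
 * of the example. */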
#ifndef SIO_BSP_HANDLE_POLL
#define SIO_BSP_HANDLE_POLL 0x4800001D
#endif

#ifndef SIO_BASE_HANDLE
#define SIO_BASE_HANDLE 0x48000022
#endif

int ws_global_init(void) {
  int r;
  WSADATA wsa_data;

  r = WSAStartup(MAKEWORD(2, 2), &wsa_data);
  if (r != 0)
    return_set_error(-1, (DWORD) r);

  return 0;
}
static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) {
  SOCKET bsp_socket;
  DWORD bytes;

  if (WSAIoctl(socket,
               ioctl,
               NULL,
               0,
               &bsp_socket,
               sizeof bsp_socket,
               &bytes,
               NULL,
               NULL) != SOCKET_ERROR)
    return bsp_socket;
  else
    return INVALID_SOCKET;
}
SOCKET ws_get_base_socket(SOCKET socket) {
  SOCKET base_socket;
  DWORD error;

  for (;;) {
    base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE);
    if (base_socket != INVALID_SOCKET)
      return base_socket;

    error = GetLastError();
    if (error == WSAENOTSOCK)
      return_set_error(INVALID_SOCKET, error);

    /* Even though Microsoft documentation clearly states that LSPs should
     * never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia-based LSPs do
     * so anyway, breaking it, with the apparent intention of preventing LSP
     * bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which
     * will at least let us obtain the socket associated with the next winsock
     * protocol chain entry. If this succeeds, loop around and call
     * `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that
     * we unwrap all layers and retrieve the actual base socket.
     *   [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls
     *   [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6
     */
    base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL);
    if (base_socket != INVALID_SOCKET && base_socket != socket)
      socket = base_socket;
    else
      return_set_error(INVALID_SOCKET, error);
  }
}