initial user mode network support
[qemu] / slirp / slirp.c
1 #include "slirp.h"
2
/* Address of the host itself. */
struct in_addr our_addr;

/* Address of the DNS server used by the host (filled in by slirp_init). */
struct in_addr dns_addr;

/* Loopback address of the host (set to 127.0.0.1 by slirp_init). */
struct in_addr loopback_addr;

/* Base address of the slirp virtual network (parsed from CTL_SPECIAL). */
struct in_addr special_addr;

/*
 * Ethernet address template for the virtual hosts.  Only the first five
 * bytes are copied by the users in this file; the last byte is replaced
 * by the low byte of the virtual host's IP address.
 */
const uint8_t special_ethaddr[6] = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x00 };

/* Ethernet address of the client, learned from its ARP requests. */
uint8_t client_ethaddr[6];

/* Set by slirp_select_fill when the slow timers must run at the next poll. */
int do_slowtimo;

/* Non-zero while the virtual link is up. */
int link_up;

/* Scratch timeval filled in by updtime(). */
struct timeval tt;

FILE *lfd;

/*
 * XXX: suppress those select globals.
 * They point at the fd sets of the poll currently in progress and are
 * reset to NULL by slirp_select_fill.
 */
fd_set *global_readfds;
fd_set *global_writefds;
fd_set *global_xfds;
26
27 #ifdef _WIN32
28
/*
 * Windows variant: looking up the host DNS server is not implemented,
 * so this always reports failure and leaves *pdns_addr untouched.
 */
static int get_dns_addr(struct in_addr *pdns_addr)
{
    /* XXX: add it */
    (void)pdns_addr;
    return -1;
}
34
35 #else
36
37 static int get_dns_addr(struct in_addr *pdns_addr)
38 {
39     char buff[512];
40     char buff2[256];
41     FILE *f;
42     int found = 0;
43     struct in_addr tmp_addr;
44     
45     f = fopen("/etc/resolv.conf", "r");
46     if (!f)
47         return -1;
48
49     lprint("IP address of your DNS(s): ");
50     while (fgets(buff, 512, f) != NULL) {
51         if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) {
52             if (!inet_aton(buff2, &tmp_addr))
53                 continue;
54             if (tmp_addr.s_addr == loopback_addr.s_addr)
55                 tmp_addr = our_addr;
56             /* If it's the first one, set it to dns_addr */
57             if (!found)
58                 *pdns_addr = tmp_addr;
59             else
60                 lprint(", ");
61             if (++found > 3) {
62                 lprint("(more)");
63                 break;
64             } else
65                 lprint("%s", inet_ntoa(tmp_addr));
66         }
67     }
68     if (!found)
69         return -1;
70     return 0;
71 }
72
73 #endif
74
75 void slirp_init(void)
76 {
77     debug_init("/tmp/slirp.log", DEBUG_DEFAULT);
78
79     link_up = 1;
80
81     if_init();
82     ip_init();
83
84     /* Initialise mbufs *after* setting the MTU */
85     m_init();
86
87     /* set default addresses */
88     getouraddr();
89     inet_aton("127.0.0.1", &loopback_addr);
90
91     if (get_dns_addr(&dns_addr) < 0) {
92         fprintf(stderr, "Could not get DNS address\n");
93         exit(1);
94     }
95
96     inet_aton(CTL_SPECIAL, &special_addr);
97 }
98
/* True when the socket is connected and not flagged SS_FCANTSENDMORE. */
#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/* True when the socket is connected and not flagged SS_FCANTRCVMORE. */
#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/*
 * Raise the variable `nfds` in the enclosing scope to x when x is larger.
 * Wrapped in do/while(0) so that it behaves as a single statement and a
 * following `else` cannot bind to the macro's own `if`.
 * Note: x is evaluated twice; do not pass an expression with side effects.
 */
#define UPD_NFDS(x) do { if (nfds < (x)) nfds = (x); } while (0)
102
103 /*
104  * curtime kept to an accuracy of 1ms
105  */
106 static void updtime(void)
107 {
108         gettimeofday(&tt, 0);
109         
110         curtime = (u_int)tt.tv_sec * (u_int)1000;
111         curtime += (u_int)tt.tv_usec / (u_int)1000;
112         
113         if ((tt.tv_usec % 1000) >= 500)
114            curtime++;
115 }
116
117 void slirp_select_fill(int *pnfds, 
118                        fd_set *readfds, fd_set *writefds, fd_set *xfds)
119 {
120     struct socket *so, *so_next;
121     struct timeval timeout;
122     int nfds;
123     int tmp_time;
124
125     /* fail safe */
126     global_readfds = NULL;
127     global_writefds = NULL;
128     global_xfds = NULL;
129     
130     nfds = *pnfds;
131         /*
132          * First, TCP sockets
133          */
134         do_slowtimo = 0;
135         if (link_up) {
136                 /* 
137                  * *_slowtimo needs calling if there are IP fragments
138                  * in the fragment queue, or there are TCP connections active
139                  */
140                 do_slowtimo = ((tcb.so_next != &tcb) ||
141                                ((struct ipasfrag *)&ipq != (struct ipasfrag *)ipq.next));
142                 
143                 for (so = tcb.so_next; so != &tcb; so = so_next) {
144                         so_next = so->so_next;
145                         
146                         /*
147                          * See if we need a tcp_fasttimo
148                          */
149                         if (time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK)
150                            time_fasttimo = curtime; /* Flag when we want a fasttimo */
151                         
152                         /*
153                          * NOFDREF can include still connecting to local-host,
154                          * newly socreated() sockets etc. Don't want to select these.
155                          */
156                         if (so->so_state & SS_NOFDREF || so->s == -1)
157                            continue;
158                         
159                         /*
160                          * Set for reading sockets which are accepting
161                          */
162                         if (so->so_state & SS_FACCEPTCONN) {
163                                 FD_SET(so->s, readfds);
164                                 UPD_NFDS(so->s);
165                                 continue;
166                         }
167                         
168                         /*
169                          * Set for writing sockets which are connecting
170                          */
171                         if (so->so_state & SS_ISFCONNECTING) {
172                                 FD_SET(so->s, writefds);
173                                 UPD_NFDS(so->s);
174                                 continue;
175                         }
176                         
177                         /*
178                          * Set for writing if we are connected, can send more, and
179                          * we have something to send
180                          */
181                         if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) {
182                                 FD_SET(so->s, writefds);
183                                 UPD_NFDS(so->s);
184                         }
185                         
186                         /*
187                          * Set for reading (and urgent data) if we are connected, can
188                          * receive more, and we have room for it XXX /2 ?
189                          */
190                         if (CONN_CANFRCV(so) && (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2))) {
191                                 FD_SET(so->s, readfds);
192                                 FD_SET(so->s, xfds);
193                                 UPD_NFDS(so->s);
194                         }
195                 }
196                 
197                 /*
198                  * UDP sockets
199                  */
200                 for (so = udb.so_next; so != &udb; so = so_next) {
201                         so_next = so->so_next;
202                         
203                         /*
204                          * See if it's timed out
205                          */
206                         if (so->so_expire) {
207                                 if (so->so_expire <= curtime) {
208                                         udp_detach(so);
209                                         continue;
210                                 } else
211                                         do_slowtimo = 1; /* Let socket expire */
212                         }
213                         
214                         /*
215                          * When UDP packets are received from over the
216                          * link, they're sendto()'d straight away, so
217                          * no need for setting for writing
218                          * Limit the number of packets queued by this session
219                          * to 4.  Note that even though we try and limit this
220                          * to 4 packets, the session could have more queued
221                          * if the packets needed to be fragmented
222                          * (XXX <= 4 ?)
223                          */
224                         if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) {
225                                 FD_SET(so->s, readfds);
226                                 UPD_NFDS(so->s);
227                         }
228                 }
229         }
230         
231         /*
232          * Setup timeout to use minimum CPU usage, especially when idle
233          */
234         
235         /* 
236          * First, see the timeout needed by *timo
237          */
238         timeout.tv_sec = 0;
239         timeout.tv_usec = -1;
240         /*
241          * If a slowtimo is needed, set timeout to 500ms from the last
242          * slow timeout. If a fast timeout is needed, set timeout within
243          * 200ms of when it was requested.
244          */
245         if (do_slowtimo) {
246                 /* XXX + 10000 because some select()'s aren't that accurate */
247                 timeout.tv_usec = ((500 - (curtime - last_slowtimo)) * 1000) + 10000;
248                 if (timeout.tv_usec < 0)
249                    timeout.tv_usec = 0;
250                 else if (timeout.tv_usec > 510000)
251                    timeout.tv_usec = 510000;
252                 
253                 /* Can only fasttimo if we also slowtimo */
254                 if (time_fasttimo) {
255                         tmp_time = (200 - (curtime - time_fasttimo)) * 1000;
256                         if (tmp_time < 0)
257                            tmp_time = 0;
258                         
259                         /* Choose the smallest of the 2 */
260                         if (tmp_time < timeout.tv_usec)
261                            timeout.tv_usec = (u_int)tmp_time;
262                 }
263         }
264         *pnfds = nfds;
265 }       
266
267 void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds)
268 {
269     struct socket *so, *so_next;
270     int ret;
271
272     global_readfds = readfds;
273     global_writefds = writefds;
274     global_xfds = xfds;
275
276         /* Update time */
277         updtime();
278         
279         /*
280          * See if anything has timed out 
281          */
282         if (link_up) {
283                 if (time_fasttimo && ((curtime - time_fasttimo) >= 199)) {
284                         tcp_fasttimo();
285                         time_fasttimo = 0;
286                 }
287                 if (do_slowtimo && ((curtime - last_slowtimo) >= 499)) {
288                         ip_slowtimo();
289                         tcp_slowtimo();
290                         last_slowtimo = curtime;
291                 }
292         }
293         
294         /*
295          * Check sockets
296          */
297         if (link_up) {
298                 /*
299                  * Check TCP sockets
300                  */
301                 for (so = tcb.so_next; so != &tcb; so = so_next) {
302                         so_next = so->so_next;
303                         
304                         /*
305                          * FD_ISSET is meaningless on these sockets
306                          * (and they can crash the program)
307                          */
308                         if (so->so_state & SS_NOFDREF || so->s == -1)
309                            continue;
310                         
311                         /*
312                          * Check for URG data
313                          * This will soread as well, so no need to
314                          * test for readfds below if this succeeds
315                          */
316                         if (FD_ISSET(so->s, xfds))
317                            sorecvoob(so);
318                         /*
319                          * Check sockets for reading
320                          */
321                         else if (FD_ISSET(so->s, readfds)) {
322                                 /*
323                                  * Check for incoming connections
324                                  */
325                                 if (so->so_state & SS_FACCEPTCONN) {
326                                         tcp_connect(so);
327                                         continue;
328                                 } /* else */
329                                 ret = soread(so);
330                                 
331                                 /* Output it if we read something */
332                                 if (ret > 0)
333                                    tcp_output(sototcpcb(so));
334                         }
335                         
336                         /*
337                          * Check sockets for writing
338                          */
339                         if (FD_ISSET(so->s, writefds)) {
340                           /*
341                            * Check for non-blocking, still-connecting sockets
342                            */
343                           if (so->so_state & SS_ISFCONNECTING) {
344                             /* Connected */
345                             so->so_state &= ~SS_ISFCONNECTING;
346                             
347                             ret = write(so->s, &ret, 0);
348                             if (ret < 0) {
349                               /* XXXXX Must fix, zero bytes is a NOP */
350                               if (errno == EAGAIN || errno == EWOULDBLOCK ||
351                                   errno == EINPROGRESS || errno == ENOTCONN)
352                                 continue;
353                               
354                               /* else failed */
355                               so->so_state = SS_NOFDREF;
356                             }
357                             /* else so->so_state &= ~SS_ISFCONNECTING; */
358                             
359                             /*
360                              * Continue tcp_input
361                              */
362                             tcp_input((struct mbuf *)NULL, sizeof(struct ip), so);
363                             /* continue; */
364                           } else
365                             ret = sowrite(so);
366                           /*
367                            * XXXXX If we wrote something (a lot), there 
368                            * could be a need for a window update.
369                            * In the worst case, the remote will send
370                            * a window probe to get things going again
371                            */
372                         }
373                         
374                         /*
375                          * Probe a still-connecting, non-blocking socket
376                          * to check if it's still alive
377                          */
378 #ifdef PROBE_CONN
379                         if (so->so_state & SS_ISFCONNECTING) {
380                           ret = read(so->s, (char *)&ret, 0);
381                           
382                           if (ret < 0) {
383                             /* XXX */
384                             if (errno == EAGAIN || errno == EWOULDBLOCK ||
385                                 errno == EINPROGRESS || errno == ENOTCONN)
386                               continue; /* Still connecting, continue */
387                             
388                             /* else failed */
389                             so->so_state = SS_NOFDREF;
390                             
391                             /* tcp_input will take care of it */
392                           } else {
393                             ret = write(so->s, &ret, 0);
394                             if (ret < 0) {
395                               /* XXX */
396                               if (errno == EAGAIN || errno == EWOULDBLOCK ||
397                                   errno == EINPROGRESS || errno == ENOTCONN)
398                                 continue;
399                               /* else failed */
400                               so->so_state = SS_NOFDREF;
401                             } else
402                               so->so_state &= ~SS_ISFCONNECTING;
403                             
404                           }
405                           tcp_input((struct mbuf *)NULL, sizeof(struct ip),so);
406                         } /* SS_ISFCONNECTING */
407 #endif
408                 }
409                 
410                 /*
411                  * Now UDP sockets.
412                  * Incoming packets are sent straight away, they're not buffered.
413                  * Incoming UDP data isn't buffered either.
414                  */
415                 for (so = udb.so_next; so != &udb; so = so_next) {
416                         so_next = so->so_next;
417                         
418                         if (so->s != -1 && FD_ISSET(so->s, readfds)) {
419                             sorecvfrom(so);
420                         }
421                 }
422         }
423         
424         /*
425          * See if we can start outputting
426          */
427         if (if_queued && link_up)
428            if_start();
429 }
430
#define ETH_ALEN 6                      /* bytes in an Ethernet address */
#define ETH_HLEN 14                     /* bytes in an Ethernet header  */

#define ETH_P_IP        0x0800          /* Internet Protocol packet     */
#define ETH_P_ARP       0x0806          /* Address Resolution packet    */

#define ARPOP_REQUEST   1               /* ARP request                  */
#define ARPOP_REPLY     2               /* ARP reply                    */

/*
 * Ethernet frame header as it appears on the wire.  Fixed-width types
 * keep the layout independent of the platform's `short` size.
 * h_proto is written in network byte order by the code in this file.
 */
struct ethhdr
{
        uint8_t         h_dest[ETH_ALEN];       /* destination eth addr */
        uint8_t         h_source[ETH_ALEN];     /* source ether addr    */
        uint16_t        h_proto;                /* packet type ID field */
};

/*
 * ARP packet for IPv4 over Ethernet as it appears on the wire.
 * The 16-bit fields are written in network byte order by the code in
 * this file.
 */
struct arphdr
{
        uint16_t        ar_hrd;         /* format of hardware address   */
        uint16_t        ar_pro;         /* format of protocol address   */
        uint8_t         ar_hln;         /* length of hardware address   */
        uint8_t         ar_pln;         /* length of protocol address   */
        uint16_t        ar_op;          /* ARP opcode (command)         */

         /*
          *      Ethernet looks like this : This bit is variable sized however...
          */
        uint8_t         ar_sha[ETH_ALEN];       /* sender hardware address      */
        uint8_t         ar_sip[4];              /* sender IP address            */
        uint8_t         ar_tha[ETH_ALEN];       /* target hardware address      */
        uint8_t         ar_tip[4];              /* target IP address            */
};
463
464 void arp_input(const uint8_t *pkt, int pkt_len)
465 {
466     struct ethhdr *eh = (struct ethhdr *)pkt;
467     struct arphdr *ah = (struct arphdr *)(pkt + ETH_HLEN);
468     uint8_t arp_reply[ETH_HLEN + sizeof(struct arphdr)];
469     struct ethhdr *reh = (struct ethhdr *)arp_reply;
470     struct arphdr *rah = (struct arphdr *)(arp_reply + ETH_HLEN);
471     int ar_op;
472
473     ar_op = ntohs(ah->ar_op);
474     switch(ar_op) {
475     case ARPOP_REQUEST:
476         if (!memcmp(ah->ar_tip, &special_addr, 3) &&
477             (ah->ar_tip[3] == CTL_DNS || ah->ar_tip[3] == CTL_ALIAS)) {
478
479             /* XXX: make an ARP request to have the client address */
480             memcpy(client_ethaddr, eh->h_source, ETH_ALEN);
481
482             /* ARP request for alias/dns mac address */
483             memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN);
484             memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 1);
485             reh->h_source[5] = ah->ar_tip[3];
486             reh->h_proto = htons(ETH_P_ARP);
487
488             rah->ar_hrd = htons(1);
489             rah->ar_pro = htons(ETH_P_IP);
490             rah->ar_hln = ETH_ALEN;
491             rah->ar_pln = 4;
492             rah->ar_op = htons(ARPOP_REPLY);
493             memcpy(rah->ar_sha, reh->h_source, ETH_ALEN);
494             memcpy(rah->ar_sip, ah->ar_tip, 4);
495             memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN);
496             memcpy(rah->ar_tip, ah->ar_sip, 4);
497             slirp_output(arp_reply, sizeof(arp_reply));
498         }
499         break;
500     default:
501         break;
502     }
503 }
504
505 void slirp_input(const uint8_t *pkt, int pkt_len)
506 {
507     struct mbuf *m;
508     int proto;
509
510     if (pkt_len < ETH_HLEN)
511         return;
512     
513     proto = ntohs(*(uint16_t *)(pkt + 12));
514     switch(proto) {
515     case ETH_P_ARP:
516         arp_input(pkt, pkt_len);
517         break;
518     case ETH_P_IP:
519         m = m_get();
520         if (!m)
521             return;
522         m->m_len = pkt_len;
523         memcpy(m->m_data, pkt, pkt_len);
524
525         m->m_data += ETH_HLEN;
526         m->m_len -= ETH_HLEN;
527
528         ip_input(m);
529         break;
530     default:
531         break;
532     }
533 }
534
535 /* output the IP packet to the ethernet device */
536 void if_encap(const uint8_t *ip_data, int ip_data_len)
537 {
538     uint8_t buf[1600];
539     struct ethhdr *eh = (struct ethhdr *)buf;
540
541     if (ip_data_len + ETH_HLEN > sizeof(buf))
542         return;
543
544     memcpy(eh->h_dest, client_ethaddr, ETH_ALEN);
545     memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 1);
546     eh->h_source[5] = CTL_ALIAS;
547     eh->h_proto = htons(ETH_P_IP);
548     memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len);
549     slirp_output(buf, ip_data_len + ETH_HLEN);
550 }