--- /dev/null
+use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
+
+use Test::More tests => 13;
+
+# Round-trip a Latin-1 phrase through the decoder and both encoders.
+# The input mixes named and numeric references; the decoded form is
+# written with \x escapes so the comparison does not depend on the
+# encoding this file is saved in.
+$a = "V&aring;re norske tegn b&oslash;r &#230;res";
+
+decode_entities($a);
+
+is($a, "V\xE5re norske tegn b\xF8r \xE6res", "decode named and numeric references in place");
+
+encode_entities($a);
+
+is($a, "V&aring;re norske tegn b&oslash;r &aelig;res", "encode back to named references");
+
+decode_entities($a);
+encode_entities_numeric($a);
+
+is($a, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res", "encode to hexadecimal numeric references");
+
+# The markup-significant ASCII characters.  Called in non-void context,
+# the encoders return the encoded copy, which is what is compared here.
+$a = "<&>\"'";
+is(encode_entities($a), "&lt;&amp;&gt;&quot;&#39;", "encode markup characters");
+is(encode_entities_numeric($a), "&#x3C;&#x26;&#x3E;&#x22;&#x27;", "encode markup characters numerically");
+
+# An explicit set of characters to encode: only a, b and c are replaced.
+$a = "abcdef";
+is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def", "encode only the requested character range");
+
+
+# See how well it does against rfc1866...
+#
+# Each "<!ENTITY name CDATA "&#NNN;" ...>" declaration found in the DATA
+# section contributes "&name;" to $ent and chr(NNN) to $plain, giving two
+# parallel strings that should convert into each other.
+my $ent   = "";
+my $plain = "";
+while (my $line = <DATA>) {
+    next unless $line =~ /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*\"&\#(\d+)/;
+    $ent   .= "&$1;";
+    $plain .= chr($2);
+}
+
+# With no declarations parsed, the comparisons below would compare two
+# empty strings and pass without testing anything; say so in the output.
+diag("no entity declarations were read from DATA") unless length $ent;
+
+$a = $ent;
+decode_entities($a);
+is($a, $plain, "decode every entity declared in the table");
+
+# Try decoding when the ";" are left out
+$a = $ent;
+$a =~ s/;//g;
+decode_entities($a);
+is($a, $plain, "decode the same entities without the terminating semicolon");
+
+
+$a = $plain;
+encode_entities($a);
+is($a, $ent, "encode the characters back to their named entities");
+
+
+# From: Bill Simpson-Young <bill.simpson-young@cmis.csiro.au>
+# Subject: HTML entities problem with 5.11
+# To: libwww-perl@ics.uci.edu
+# Date: Fri, 05 Sep 1997 16:56:55 +1000
+# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
+#
+# Hi. I've got a problem that has surfaced with the changes to
+# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08). It's happening
+# in the process of encoding then decoding special entities. Eg, what goes
+# in as "abc&def&ghi" comes out as "abc&def;&ghi;".
+
+# Names that are not known entities must come back unchanged, whether or
+# not they are followed by a ";".
+is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;", "unknown entity names are left alone");
+
+# Decoding of &apos;
+# The named form is accepted on input, while the apostrophe itself is
+# expected to be written out as a decimal numeric reference.
+is(decode_entities("&apos;"), "'", "decode &apos;");
+is(encode_entities("'", "'"), "&#39;", "encode an apostrophe numerically");
+
+# Decimal references above 255, none of them terminated by ";".
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+   "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}",
+   "decode unterminated numeric references to wide characters");
+
+__END__
+# Quoted from rfc1866.txt
+
+14. Proposed Entities
+
+ The HTML DTD references the "Added Latin 1" entity set, which only
+ supplies named entities for a subset of the non-ASCII characters in
+ [ISO-8859-1], namely the accented characters. The following entities
+ should be supported so that all ISO 8859-1 characters may only be
+ referenced symbolically. The names for these entities are taken from
+ the appendixes of [SGML].
+
+ <!ENTITY nbsp CDATA "&#160;" -- no-break space -->
+ <!ENTITY iexcl CDATA "&#161;" -- inverted exclamation mark -->
+ <!ENTITY cent CDATA "&#162;" -- cent sign -->
+ <!ENTITY pound CDATA "&#163;" -- pound sterling sign -->
+ <!ENTITY curren CDATA "&#164;" -- general currency sign -->
+ <!ENTITY yen CDATA "&#165;" -- yen sign -->
+ <!ENTITY brvbar CDATA "&#166;" -- broken (vertical) bar -->
+ <!ENTITY sect CDATA "&#167;" -- section sign -->
+ <!ENTITY uml CDATA "&#168;" -- umlaut (dieresis) -->
+ <!ENTITY copy CDATA "&#169;" -- copyright sign -->
+ <!ENTITY ordf CDATA "&#170;" -- ordinal indicator, feminine -->
+ <!ENTITY laquo CDATA "&#171;" -- angle quotation mark, left -->
+ <!ENTITY not CDATA "&#172;" -- not sign -->
+ <!ENTITY shy CDATA "&#173;" -- soft hyphen -->
+ <!ENTITY reg CDATA "&#174;" -- registered sign -->
+ <!ENTITY macr CDATA "&#175;" -- macron -->
+ <!ENTITY deg CDATA "&#176;" -- degree sign -->
+ <!ENTITY plusmn CDATA "&#177;" -- plus-or-minus sign -->
+ <!ENTITY sup2 CDATA "&#178;" -- superscript two -->
+ <!ENTITY sup3 CDATA "&#179;" -- superscript three -->
+ <!ENTITY acute CDATA "&#180;" -- acute accent -->
+ <!ENTITY micro CDATA "&#181;" -- micro sign -->
+ <!ENTITY para CDATA "&#182;" -- pilcrow (paragraph sign) -->
+ <!ENTITY middot CDATA "&#183;" -- middle dot -->
+ <!ENTITY cedil CDATA "&#184;" -- cedilla -->
+ <!ENTITY sup1 CDATA "&#185;" -- superscript one -->
+ <!ENTITY ordm CDATA "&#186;" -- ordinal indicator, masculine -->
+ <!ENTITY raquo CDATA "&#187;" -- angle quotation mark, right -->
+ <!ENTITY frac14 CDATA "&#188;" -- fraction one-quarter -->
+ <!ENTITY frac12 CDATA "&#189;" -- fraction one-half -->
+ <!ENTITY frac34 CDATA "&#190;" -- fraction three-quarters -->
+ <!ENTITY iquest CDATA "&#191;" -- inverted question mark -->
+ <!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
+ <!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
+ <!ENTITY Acirc CDATA "&#194;" -- capital A, circumflex accent -->
+
+
+
+Berners-Lee & Connolly Standards Track [Page 75]
+\f
+RFC 1866 Hypertext Markup Language - 2.0 November 1995
+
+
+ <!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
+ <!ENTITY Auml CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
+ <!ENTITY Aring CDATA "&#197;" -- capital A, ring -->
+ <!ENTITY AElig CDATA "&#198;" -- capital AE diphthong (ligature) -->
+ <!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
+ <!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
+ <!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
+ <!ENTITY Ecirc CDATA "&#202;" -- capital E, circumflex accent -->
+ <!ENTITY Euml CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
+ <!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
+ <!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
+ <!ENTITY Icirc CDATA "&#206;" -- capital I, circumflex accent -->
+ <!ENTITY Iuml CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
+ <!ENTITY ETH CDATA "&#208;" -- capital Eth, Icelandic -->
+ <!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
+ <!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
+ <!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
+ <!ENTITY Ocirc CDATA "&#212;" -- capital O, circumflex accent -->
+ <!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
+ <!ENTITY Ouml CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
+ <!ENTITY times CDATA "&#215;" -- multiply sign -->
+ <!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
+ <!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
+ <!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
+ <!ENTITY Ucirc CDATA "&#219;" -- capital U, circumflex accent -->
+ <!ENTITY Uuml CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
+ <!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
+ <!ENTITY THORN CDATA "&#222;" -- capital THORN, Icelandic -->
+ <!ENTITY szlig CDATA "&#223;" -- small sharp s, German (sz ligature) -->
+ <!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
+ <!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
+ <!ENTITY acirc CDATA "&#226;" -- small a, circumflex accent -->
+ <!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
+ <!ENTITY auml CDATA "&#228;" -- small a, dieresis or umlaut mark -->
+ <!ENTITY aring CDATA "&#229;" -- small a, ring -->
+ <!ENTITY aelig CDATA "&#230;" -- small ae diphthong (ligature) -->
+ <!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
+ <!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
+ <!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
+ <!ENTITY ecirc CDATA "&#234;" -- small e, circumflex accent -->
+ <!ENTITY euml CDATA "&#235;" -- small e, dieresis or umlaut mark -->
+ <!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
+ <!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
+ <!ENTITY icirc CDATA "&#238;" -- small i, circumflex accent -->
+ <!ENTITY iuml CDATA "&#239;" -- small i, dieresis or umlaut mark -->
+ <!ENTITY eth CDATA "&#240;" -- small eth, Icelandic -->
+ <!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
+ <!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
+
+
+
+Berners-Lee & Connolly Standards Track [Page 76]
+\f
+RFC 1866 Hypertext Markup Language - 2.0 November 1995
+
+
+ <!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
+ <!ENTITY ocirc CDATA "&#244;" -- small o, circumflex accent -->
+ <!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
+ <!ENTITY ouml CDATA "&#246;" -- small o, dieresis or umlaut mark -->
+ <!ENTITY divide CDATA "&#247;" -- divide sign -->
+ <!ENTITY oslash CDATA "&#248;" -- small o, slash -->
+ <!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
+ <!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
+ <!ENTITY ucirc CDATA "&#251;" -- small u, circumflex accent -->
+ <!ENTITY uuml CDATA "&#252;" -- small u, dieresis or umlaut mark -->
+ <!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
+ <!ENTITY thorn CDATA "&#254;" -- small thorn, Icelandic -->
+ <!ENTITY yuml CDATA "&#255;" -- small y, dieresis or umlaut mark -->