root/10025_unescape_unicode_entities.patch
@
55:8b22aecab4d9
| Revision 55:8b22aecab4d9, 5.3 KB (checked in by Jiang Xin <worldhello.net AT gmail DOT com>, 3 years ago) |
|---|
-
freemind/main/HtmlTools.java
diff -r ad30e1ec65d9 freemind/main/HtmlTools.java
a b 23 23 import java.io.IOException; 24 24 import java.io.StringReader; 25 25 import java.io.StringWriter; 26 import java.io.Writer; 26 27 import java.util.ArrayList; 27 28 import java.util.Iterator; 28 29 import java.util.Locale; … … 77 78 try { 78 79 XHTMLWriter.html2xhtml(reader, writer); 79 80 String resultXml = writer.toString(); 81 if (Resources.getInstance().getBoolProperty("wh_nonascii_in_utf8")) { 82 resultXml = unescape_utf8(resultXml); 83 } 80 84 // for safety: 81 85 if(!isWellformedXml(resultXml)) { 82 86 return toXMLEscapedText(htmlText); … … 547 551 return output; 548 552 } 549 553 554 /* Borrow code from org.apache.commons.lang.Entities */ 555 public String unescape_utf8(String str) { 556 int firstAmp = str.indexOf('&'); 557 if (firstAmp < 0) { 558 return str; 559 } else { 560 StringWriter stringWriter = createStringWriter(str); 561 try { 562 this.doUnescapeUtf8(stringWriter, str, firstAmp); 563 } catch (IOException e) { 564 // This should never happen because ALL the StringWriter methods called by #escape(Writer, String) 565 // do not throw IOExceptions. 566 return str; 567 } 568 return stringWriter.toString(); 569 } 570 } 571 572 /** 573 * Make the StringWriter 10% larger than the source String to avoid growing the writer 574 * 575 * @param str The source string 576 * @return A newly created StringWriter 577 */ 578 private StringWriter createStringWriter(String str) { 579 return new StringWriter((int) (str.length() + (str.length() * 0.1))); 580 } 581 582 583 /** 584 * Underlying unescape method that allows the optimisation of not starting from the 0 index again. 585 * 586 * @param writer 587 * The <code>Writer</code> to write the results to; assumed to be non-null. 588 * @param str 589 * The source <code>String</code> to unescape; assumed to be non-null. 590 * @param firstAmp 591 * The <code>int</code> index of the first ampersand in the source String. 592 * @throws IOException 593 * when <code>Writer</code> passed throws the exception from calls to the {@link Writer#write(int)} 594 * methods. 595 */ 596 private void doUnescapeUtf8(Writer writer, String str, int firstAmp) throws IOException { 597 writer.write(str, 0, firstAmp); 598 int len = str.length(); 599 for (int i = firstAmp; i < len; i++) { 600 char c = str.charAt(i); 601 if (c == '&') { 602 int nextIdx = i + 1; 603 int semiColonIdx = str.indexOf(';', nextIdx); 604 if (semiColonIdx == -1) { 605 writer.write(c); 606 continue; 607 } 608 int amphersandIdx = str.indexOf('&', i + 1); 609 if (amphersandIdx != -1 && amphersandIdx < semiColonIdx) { 610 // Then the text looks like &...&...; 611 writer.write(c); 612 continue; 613 } 614 String entityContent = str.substring(nextIdx, semiColonIdx); 615 int entityValue = -1; 616 int entityContentLen = entityContent.length(); 617 if (entityContentLen > 0) { 618 if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or 619 // hexidecimal) 620 if (entityContentLen > 1) { 621 char isHexChar = entityContent.charAt(1); 622 try { 623 switch (isHexChar) { 624 case 'X' : 625 case 'x' : { 626 entityValue = Integer.parseInt(entityContent.substring(2), 16); 627 break; 628 } 629 default : { 630 entityValue = Integer.parseInt(entityContent.substring(1), 10); 631 } 632 } 633 if (entityValue > 0xFFFF || entityValue < 128 ) { 634 entityValue = -1; 635 } 636 } catch (NumberFormatException e) { 637 entityValue = -1; 638 } 639 } 640 } else { // escaped value content is an entity name 641 //entityValue = this.entityValue(entityContent); 642 entityValue = -1; 643 } 644 } 645 646 if (entityValue == -1) { 647 writer.write('&'); 648 writer.write(entityContent); 649 writer.write(';'); 650 } else { 651 writer.write(entityValue); 652 } 653 i = semiColonIdx; // move index up to the semi-colon 654 } else { 655 writer.write(c); 656 } 657 } 658 } 550 659 551 660 }
Note: See TracBrowser
for help on using the browser.
![(please configure the [header_logo] section in trac.ini)](/trac/freemind/chrome/common/trac_banner.png)