git.maemo.org Git - someplayer/blob - src/taglib/toolkit/tstring.cpp

   1 /***************************************************************************
   2     copyright            : (C) 2002 - 2008 by Scott Wheeler
   3     email                : wheeler@kde.org
   4  ***************************************************************************/
   5
   6 /***************************************************************************
   7  *   This library is free software; you can redistribute it and/or modify  *
   8  *   it under the terms of the GNU Lesser General Public License version   *
   9  *   2.1 as published by the Free Software Foundation.                     *
  10  *                                                                         *
  11  *   This library is distributed in the hope that it will be useful, but   *
  12  *   WITHOUT ANY WARRANTY; without even the implied warranty of            *
  13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
  14  *   Lesser General Public License for more details.                       *
  15  *                                                                         *
  16  *   You should have received a copy of the GNU Lesser General Public      *
  17  *   License along with this library; if not, write to the Free Software   *
  18  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  *
  19  *   USA                                                                   *
  20  *                                                                         *
  21  *   Alternatively, this file is available under the Mozilla Public        *
  22  *   License Version 1.1.  You may obtain a copy of the License at         *
  23  *   http://www.mozilla.org/MPL/                                           *
  24  ***************************************************************************/
  25
  26 #include "tstring.h"
  27 #include "unicode.h"
  28 #include "tdebug.h"
  29
  30 #include <iostream>
  31
  32 #include <string.h>
  33
  34 namespace TagLib {
  35
  36   inline unsigned short byteSwap(unsigned short x)
  37   {
  38     return (((x) >> 8) & 0xff) | (((x) & 0xff) << 8);
  39   }
  40
  41   inline unsigned short combine(unsigned char c1, unsigned char c2)
  42   {
  43     return (c1 << 8) | c2;
  44   }
  45 }
  46
  47 using namespace TagLib;
  48
  49 class String::StringPrivate : public RefCounter
  50 {
  51 public:
  52   StringPrivate(const wstring &s) :
  53     RefCounter(),
  54     data(s),
  55     CString(0) {}
  56
  57   StringPrivate() :
  58     RefCounter(),
  59     CString(0) {}
  60
  61   ~StringPrivate() {
  62     delete [] CString;
  63   }
  64
  65   wstring data;
  66
  67   /*!
  68    * This is only used to hold the a pointer to the most recent value of
  69    * toCString.
  70    */
  71   char *CString;
  72 };
  73
// Shared "null" instance: isNull() reports true for a String whose private
// data pointer is the one owned by this object.
String String::null;
  75
  76 ////////////////////////////////////////////////////////////////////////////////
  77
  78 String::String()
  79 {
  80   d = new StringPrivate;
  81 }
  82
  83 String::String(const String &s) : d(s.d)
  84 {
  85   d->ref();
  86 }
  87
  88 String::String(const std::string &s, Type t)
  89 {
  90   d = new StringPrivate;
  91
  92   if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
  93     debug("String::String() -- A std::string should not contain UTF16.");
  94     return;
  95   }
  96
  97   int length = s.length();
  98   d->data.resize(length);
  99   wstring::iterator targetIt = d->data.begin();
 100
 101   for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
 102     *targetIt = uchar(*it);
 103     ++targetIt;
 104   }
 105
 106   prepare(t);
 107 }
 108
 109 String::String(const wstring &s, Type t)
 110 {
 111   d = new StringPrivate(s);
 112   prepare(t);
 113 }
 114
 115 String::String(const wchar_t *s, Type t)
 116 {
 117   d = new StringPrivate(s);
 118   prepare(t);
 119 }
 120
 121 String::String(const char *s, Type t)
 122 {
 123   d = new StringPrivate;
 124
 125   if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
 126     debug("String::String() -- A const char * should not contain UTF16.");
 127     return;
 128   }
 129
 130   int length = ::strlen(s);
 131   d->data.resize(length);
 132
 133   wstring::iterator targetIt = d->data.begin();
 134
 135   for(int i = 0; i < length; i++) {
 136     *targetIt = uchar(s[i]);
 137     ++targetIt;
 138   }
 139
 140   prepare(t);
 141 }
 142
 143 String::String(wchar_t c, Type t)
 144 {
 145   d = new StringPrivate;
 146   d->data += c;
 147   prepare(t);
 148 }
 149
 150 String::String(char c, Type t)
 151 {
 152   d = new StringPrivate;
 153
 154   if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
 155     debug("String::String() -- A std::string should not contain UTF16.");
 156     return;
 157   }
 158
 159   d->data += uchar(c);
 160   prepare(t);
 161 }
 162
 163 String::String(const ByteVector &v, Type t)
 164 {
 165   d = new StringPrivate;
 166
 167   if(v.isEmpty())
 168     return;
 169
 170   if(t == Latin1 || t == UTF8) {
 171
 172     int length = 0;
 173     d->data.resize(v.size());
 174     wstring::iterator targetIt = d->data.begin();
 175     for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
 176       *targetIt = uchar(*it);
 177       ++targetIt;
 178       ++length;
 179     }
 180     d->data.resize(length);
 181   }
 182   else  {
 183     d->data.resize(v.size() / 2);
 184     wstring::iterator targetIt = d->data.begin();
 185
 186     for(ByteVector::ConstIterator it = v.begin();
 187         it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
 188         it += 2)
 189     {
 190       *targetIt = combine(*it, *(it + 1));
 191       ++targetIt;
 192     }
 193   }
 194   prepare(t);
 195 }
 196
 197 ////////////////////////////////////////////////////////////////////////////////
 198
 199 String::~String()
 200 {
 201   if(d->deref())
 202     delete d;
 203 }
 204
 205 std::string String::to8Bit(bool unicode) const
 206 {
 207   std::string s;
 208   s.resize(d->data.size());
 209
 210   if(!unicode) {
 211     std::string::iterator targetIt = s.begin();
 212     for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 213       *targetIt = char(*it);
 214       ++targetIt;
 215     }
 216     return s;
 217   }
 218
 219   const int outputBufferSize = d->data.size() * 3 + 1;
 220
 221   Unicode::UTF16 *sourceBuffer = new Unicode::UTF16[d->data.size() + 1];
 222   Unicode::UTF8  *targetBuffer = new Unicode::UTF8[outputBufferSize];
 223
 224   for(unsigned int i = 0; i < d->data.size(); i++)
 225     sourceBuffer[i] = Unicode::UTF16(d->data[i]);
 226
 227   const Unicode::UTF16 *source = sourceBuffer;
 228   Unicode::UTF8 *target = targetBuffer;
 229
 230   Unicode::ConversionResult result =
 231     Unicode::ConvertUTF16toUTF8(&source, sourceBuffer + d->data.size(),
 232                                 &target, targetBuffer + outputBufferSize,
 233                                 Unicode::lenientConversion);
 234
 235   if(result != Unicode::conversionOK)
 236     debug("String::to8Bit() - Unicode conversion error.");
 237
 238   int newSize = target - targetBuffer;
 239   s.resize(newSize);
 240   targetBuffer[newSize] = 0;
 241
 242   s = (char *) targetBuffer;
 243
 244   delete [] sourceBuffer;
 245   delete [] targetBuffer;
 246
 247   return s;
 248 }
 249
 250 TagLib::wstring String::toWString() const
 251 {
 252   return d->data;
 253 }
 254
 255 const char *String::toCString(bool unicode) const
 256 {
 257   delete [] d->CString;
 258
 259   std::string buffer = to8Bit(unicode);
 260   d->CString = new char[buffer.size() + 1];
 261   strcpy(d->CString, buffer.c_str());
 262
 263   return d->CString;
 264 }
 265
 266 String::Iterator String::begin()
 267 {
 268   return d->data.begin();
 269 }
 270
 271 String::ConstIterator String::begin() const
 272 {
 273   return d->data.begin();
 274 }
 275
 276 String::Iterator String::end()
 277 {
 278   return d->data.end();
 279 }
 280
 281 String::ConstIterator String::end() const
 282 {
 283   return d->data.end();
 284 }
 285
 286 int String::find(const String &s, int offset) const
 287 {
 288   wstring::size_type position = d->data.find(s.d->data, offset);
 289
 290   if(position != wstring::npos)
 291     return position;
 292   else
 293     return -1;
 294 }
 295
 296 int String::rfind(const String &s, int offset) const
 297 {
 298   wstring::size_type position =
 299     d->data.rfind(s.d->data, offset == -1 ? wstring::npos : offset);
 300
 301   if(position != wstring::npos)
 302     return position;
 303   else
 304     return -1;
 305 }
 306
 307 bool String::startsWith(const String &s) const
 308 {
 309   if(s.length() > length())
 310     return false;
 311
 312   return substr(0, s.length()) == s;
 313 }
 314
 315 String String::substr(uint position, uint n) const
 316 {
 317   if(n > position + d->data.size())
 318     n = d->data.size() - position;
 319
 320   String s;
 321   s.d->data = d->data.substr(position, n);
 322   return s;
 323 }
 324
 325 String &String::append(const String &s)
 326 {
 327   detach();
 328   d->data += s.d->data;
 329   return *this;
 330 }
 331
 332 String String::upper() const
 333 {
 334   String s;
 335
 336   static int shift = 'A' - 'a';
 337
 338   for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); ++it) {
 339     if(*it >= 'a' && *it <= 'z')
 340       s.d->data.push_back(*it + shift);
 341     else
 342       s.d->data.push_back(*it);
 343   }
 344
 345   return s;
 346 }
 347
 348 TagLib::uint String::size() const
 349 {
 350   return d->data.size();
 351 }
 352
 353 TagLib::uint String::length() const
 354 {
 355   return size();
 356 }
 357
 358 bool String::isEmpty() const
 359 {
 360   return d->data.size() == 0;
 361 }
 362
 363 bool String::isNull() const
 364 {
 365   return d == null.d;
 366 }
 367
 368 ByteVector String::data(Type t) const
 369 {
 370   ByteVector v;
 371
 372   switch(t) {
 373
 374   case Latin1:
 375   {
 376     for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
 377       v.append(char(*it));
 378     break;
 379   }
 380   case UTF8:
 381   {
 382     std::string s = to8Bit(true);
 383     v.setData(s.c_str(), s.length());
 384     break;
 385   }
 386   case UTF16:
 387   {
 388     // Assume that if we're doing UTF16 and not UTF16BE that we want little
 389     // endian encoding.  (Byte Order Mark)
 390
 391     v.append(char(0xff));
 392     v.append(char(0xfe));
 393
 394     for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 395
 396       char c1 = *it & 0xff;
 397       char c2 = *it >> 8;
 398
 399       v.append(c1);
 400       v.append(c2);
 401     }
 402     break;
 403   }
 404   case UTF16BE:
 405   {
 406     for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 407
 408       char c1 = *it >> 8;
 409       char c2 = *it & 0xff;
 410
 411       v.append(c1);
 412       v.append(c2);
 413     }
 414     break;
 415   }
 416   case UTF16LE:
 417   {
 418     for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 419
 420       char c1 = *it & 0xff;
 421       char c2 = *it >> 8;
 422
 423       v.append(c1);
 424       v.append(c2);
 425     }
 426     break;
 427   }
 428   }
 429
 430   return v;
 431 }
 432
 433 int String::toInt() const
 434 {
 435   return toInt(0);
 436 }
 437
 438 int String::toInt(bool *ok) const
 439 {
 440   int value = 0;
 441
 442   uint size = d->data.size();
 443   bool negative = size > 0 && d->data[0] == '-';
 444   uint start = negative ? 1 : 0;
 445   uint i = start;
 446
 447   for(; i < size && d->data[i] >= '0' && d->data[i] <= '9'; i++)
 448     value = value * 10 + (d->data[i] - '0');
 449
 450   if(negative)
 451     value = value * -1;
 452
 453   if(ok)
 454     *ok = (size > start && i == size);
 455
 456   return value;
 457 }
 458
 459 String String::stripWhiteSpace() const
 460 {
 461   wstring::const_iterator begin = d->data.begin();
 462   wstring::const_iterator end = d->data.end();
 463
 464   while(begin != end &&
 465         (*begin == '\t' || *begin == '\n' || *begin == '\f' ||
 466          *begin == '\r' || *begin == ' '))
 467   {
 468     ++begin;
 469   }
 470
 471   if(begin == end)
 472     return null;
 473
 474   // There must be at least one non-whitespace character here for us to have
 475   // gotten this far, so we should be safe not doing bounds checking.
 476
 477   do {
 478     --end;
 479   } while(*end == '\t' || *end == '\n' ||
 480           *end == '\f' || *end == '\r' || *end == ' ');
 481
 482   return String(wstring(begin, end + 1));
 483 }
 484
 485 bool String::isLatin1() const
 486 {
 487   for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 488     if(*it >= 256)
 489       return false;
 490   }
 491   return true;
 492 }
 493
 494 bool String::isAscii() const
 495 {
 496   for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
 497     if(*it >= 128)
 498       return false;
 499   }
 500   return true;
 501 }
 502
 503 String String::number(int n) // static
 504 {
 505   if(n == 0)
 506     return String("0");
 507
 508   String charStack;
 509
 510   bool negative = n < 0;
 511
 512   if(negative)
 513     n = n * -1;
 514
 515   while(n > 0) {
 516     int remainder = n % 10;
 517     charStack += char(remainder + '0');
 518     n = (n - remainder) / 10;
 519   }
 520
 521   String s;
 522
 523   if(negative)
 524     s += '-';
 525
 526   for(int i = charStack.d->data.size() - 1; i >= 0; i--)
 527     s += charStack.d->data[i];
 528
 529   return s;
 530 }
 531
 532 TagLib::wchar &String::operator[](int i)
 533 {
 534   detach();
 535
 536   return d->data[i];
 537 }
 538
 539 const TagLib::wchar &String::operator[](int i) const
 540 {
 541   return d->data[i];
 542 }
 543
 544 bool String::operator==(const String &s) const
 545 {
 546   return d == s.d || d->data == s.d->data;
 547 }
 548
 549 String &String::operator+=(const String &s)
 550 {
 551   detach();
 552
 553   d->data += s.d->data;
 554   return *this;
 555 }
 556
 557 String &String::operator+=(const wchar_t *s)
 558 {
 559   detach();
 560
 561   d->data += s;
 562   return *this;
 563 }
 564
 565 String &String::operator+=(const char *s)
 566 {
 567   detach();
 568
 569   for(int i = 0; s[i] != 0; i++)
 570     d->data += uchar(s[i]);
 571   return *this;
 572 }
 573
 574 String &String::operator+=(wchar_t c)
 575 {
 576   detach();
 577
 578   d->data += c;
 579   return *this;
 580 }
 581
 582 String &String::operator+=(char c)
 583 {
 584   detach();
 585
 586   d->data += uchar(c);
 587   return *this;
 588 }
 589
 590 String &String::operator=(const String &s)
 591 {
 592   if(&s == this)
 593     return *this;
 594
 595   if(d->deref())
 596     delete d;
 597   d = s.d;
 598   d->ref();
 599   return *this;
 600 }
 601
 602 String &String::operator=(const std::string &s)
 603 {
 604   if(d->deref())
 605     delete d;
 606
 607   d = new StringPrivate;
 608
 609   d->data.resize(s.size());
 610
 611   wstring::iterator targetIt = d->data.begin();
 612   for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
 613     *targetIt = uchar(*it);
 614     ++targetIt;
 615   }
 616
 617   return *this;
 618 }
 619
 620 String &String::operator=(const wstring &s)
 621 {
 622   if(d->deref())
 623     delete d;
 624   d = new StringPrivate(s);
 625   return *this;
 626 }
 627
 628 String &String::operator=(const wchar_t *s)
 629 {
 630   if(d->deref())
 631     delete d;
 632   d = new StringPrivate(s);
 633   return *this;
 634 }
 635
 636 String &String::operator=(char c)
 637 {
 638   if(d->deref())
 639     delete d;
 640   d = new StringPrivate;
 641   d->data += uchar(c);
 642   return *this;
 643 }
 644
 645 String &String::operator=(wchar_t c)
 646 {
 647   if(d->deref())
 648     delete d;
 649   d = new StringPrivate;
 650   d->data += c;
 651   return *this;
 652 }
 653
 654 String &String::operator=(const char *s)
 655 {
 656   if(d->deref())
 657     delete d;
 658
 659   d = new StringPrivate;
 660
 661   int length = ::strlen(s);
 662   d->data.resize(length);
 663
 664   wstring::iterator targetIt = d->data.begin();
 665   for(int i = 0; i < length; i++) {
 666     *targetIt = uchar(s[i]);
 667     ++targetIt;
 668   }
 669
 670   return *this;
 671 }
 672
 673 String &String::operator=(const ByteVector &v)
 674 {
 675   if(d->deref())
 676     delete d;
 677
 678   d = new StringPrivate;
 679   d->data.resize(v.size());
 680   wstring::iterator targetIt = d->data.begin();
 681
 682   uint i = 0;
 683
 684   for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
 685     *targetIt = uchar(*it);
 686     ++targetIt;
 687     ++i;
 688   }
 689
 690   // If we hit a null in the ByteVector, shrink the string again.
 691
 692   d->data.resize(i);
 693
 694   return *this;
 695 }
 696
 697 bool String::operator<(const String &s) const
 698 {
 699   return d->data < s.d->data;
 700 }
 701
 702 ////////////////////////////////////////////////////////////////////////////////
 703 // protected members
 704 ////////////////////////////////////////////////////////////////////////////////
 705
 706 void String::detach()
 707 {
 708   if(d->count() > 1) {
 709     d->deref();
 710     d = new StringPrivate(d->data);
 711   }
 712 }
 713
 714 ////////////////////////////////////////////////////////////////////////////////
 715 // private members
 716 ////////////////////////////////////////////////////////////////////////////////
 717
 718 void String::prepare(Type t)
 719 {
 720   switch(t) {
 721   case UTF16:
 722   {
 723     if(d->data.size() >= 1 && (d->data[0] == 0xfeff || d->data[0] == 0xfffe)) {
 724       bool swap = d->data[0] != 0xfeff;
 725       d->data.erase(d->data.begin(), d->data.begin() + 1);
 726       if(swap) {
 727         for(uint i = 0; i < d->data.size(); i++)
 728           d->data[i] = byteSwap((unsigned short)d->data[i]);
 729       }
 730     }
 731     else {
 732       debug("String::prepare() - Invalid UTF16 string.");
 733       d->data.erase(d->data.begin(), d->data.end());
 734     }
 735     break;
 736   }
 737   case UTF8:
 738   {
 739     int bufferSize = d->data.size() + 1;
 740     Unicode::UTF8  *sourceBuffer = new Unicode::UTF8[bufferSize];
 741     Unicode::UTF16 *targetBuffer = new Unicode::UTF16[bufferSize];
 742
 743     unsigned int i = 0;
 744     for(; i < d->data.size(); i++)
 745       sourceBuffer[i] = Unicode::UTF8(d->data[i]);
 746     sourceBuffer[i] = 0;
 747
 748     const Unicode::UTF8 *source = sourceBuffer;
 749     Unicode::UTF16 *target = targetBuffer;
 750
 751     Unicode::ConversionResult result =
 752       Unicode::ConvertUTF8toUTF16(&source, sourceBuffer + bufferSize,
 753                                   &target, targetBuffer + bufferSize,
 754                                   Unicode::lenientConversion);
 755
 756     if(result != Unicode::conversionOK)
 757       debug("String::prepare() - Unicode conversion error.");
 758
 759
 760     int newSize = target != targetBuffer ? target - targetBuffer - 1 : 0;
 761     d->data.resize(newSize);
 762
 763     for(int i = 0; i < newSize; i++)
 764       d->data[i] = targetBuffer[i];
 765
 766     delete [] sourceBuffer;
 767     delete [] targetBuffer;
 768
 769     break;
 770   }
 771   case UTF16LE:
 772   {
 773     for(uint i = 0; i < d->data.size(); i++)
 774       d->data[i] = byteSwap((unsigned short)d->data[i]);
 775     break;
 776   }
 777   default:
 778     break;
 779   }
 780 }
 781
 782 ////////////////////////////////////////////////////////////////////////////////
 783 // related functions
 784 ////////////////////////////////////////////////////////////////////////////////
 785
 786 const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2)
 787 {
 788   String s(s1);
 789   s.append(s2);
 790   return s;
 791 }
 792
 793 const TagLib::String operator+(const char *s1, const TagLib::String &s2)
 794 {
 795   String s(s1);
 796   s.append(s2);
 797   return s;
 798 }
 799
 800 const TagLib::String operator+(const TagLib::String &s1, const char *s2)
 801 {
 802   String s(s1);
 803   s.append(s2);
 804   return s;
 805 }
 806
 807 std::ostream &operator<<(std::ostream &s, const String &str)
 808 {
 809   s << str.to8Bit();
 810   return s;
 811 }