pleyo.com

/src/trunk2/BAL/Interfaces/BidiResolver.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
00003  * Copyright (C) 2003, 2004, 2006, 2007 Apple Inc.  All right reserved.
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Library General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Library General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Library General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  */
00021 
00022 #ifndef BidiResolver_h
00023 #define BidiResolver_h
00024 
00025 #include "BidiContext.h"
00026 #include <wtf/PassRefPtr.h>
00027 
00028 namespace WebCore {
00029 
00030 // The BidiStatus at a given position (typically the end of a line) can
00031 // be cached and then used to restart bidi resolution at that position.
00032 struct BidiStatus {
00033     BidiStatus()
00034         : eor(WTF::Unicode::OtherNeutral)
00035         , lastStrong(WTF::Unicode::OtherNeutral)
00036         , last(WTF::Unicode::OtherNeutral)
00037     {
00038     }
00039 
00040     BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
00041         : eor(eorDir)
00042         , lastStrong(lastStrongDir)
00043         , last(lastDir)
00044         , context(bidiContext)
00045     {
00046     }
00047 
00048     WTF::Unicode::Direction eor;
00049     WTF::Unicode::Direction lastStrong;
00050     WTF::Unicode::Direction last;
00051     RefPtr<BidiContext> context;
00052 };
00053 
00054 inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
00055 {
00056     return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
00057 }
00058 
00059 inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
00060 {
00061     return !(status1 == status2);
00062 }
00063 
00064 struct BidiCharacterRun {
00065     BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
00066         : m_start(start)
00067         , m_stop(stop)
00068         , m_override(context->override())
00069         , m_next(0)
00070     {
00071         if (dir == WTF::Unicode::OtherNeutral)
00072             dir = context->dir();
00073 
00074         m_level = context->level();
00075 
00076         // add level of run (cases I1 & I2)
00077         if (m_level % 2) {
00078             if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
00079                 m_level++;
00080         } else {
00081             if (dir == WTF::Unicode::RightToLeft)
00082                 m_level++;
00083             else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
00084                 m_level += 2;
00085         }
00086     }
00087 
00088     int start() const { return m_start; }
00089     int stop() const { return m_stop; }
00090     unsigned char level() const { return m_level; }
00091     bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
00092     bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
00093 
00094     BidiCharacterRun* next() const { return m_next; }
00095 
00096     unsigned char m_level;
00097     int m_start;
00098     int m_stop;
00099     bool m_override;
00100     BidiCharacterRun* m_next;
00101 };
00102 
00103 template <class Iterator, class Run> class BidiResolver {
00104 public :
00105     BidiResolver()
00106         : m_direction(WTF::Unicode::OtherNeutral)
00107         , m_adjustEmbedding(false)
00108         , reachedEndOfLine(false)
00109         , emptyRun(true)
00110         , m_firstRun(0)
00111         , m_lastRun(0)
00112         , m_runCount(0)
00113     {
00114     }
00115 
00116     BidiContext* context() const { return m_status.context.get(); }
00117     void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
00118 
00119     void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
00120     void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
00121     void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
00122 
00123     WTF::Unicode::Direction dir() const { return m_direction; }
00124     void setDir(WTF::Unicode::Direction d) { m_direction = d; }
00125 
00126     const BidiStatus& status() const { return m_status; }
00127     void setStatus(const BidiStatus s) { m_status = s; }
00128 
00129     bool adjustEmbedding() const { return m_adjustEmbedding; }
00130     void setAdjustEmbedding(bool adjsutEmbedding) { m_adjustEmbedding = adjsutEmbedding; }
00131 
00132     void embed(WTF::Unicode::Direction);
00133     void createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
00134 
00135     Run* firstRun() const { return m_firstRun; }
00136     Run* lastRun() const { return m_lastRun; }
00137     int runCount() const { return m_runCount; }
00138 
00139     void addRun(Run*);
00140     void deleteRuns();
00141 
00142 protected:
00143     void appendRun();
00144     void reverseRuns(int start, int end);
00145 
00146     Iterator current;
00147     Iterator sor;
00148     Iterator eor;
00149     Iterator last;
00150     BidiStatus m_status;
00151     WTF::Unicode::Direction m_direction;
00152     bool m_adjustEmbedding;
00153     Iterator endOfLine;
00154     bool reachedEndOfLine;
00155     Iterator lastBeforeET;
00156     bool emptyRun;
00157 
00158     Run* m_firstRun;
00159     Run* m_lastRun;
00160     int m_runCount;
00161 };
00162 
00163 template <class Iterator, class Run>
00164 void BidiResolver<Iterator, Run>::appendRun()
00165 {
00166     if (emptyRun || eor.atEnd())
00167         return;
00168 
00169     Run* bidiRun = new Run(sor.offset(), eor.offset() + 1, context(), m_direction);
00170     if (!m_firstRun)
00171         m_firstRun = bidiRun;
00172     else
00173         m_lastRun->m_next = bidiRun;
00174     m_lastRun = bidiRun;
00175     m_runCount++;
00176 
00177     eor.increment(*this);
00178     sor = eor;
00179     m_direction = WTF::Unicode::OtherNeutral;
00180     m_status.eor = WTF::Unicode::OtherNeutral;
00181 }
00182 
00183 template <class Iterator, class Run>
00184 void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
00185 {
00186     using namespace WTF::Unicode;
00187 
00188     bool b = m_adjustEmbedding;
00189     m_adjustEmbedding = false;
00190     if (d == PopDirectionalFormat) {
00191         BidiContext* c = context()->parent();
00192         if (c) {
00193             if (!emptyRun && eor != last) {
00194                 ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
00195                 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
00196                 ASSERT(m_status.last == EuropeanNumberSeparator
00197                     || m_status.last == EuropeanNumberTerminator
00198                     || m_status.last == CommonNumberSeparator
00199                     || m_status.last == BoundaryNeutral
00200                     || m_status.last == BlockSeparator
00201                     || m_status.last == SegmentSeparator
00202                     || m_status.last == WhiteSpaceNeutral
00203                     || m_status.last == OtherNeutral);
00204                 if (m_direction == OtherNeutral)
00205                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
00206                 if (context()->dir() == LeftToRight) {
00207                     // bidi.sor ... bidi.eor ... bidi.last L
00208                     if (m_status.eor == EuropeanNumber) {
00209                         if (m_status.lastStrong != LeftToRight) {
00210                             m_direction = EuropeanNumber;
00211                             appendRun();
00212                         }
00213                     } else if (m_status.eor == ArabicNumber) {
00214                         m_direction = ArabicNumber;
00215                         appendRun();
00216                     } else if (m_status.lastStrong != LeftToRight) {
00217                         if (context()->dir() == RightToLeft)
00218                             m_direction = RightToLeft;
00219                         else {
00220                             appendRun();
00221                             m_direction = LeftToRight;
00222                         }
00223                     }
00224                 } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
00225                     appendRun();
00226                     m_direction = RightToLeft;
00227                 }
00228                 eor = last;
00229             }
00230             appendRun();
00231             emptyRun = true;
00232             // sor for the new run is determined by the higher level (rule X10)
00233             setLastDir(context()->dir());
00234             setLastStrongDir(context()->dir());
00235             setContext(c);
00236             eor = Iterator();
00237         }
00238     } else {
00239         Direction runDir;
00240         if (d == RightToLeftEmbedding || d == RightToLeftOverride)
00241             runDir = RightToLeft;
00242         else
00243             runDir = LeftToRight;
00244         bool override = d == LeftToRightOverride || d == RightToLeftOverride;
00245 
00246         unsigned char level = context()->level();
00247         if (runDir == RightToLeft) {
00248             if (level % 2) // we have an odd level
00249                 level += 2;
00250             else
00251                 level++;
00252         } else {
00253             if (level % 2) // we have an odd level
00254                 level++;
00255             else
00256                 level += 2;
00257         }
00258 
00259         if (level < 61) {
00260             if (!emptyRun && eor != last) {
00261                 ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
00262                 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
00263                 ASSERT(m_status.last == EuropeanNumberSeparator
00264                     || m_status.last == EuropeanNumberTerminator
00265                     || m_status.last == CommonNumberSeparator
00266                     || m_status.last == BoundaryNeutral
00267                     || m_status.last == BlockSeparator
00268                     || m_status.last == SegmentSeparator
00269                     || m_status.last == WhiteSpaceNeutral
00270                     || m_status.last == OtherNeutral);
00271                 if (m_direction == OtherNeutral)
00272                     m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
00273                 if (runDir == LeftToRight) {
00274                     // bidi.sor ... bidi.eor ... bidi.last L
00275                     if (m_status.eor == EuropeanNumber) {
00276                         if (m_status.lastStrong != LeftToRight) {
00277                             m_direction = EuropeanNumber;
00278                             appendRun();
00279                         }
00280                     } else if (m_status.eor == ArabicNumber) {
00281                         m_direction = ArabicNumber;
00282                         appendRun();
00283                     } else if (m_status.lastStrong != LeftToRight && context()->dir() == LeftToRight) {
00284                         appendRun();
00285                         m_direction = LeftToRight;
00286                     }
00287                 } else if (m_status.eor == ArabicNumber
00288                     || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || context()->dir() == RightToLeft)
00289                     || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
00290                     appendRun();
00291                     m_direction = RightToLeft;
00292                 }
00293                 eor = last;
00294             }
00295             appendRun();
00296             emptyRun = true;
00297             setContext(new BidiContext(level, runDir, override, context()));
00298             setLastDir(runDir);
00299             setLastStrongDir(runDir);
00300             eor = Iterator();
00301         }
00302     }
00303     m_adjustEmbedding = b;
00304 }
00305 
00306 template <class Iterator, class Run>
00307 void BidiResolver<Iterator, Run>::deleteRuns()
00308 {
00309     emptyRun = true;
00310     if (!m_firstRun)
00311         return;
00312 
00313     Run* curr = m_firstRun;
00314     while (curr) {
00315         Run* s = curr->m_next;
00316         delete curr;
00317         curr = s;
00318     }
00319 
00320     m_firstRun = 0;
00321     m_lastRun = 0;
00322     m_runCount = 0;
00323 }
00324 
00325 template <class Iterator, class Run>
00326 void BidiResolver<Iterator, Run>::reverseRuns(int start, int end)
00327 {
00328     if (start >= end)
00329         return;
00330 
00331     ASSERT(start >= 0 && end < m_runCount);
00332     
00333     // Get the item before the start of the runs to reverse and put it in
00334     // |beforeStart|.  |curr| should point to the first run to reverse.
00335     Run* curr = m_firstRun;
00336     Run* beforeStart = 0;
00337     int i = 0;
00338     while (i < start) {
00339         i++;
00340         beforeStart = curr;
00341         curr = curr->next();
00342     }
00343 
00344     Run* startRun = curr;
00345     while (i < end) {
00346         i++;
00347         curr = curr->next();
00348     }
00349     Run* endRun = curr;
00350     Run* afterEnd = curr->next();
00351 
00352     i = start;
00353     curr = startRun;
00354     Run* newNext = afterEnd;
00355     while (i <= end) {
00356         // Do the reversal.
00357         Run* next = curr->next();
00358         curr->m_next = newNext;
00359         newNext = curr;
00360         curr = next;
00361         i++;
00362     }
00363 
00364     // Now hook up beforeStart and afterEnd to the startRun and endRun.
00365     if (beforeStart)
00366         beforeStart->m_next = endRun;
00367     else
00368         m_firstRun = endRun;
00369 
00370     startRun->m_next = afterEnd;
00371     if (!afterEnd)
00372         m_lastRun = startRun;
00373 }
00374 
00375 template <class Iterator, class Run>
00376 void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder, bool hardLineBreak)
00377 {
00378     using namespace WTF::Unicode;
00379 
00380     ASSERT(m_direction == OtherNeutral);
00381 
00382     emptyRun = true;
00383 
00384     eor = Iterator();
00385 
00386     current = start;
00387     last = current;
00388     bool pastEnd = false;
00389     BidiResolver<Iterator, Run> stateAtEnd;
00390 
00391     while (true) {
00392         Direction dirCurrent;
00393         if (pastEnd && (hardLineBreak || current.atEnd())) {
00394             BidiContext* c = context();
00395             while (c->parent())
00396                 c = c->parent();
00397             dirCurrent = c->dir();
00398             if (hardLineBreak) {
00399                 // A deviation from the Unicode Bidi Algorithm in order to match
00400                 // Mac OS X text and WinIE: a hard line break resets bidi state.
00401                 stateAtEnd.setContext(c);
00402                 stateAtEnd.setEorDir(dirCurrent);
00403                 stateAtEnd.setLastDir(dirCurrent);
00404                 stateAtEnd.setLastStrongDir(dirCurrent);
00405             }
00406         } else {
00407             dirCurrent = current.direction();
00408             if (context()->override()
00409                     && dirCurrent != RightToLeftEmbedding
00410                     && dirCurrent != LeftToRightEmbedding
00411                     && dirCurrent != RightToLeftOverride
00412                     && dirCurrent != LeftToRightOverride
00413                     && dirCurrent != PopDirectionalFormat)
00414                 dirCurrent = context()->dir();
00415             else if (dirCurrent == NonSpacingMark)
00416                 dirCurrent = m_status.last;
00417         }
00418 
00419         ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
00420         switch (dirCurrent) {
00421 
00422         // embedding and overrides (X1-X9 in the Bidi specs)
00423         case RightToLeftEmbedding:
00424         case LeftToRightEmbedding:
00425         case RightToLeftOverride:
00426         case LeftToRightOverride:
00427         case PopDirectionalFormat:
00428             embed(dirCurrent);
00429             break;
00430 
00431             // strong types
00432         case LeftToRight:
00433             switch(m_status.last) {
00434                 case RightToLeft:
00435                 case RightToLeftArabic:
00436                 case EuropeanNumber:
00437                 case ArabicNumber:
00438                     if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
00439                         appendRun();
00440                     break;
00441                 case LeftToRight:
00442                     break;
00443                 case EuropeanNumberSeparator:
00444                 case EuropeanNumberTerminator:
00445                 case CommonNumberSeparator:
00446                 case BoundaryNeutral:
00447                 case BlockSeparator:
00448                 case SegmentSeparator:
00449                 case WhiteSpaceNeutral:
00450                 case OtherNeutral:
00451                     if (m_status.eor == EuropeanNumber) {
00452                         if (m_status.lastStrong != LeftToRight) {
00453                             // the numbers need to be on a higher embedding level, so let's close that run
00454                             m_direction = EuropeanNumber;
00455                             appendRun();
00456                             if (context()->dir() != LeftToRight) {
00457                                 // the neutrals take the embedding direction, which is R
00458                                 eor = last;
00459                                 m_direction = RightToLeft;
00460                                 appendRun();
00461                             }
00462                         }
00463                     } else if (m_status.eor == ArabicNumber) {
00464                         // Arabic numbers are always on a higher embedding level, so let's close that run
00465                         m_direction = ArabicNumber;
00466                         appendRun();
00467                         if (context()->dir() != LeftToRight) {
00468                             // the neutrals take the embedding direction, which is R
00469                             eor = last;
00470                             m_direction = RightToLeft;
00471                             appendRun();
00472                         }
00473                     } else if (m_status.lastStrong != LeftToRight) {
00474                         //last stuff takes embedding dir
00475                         if (context()->dir() == RightToLeft) {
00476                             eor = last; 
00477                             m_direction = RightToLeft;
00478                         }
00479                         appendRun();
00480                     }
00481                 default:
00482                     break;
00483             }
00484             eor = current;
00485             m_status.eor = LeftToRight;
00486             m_status.lastStrong = LeftToRight;
00487             m_direction = LeftToRight;
00488             break;
00489         case RightToLeftArabic:
00490         case RightToLeft:
00491             switch (m_status.last) {
00492                 case LeftToRight:
00493                 case EuropeanNumber:
00494                 case ArabicNumber:
00495                     appendRun();
00496                 case RightToLeft:
00497                 case RightToLeftArabic:
00498                     break;
00499                 case EuropeanNumberSeparator:
00500                 case EuropeanNumberTerminator:
00501                 case CommonNumberSeparator:
00502                 case BoundaryNeutral:
00503                 case BlockSeparator:
00504                 case SegmentSeparator:
00505                 case WhiteSpaceNeutral:
00506                 case OtherNeutral:
00507                     if (m_status.eor == EuropeanNumber) {
00508                         if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
00509                             eor = last;
00510                         appendRun();
00511                     } else if (m_status.eor == ArabicNumber)
00512                         appendRun();
00513                     else if (m_status.lastStrong == LeftToRight) {
00514                         if (context()->dir() == LeftToRight)
00515                             eor = last;
00516                         appendRun();
00517                     }
00518                 default:
00519                     break;
00520             }
00521             eor = current;
00522             m_status.eor = RightToLeft;
00523             m_status.lastStrong = dirCurrent;
00524             m_direction = RightToLeft;
00525             break;
00526 
00527             // weak types:
00528 
00529         case EuropeanNumber:
00530             if (m_status.lastStrong != RightToLeftArabic) {
00531                 // if last strong was AL change EN to AN
00532                 switch (m_status.last) {
00533                     case EuropeanNumber:
00534                     case LeftToRight:
00535                         break;
00536                     case RightToLeft:
00537                     case RightToLeftArabic:
00538                     case ArabicNumber:
00539                         eor = last;
00540                         appendRun();
00541                         m_direction = EuropeanNumber;
00542                         break;
00543                     case EuropeanNumberSeparator:
00544                     case CommonNumberSeparator:
00545                         if (m_status.eor == EuropeanNumber)
00546                             break;
00547                     case EuropeanNumberTerminator:
00548                     case BoundaryNeutral:
00549                     case BlockSeparator:
00550                     case SegmentSeparator:
00551                     case WhiteSpaceNeutral:
00552                     case OtherNeutral:
00553                         if (m_status.eor == EuropeanNumber) {
00554                             if (m_status.lastStrong == RightToLeft) {
00555                                 // ENs on both sides behave like Rs, so the neutrals should be R.
00556                                 // Terminate the EN run.
00557                                 appendRun();
00558                                 // Make an R run.
00559                                 eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
00560                                 m_direction = RightToLeft;
00561                                 appendRun();
00562                                 // Begin a new EN run.
00563                                 m_direction = EuropeanNumber;
00564                             }
00565                         } else if (m_status.eor == ArabicNumber) {
00566                             // Terminate the AN run.
00567                             appendRun();
00568                             if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
00569                                 // Make an R run.
00570                                 eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
00571                                 m_direction = RightToLeft;
00572                                 appendRun();
00573                                 // Begin a new EN run.
00574                                 m_direction = EuropeanNumber;
00575                             }
00576                         } else if (m_status.lastStrong == RightToLeft) {
00577                             // Extend the R run to include the neutrals.
00578                             eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
00579                             m_direction = RightToLeft;
00580                             appendRun();
00581                             // Begin a new EN run.
00582                             m_direction = EuropeanNumber;
00583                         }
00584                     default:
00585                         break;
00586                 }
00587                 eor = current;
00588                 m_status.eor = EuropeanNumber;
00589                 if (m_direction == OtherNeutral)
00590                     m_direction = LeftToRight;
00591                 break;
00592             }
00593         case ArabicNumber:
00594             dirCurrent = ArabicNumber;
00595             switch (m_status.last) {
00596                 case LeftToRight:
00597                     if (context()->dir() == LeftToRight)
00598                         appendRun();
00599                     break;
00600                 case ArabicNumber:
00601                     break;
00602                 case RightToLeft:
00603                 case RightToLeftArabic:
00604                 case EuropeanNumber:
00605                     eor = last;
00606                     appendRun();
00607                     break;
00608                 case CommonNumberSeparator:
00609                     if (m_status.eor == ArabicNumber)
00610                         break;
00611                 case EuropeanNumberSeparator:
00612                 case EuropeanNumberTerminator:
00613                 case BoundaryNeutral:
00614                 case BlockSeparator:
00615                 case SegmentSeparator:
00616                 case WhiteSpaceNeutral:
00617                 case OtherNeutral:
00618                     if (m_status.eor == ArabicNumber
00619                         || m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)
00620                         || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
00621                         // Terminate the run before the neutrals.
00622                         appendRun();
00623                         // Begin an R run for the neutrals.
00624