// Logo Search packages:
// Sourcecode: jedit version File versions  Download package
//
// TokenMarker.java

/*
 * TokenMarker.java - Tokenizes lines of text
 * :tabSize=8:indentSize=8:noTabs=false:
 * :folding=explicit:collapseFolds=1:
 *
 * Copyright (C) 1998, 2003 Slava Pestov
 * Copyright (C) 1999, 2000 mike dillon
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

package org.gjt.sp.jedit.syntax;

//{{{ Imports
import javax.swing.text.Segment;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.gjt.sp.jedit.TextUtilities;
import org.gjt.sp.util.SegmentCharSequence;
import org.gjt.sp.util.StandardUtilities;
//}}}

/**
 * A token marker splits lines of text into tokens. Each token carries
 * a length field and an identification tag that can be mapped to a color
 * or font style for painting that token.
 *
 * @author Slava Pestov, mike dillon
 * @version $Id: TokenMarker.java 16344 2009-10-14 10:31:01Z kpouer $
 *
 * @see org.gjt.sp.jedit.syntax.Token
 * @see org.gjt.sp.jedit.syntax.TokenHandler
 */
public class TokenMarker
{
      //{{{ TokenMarker constructor
      /**
       * Creates a token marker with no rule sets registered; rule sets
       * are supplied afterwards through {@link #addRuleSet(ParserRuleSet)}.
       */
      public TokenMarker()
      {} //}}}

      //{{{ addRuleSet() method
      /**
       * Registers a rule set under the name it reports for itself. A
       * rule set whose name is "MAIN" is additionally remembered as the
       * main rule set.
       *
       * @param rules the rule set to register
       */
      public void addRuleSet(ParserRuleSet rules)
      {
            final String setName = rules.getSetName();
            ruleSets.put(setName, rules);

            if ("MAIN".equals(setName))
            {
                  mainRuleSet = rules;
            }
      } //}}}

      //{{{ getMainRuleSet() method
      /**
       * Returns the rule set that was registered under the name "MAIN".
       *
       * @return the main rule set, or null if no rule set named "MAIN"
       * has been added yet
       */
      public ParserRuleSet getMainRuleSet()
      {
            return mainRuleSet;
      } //}}}

      //{{{ getRuleSet() method
      /**
       * Looks up a registered rule set by name.
       *
       * @param setName the name the rule set was registered under
       * @return the matching rule set, or null if none is registered
       * under that name
       */
      public ParserRuleSet getRuleSet(String setName)
      {
            return ruleSets.get(setName);
      } //}}}

      //{{{ getRuleSets() method
      /**
       * @since jEdit 4.2pre3
       */
00078       public ParserRuleSet[] getRuleSets()
      {
            return ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]);
      } //}}}

      //{{{ markTokens() method
      /**
       * Do not call this method directly; call Buffer.markTokens() instead.
       *
       * @param prevContext the context of the previous line, it can be null
       * @param tokenHandler the token handler
       * @param line a segment containing the content of the line
       */
00091       public synchronized LineContext markTokens(LineContext prevContext,
            TokenHandler tokenHandler, Segment line)
      {
            //{{{ Set up some instance variables
            // this is to avoid having to pass around lots and lots of
            // parameters.
            this.tokenHandler = tokenHandler;
            this.line = line;

            lastOffset = line.offset;
            lineLength = line.count + line.offset;

            context = new LineContext();

            if(prevContext == null)
            {
                  context.rules = getMainRuleSet();
                  context.escapeRule = context.rules.getEscapeRule();
            }
            else
            {
                  context.parent = prevContext.parent;
                  context.setInRule(prevContext.inRule);
                  context.rules = prevContext.rules;
                  context.spanEndSubst = prevContext.spanEndSubst;
            }

            keywords = context.rules.getKeywords();

            seenWhitespaceEnd = false;
            whitespaceEnd = line.offset;
            //}}}

            //{{{ Main parser loop
            int terminateChar = context.rules.getTerminateChar();
            boolean terminated = false;
main_loop:  for(pos = line.offset; pos < lineLength; pos++)
            {
                  //{{{ check if we have to stop parsing (happens if the terminateChar has been exceeded)
                  if(terminateChar >= 0 && pos - line.offset >= terminateChar
                        && !terminated)
                  {
                        terminated = true;
                        context = new LineContext(ParserRuleSet
                              .getStandardRuleSet(context.rules
                              .getDefault()),context);
                        keywords = context.rules.getKeywords();
                  } //}}}

                  //{{{ Check for the escape rule before anything else.
                  if (context.escapeRule != null &&
                        handleRule(context.escapeRule,false))
                  {
                        continue main_loop;
                  } //}}}

                  //{{{ check for end of delegate
                  if (context.parent != null
                      && context.parent.inRule != null
                      && checkDelegateEnd(context.parent.inRule))
                  {
                        seenWhitespaceEnd = true;
                        continue main_loop;
                  } //}}}

                  //{{{ check every rule
                  Character ch = Character.valueOf(line.array[pos]);
                  List<ParserRule> rules = context.rules.getRules(ch);
                  for (ParserRule rule : rules)
                  {
                        // stop checking rules if there was a match
                        if (handleRule(rule,false))
                        {
                              seenWhitespaceEnd = true;
                              continue main_loop;
                        }
                  } //}}}

                  //{{{ check if current character is a word separator
                  if(Character.isWhitespace(ch))
                  {
                        if(!seenWhitespaceEnd)
                              whitespaceEnd = pos + 1;

                        if(context.inRule != null)
                              handleRule(context.inRule,true);

                        handleNoWordBreak();

                        markKeyword(false);

                        if(lastOffset != pos)
                        {
                              tokenHandler.handleToken(line,
                                    context.rules.getDefault(),
                                    lastOffset - line.offset,
                                    pos - lastOffset,
                                    context);
                        }

                        tokenHandler.handleToken(line,
                              context.rules.getDefault(),
                              pos - line.offset,1,context);
                        lastOffset = pos + 1;
                  }
                  else
                  {
                        if(keywords != null || context.rules.getRuleCount() != 0)
                        {
                              String noWordSep = context.rules.getNoWordSep();

                              if(!Character.isLetterOrDigit(ch)
                                    && noWordSep.indexOf(ch) == -1)
                              {
                                    if(context.inRule != null)
                                          handleRule(context.inRule,true);

                                    handleNoWordBreak();

                                    markKeyword(true);

                                    tokenHandler.handleToken(line,
                                          context.rules.getDefault(),
                                          lastOffset - line.offset,1,
                                          context);
                                    lastOffset = pos + 1;
                              }
                        }

                        seenWhitespaceEnd = true;
                  } //}}}
            } //}}}

            //{{{ Mark all remaining characters
            pos = lineLength;

            if(context.inRule != null)
                  handleRule(context.inRule,true);

            handleNoWordBreak();
            markKeyword(true);
            //}}}

            //{{{ Unwind any NO_LINE_BREAK parent delegates
unwind:           while(context.parent != null)
            {
                  ParserRule rule = context.parent.inRule;
                  if((rule != null && (rule.action
                        & ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
                        || terminated)
                  {
                        context = context.parent;
                        keywords = context.rules.getKeywords();
                        context.setInRule(null);
                  }
                  else
                        break unwind;
            } //}}}

            tokenHandler.handleToken(line,Token.END,
                  pos - line.offset,0,context);

            context = context.intern();
            tokenHandler.setLineContext(context);

            /* for GC. */
            this.tokenHandler = null;
            this.line = null;

            return context;
      } //}}}

      //{{{ Private members

      //{{{ Instance variables
      private final Map<String, ParserRuleSet> ruleSets = new Hashtable<String, ParserRuleSet>(64);
      private ParserRuleSet mainRuleSet;

      // Instead of passing these around to each method, we just store them
      // as instance variables. Note that this is not thread-safe.
      private TokenHandler tokenHandler;
      /** The line from which we will mark the tokens. */
00273       private Segment line;
      /** The context of the current line. */
00275       private LineContext context;
      private KeywordMap keywords;
      private final Segment pattern = new Segment();
      private int lastOffset;
      private int lineLength;
      private int pos;

      private int whitespaceEnd;
      private boolean seenWhitespaceEnd;
      //}}}

      //{{{ checkDelegateEnd() method
      /**
       * Tests whether the rule the parent context is inside of ends at
       * the current position. On a match the pending text of the
       * current context is flushed, the end sequence is emitted and
       * parsing continues in a copy of the parent context.
       *
       * @param rule the rule the parent context is currently inside of
       * @return true if the end of the rule was matched and consumed
       */
      private boolean checkDelegateEnd(ParserRule rule)
      {
            if(null == rule.end)
            {
                  return false;
            }

            // Try the end sequence from the parent's point of view, then
            // switch back to the delegate context whatever the outcome.
            final LineContext delegateContext = context;
            context = delegateContext.parent;
            keywords = context.rules.getKeywords();
            final boolean endMatched = handleRule(rule,true);
            context = delegateContext;
            keywords = delegateContext.rules.getKeywords();

            if(!endMatched)
            {
                  return false;
            }

            // Flush whatever is still pending inside the delegate.
            if(context.inRule != null)
            {
                  handleRule(context.inRule,true);
            }
            markKeyword(true);

            // Work on a copy so the original parent is left untouched.
            context = (LineContext)context.parent.clone();

            final ParserRule closedRule = context.inRule;
            tokenHandler.handleToken(line,
                  matchToken(closedRule, closedRule, context),
                  pos - line.offset,pattern.count,context);

            keywords = context.rules.getKeywords();
            context.setInRule(null);
            lastOffset = pos + pattern.count;

            // leave pos on the last character of the match sequence
            pos = lastOffset - 1;

            return true;
      } //}}}

      //{{{ handleRule() method
      /**
       * Checks if the rule matches the line at the current position
       * and handles the rule if it does match
       */
00330       private boolean handleRule(ParserRule checkRule, boolean end)
      {
            //{{{ Some rules can only match in certain locations
            if(!end)
            {
                  if (null == checkRule.upHashChars)
                  {
                        if (checkRule.upHashChar != null &&
                            (pos + checkRule.upHashChar.length() < line.array.length) &&
                            !checkHashString(checkRule))
                        {
                              return false;
                        }
                  }
                  else
                  {
                        if (-1 == Arrays.binarySearch(
                                    checkRule.upHashChars,
                                    Character.toUpperCase(line.array[pos])))
                        {
                              return false;
                        }
                  }
            }

            int offset = (checkRule.action & ParserRule.MARK_PREVIOUS) != 0 ? lastOffset : pos;
            int posMatch = end ? checkRule.endPosMatch : checkRule.startPosMatch;

            if((posMatch & ParserRule.AT_LINE_START)
                  == ParserRule.AT_LINE_START)
            {
                  if(offset != line.offset)
                  {
                        return false;
                  }
            }
            else if((posMatch & ParserRule.AT_WHITESPACE_END)
                  == ParserRule.AT_WHITESPACE_END)
            {
                  if(offset != whitespaceEnd)
                  {
                        return false;
                  }
            }
            else if((posMatch & ParserRule.AT_WORD_START)
                  == ParserRule.AT_WORD_START)
            {
                  if(offset != lastOffset)
                  {
                        return false;
                  }
            } //}}}

            int matchedChars = 1;
            CharSequence charSeq = null;
            Matcher match = null;

            //{{{ See if the rule's start or end sequence matches here
            if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
            {
                  // the end cannot be a regular expression
                  if((checkRule.action & ParserRule.REGEXP) == 0 || end)
                  {
                        if(end)
                        {
                              if(context.spanEndSubst != null)
                                    pattern.array = context.spanEndSubst;
                              else
                                    pattern.array = checkRule.end;
                        }
                        else
                              pattern.array = checkRule.start;
                        pattern.offset = 0;
                        pattern.count = pattern.array.length;
                        matchedChars = pattern.count;

                        if(!SyntaxUtilities.regionMatches(context.rules
                              .getIgnoreCase(),line,pos,pattern.array))
                        {
                              return false;
                        }
                  }
                  else
                  {
                        // note that all regexps start with \A so they only
                        // match the start of the string
                        //int matchStart = pos - line.offset;
                        charSeq = new SegmentCharSequence(line, pos - line.offset,
                                                  line.count - (pos - line.offset));
                        match = checkRule.startRegexp.matcher(charSeq);
                        if(!match.lookingAt())
                        {
                              return false;
                        }
                        else if(match.start() != 0)
                        {
                              throw new InternalError("Can't happen");
                        }
                        else
                        {
                              matchedChars = match.end();
                              /* workaround for hang if match was
                               * zero-width. not sure if there is
                               * a better way to handle this */
                              if(matchedChars == 0)
                                    matchedChars = 1;
                        }
                  }
            } //}}}
            //{{{ Check for an escape sequence
            if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
            {
                  pos += pattern.count;
            } //}}}
            //{{{ Handle start of rule
            else if(!end)
            {
                  if(context.inRule != null)
                        handleRule(context.inRule,true);

                  markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
                        != ParserRule.MARK_PREVIOUS);

                  switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
                  {
                  //{{{ SEQ
                  case ParserRule.SEQ:
                        context.spanEndSubst = null;

                        if((checkRule.action & ParserRule.REGEXP) != 0)
                        {
                              handleTokenWithSpaces(tokenHandler,
                                    checkRule.token,
                                    pos - line.offset,
                                    matchedChars,
                                    context);
                        }
                        else
                        {
                              tokenHandler.handleToken(line,
                                    checkRule.token,
                                    pos - line.offset,
                                    matchedChars,context);
                        }

                        // a DELEGATE attribute on a SEQ changes the
                        // ruleset from the end of the SEQ onwards
                        if(checkRule.delegate != null)
                        {
                              context = new LineContext(
                                    checkRule.delegate,
                                    context.parent);
                              keywords = context.rules.getKeywords();
                        }
                        break;
                  //}}}
                  //{{{ SPAN, EOL_SPAN
                  case ParserRule.SPAN:
                  case ParserRule.EOL_SPAN:
                        context.setInRule(checkRule);

                        byte tokenType = matchToken(checkRule,
                                          context.inRule, context);

                        if((checkRule.action & ParserRule.REGEXP) != 0)
                        {
                              handleTokenWithSpaces(tokenHandler,
                                    tokenType,
                                    pos - line.offset,
                                    matchedChars,
                                    context);
                        }
                        else
                        {
                              tokenHandler.handleToken(line,tokenType,
                                    pos - line.offset,
                                    matchedChars,context);
                        }

                        char[] spanEndSubst = null;
                        /* substitute result of matching the rule start
                         * into the end string.
                         *
                         * eg, in shell script mode, <<\s*(\w+) is
                         * matched into <$1> to construct rules for
                         * highlighting read-ins like this <<EOF
                         * ...
                         * EOF
                         */
                        if(charSeq != null && checkRule.end != null)
                        {
                              spanEndSubst = substitute(match,
                                    checkRule.end);
                        }

                        context.spanEndSubst = spanEndSubst;
                        context = new LineContext(
                              checkRule.delegate,
                              context);
                        keywords = context.rules.getKeywords();

                        break;
                  //}}}
                  //{{{ MARK_FOLLOWING
                  case ParserRule.MARK_FOLLOWING:
                        tokenHandler.handleToken(line,
                              matchToken(checkRule, checkRule, context),
                              pos - line.offset,
                              pattern.count,context);

                        context.spanEndSubst = null;
                        context.setInRule(checkRule);
                        break;
                  //}}}
                  //{{{ MARK_PREVIOUS
                  case ParserRule.MARK_PREVIOUS:
                        context.spanEndSubst = null;

                        if(pos != lastOffset)
                        {
                              tokenHandler.handleToken(line,
                                    checkRule.token,
                                    lastOffset - line.offset,
                                    pos - lastOffset,
                                    context);
                        }

                        tokenHandler.handleToken(line,
                              matchToken(checkRule, checkRule, context),
                              pos - line.offset,pattern.count,
                              context);

                        break;
                  //}}}
                  default:
                        throw new InternalError("Unhandled major action");
                  }

                  // move pos to last character of match sequence
                  pos += matchedChars - 1;
                  lastOffset = pos + 1;

                  // break out of inner for loop to check next char
            } //}}}
            //{{{ Handle end of MARK_FOLLOWING
            else if((context.inRule.action & ParserRule.MARK_FOLLOWING) != 0)
            {
                  if(pos != lastOffset)
                  {
                        tokenHandler.handleToken(line,
                              context.inRule.token,
                              lastOffset - line.offset,
                              pos - lastOffset,context);
                  }

                  lastOffset = pos;
                  context.setInRule(null);
            } //}}}

            return true;
      } //}}}

      //{{{ handleNoWordBreak() method
      /**
       * Ends the enclosing rule at a word break. If the parent context
       * is inside a rule flagged NO_WORD_BREAK, the pending text is
       * emitted with that rule's token and parsing drops back to the
       * parent context, which is then no longer inside any rule.
       */
      private void handleNoWordBreak()
      {
            if(context.parent == null)
            {
                  return;
            }

            final ParserRule enclosing = context.parent.inRule;
            if(enclosing == null
                  || (enclosing.action & ParserRule.NO_WORD_BREAK) == 0)
            {
                  return;
            }

            if(lastOffset != pos)
            {
                  tokenHandler.handleToken(line,
                        enclosing.token,
                        lastOffset - line.offset,
                        pos - lastOffset,context);
            }
            lastOffset = pos;

            // fall back to the parent and leave its rule
            context = context.parent;
            keywords = context.rules.getKeywords();
            context.setInRule(null);
      } //}}}

      //{{{ handleTokenWithSpaces() method
      /**
       * Reports a matched region to the token handler, split so that
       * every whitespace character becomes a one-character token of its
       * own and each run between them a separate token. All pieces
       * carry the same token type.
       *
       * @param tokenHandler the handler receiving the tokens
       * @param tokenType the token type for every piece
       * @param start start of the region, relative to the line start
       * @param len length of the region
       * @param context the context passed along with each token
       */
      private void handleTokenWithSpaces(TokenHandler tokenHandler,
            byte tokenType, int start, int len, LineContext context)
      {
            final int stop = start + len;
            int runStart = start;
            int idx = start;

            while(idx < stop)
            {
                  if(Character.isWhitespace(line.array[line.offset + idx]))
                  {
                        // flush the non-whitespace run that ends here
                        if(runStart != idx)
                        {
                              tokenHandler.handleToken(line,
                                    tokenType,runStart,idx - runStart,context);
                        }
                        tokenHandler.handleToken(line,tokenType,idx,1,context);
                        runStart = idx + 1;
                  }
                  idx++;
            }

            // trailing run after the last whitespace character
            if(runStart != stop)
            {
                  tokenHandler.handleToken(line,tokenType,runStart,
                        stop - runStart,context);
            }
      } //}}}

      //{{{ markKeyword() method
      /**
       * Classifies the pending text between {@code lastOffset} and
       * {@code pos}. It is emitted as a DIGIT token if digit
       * highlighting applies, otherwise as a keyword token if the
       * keyword map knows it, otherwise -- only when requested -- as a
       * token of the rule set's default type. Does nothing if there is
       * no pending text.
       *
       * @param addRemaining true to emit text that is neither a number
       * nor a keyword with the default token type; false to leave it
       * pending
       */
      private void markKeyword(boolean addRemaining)
      {
            int len = pos - lastOffset;
            if(len == 0)
                  return;

            //{{{ Do digits
            if(context.rules.getHighlightDigits())
            {
                  boolean digit = false;
                  boolean mixed = false;

                  for(int i = lastOffset; i < pos; i++)
                  {
                        char ch = line.array[i];
                        if(Character.isDigit(ch))
                              digit = true;
                        else
                              mixed = true;
                  }

                  if(mixed)
                  {
                        Pattern digitRE = context.rules.getDigitRegexp();

                        // only match against regexp if its not all
                        // digits; if all digits, no point matching
                        if(digit)
                        {
                              if(digitRE == null)
                              {
                                    // mixed digit/alpha keyword,
                                    // and no regexp... don't
                                    // highlight as DIGIT
                                    digit = false;
                              }
                              else
                              {
                                    // Temporarily narrow the shared
                                    // segment to the pending word; the
                                    // finally block guarantees it is
                                    // restored even if matching throws.
                                    int oldCount = line.count;
                                    int oldOffset = line.offset;
                                    line.offset = lastOffset;
                                    line.count = len;
                                    try
                                    {
                                          CharSequence seq = new SegmentCharSequence(line);
                                          digit = digitRE.matcher(seq).matches();
                                    }
                                    finally
                                    {
                                          line.offset = oldOffset;
                                          line.count = oldCount;
                                    }
                              }
                        }
                  }

                  if(digit)
                  {
                        tokenHandler.handleToken(line,Token.DIGIT,
                              lastOffset - line.offset,
                              len,context);
                        lastOffset = pos;

                        return;
                  }
            } //}}}

            //{{{ Do keywords
            if(keywords != null)
            {
                  byte id = keywords.lookup(line, lastOffset, len);

                  if(id != Token.NULL)
                  {
                        tokenHandler.handleToken(line,id,
                              lastOffset - line.offset,
                              len,context);
                        lastOffset = pos;
                        return;
                  }
            } //}}}

            //{{{ Handle any remaining crud
            if(addRemaining)
            {
                  tokenHandler.handleToken(line,context.rules.getDefault(),
                        lastOffset - line.offset,len,context);
                  lastOffset = pos;
            } //}}}
      } //}}}

      //{{{ substitute() method
      /**
       * Builds the concrete end sequence of a regexp span by expanding
       * group references in the rule's end template.
       * <ul>
       * <li>{@code $n} is replaced by the text of capture group n;</li>
       * <li>{@code ~n} is replaced by the complementary bracket of
       * group n when that group is exactly one character long (or by
       * the character itself if it has no complement); for any other
       * length a literal '~' is emitted and the digit is dropped.</li>
       * </ul>
       * A '$' or '~' that is not followed by an ASCII digit is copied
       * unchanged. A group that did not participate in the match is
       * treated as empty.
       *
       * @param match the successful match of the rule's start regexp
       * @param end the end template of the rule
       * @return the end sequence with all references expanded
       */
      private static char[] substitute(Matcher match, char[] end)
      {
            StringBuilder buf = new StringBuilder();
            for(int i = 0; i < end.length; i++)
            {
                  char ch = end[i];
                  boolean marker = (ch == '$' || ch == '~');
                  if(!marker || i == end.length - 1)
                  {
                        buf.append(ch);
                        continue;
                  }

                  // only ASCII digits are group references;
                  // Character.isDigit() would also accept other Unicode
                  // digits and yield an out-of-range group index
                  char digit = end[i + 1];
                  if(digit < '0' || digit > '9')
                  {
                        buf.append(ch);
                        continue;
                  }

                  // group() returns null for a group that did not
                  // participate in the match
                  String s = match.group(digit - '0');
                  if(ch == '$')
                  {
                        if(s != null)
                              buf.append(s);
                  }
                  else if(s != null && s.length() == 1)
                  {
                        char b = TextUtilities.getComplementaryBracket(s.charAt(0), null);
                        if (b == '\0')
                              b = s.charAt(0);
                        buf.append(b);
                  }
                  else
                        buf.append(ch);

                  // skip the digit that was consumed as group number
                  i++;
            }

            return buf.toString().toCharArray();
      } //}}}

      //{{{ matchToken() method
      /**
       * Determines the token type to use for the text matched by the
       * start or end sequence of a rule, according to the rule's match
       * type.
       *
       * @param rule the rule whose match type is consulted
       * @param base the rule whose token is used for MATCH_TYPE_RULE
       * @param ctx the context whose rule set supplies the default
       * token for MATCH_TYPE_CONTEXT
       * @return the token of {@code base}, the default token of the
       * context's rule set, or the match type itself for any other
       * value
       */
      private byte matchToken(ParserRule rule, ParserRule base, LineContext ctx)
      {
            switch (rule.matchType)
            {
                  case ParserRule.MATCH_TYPE_RULE:
                        return base.token;

                  case ParserRule.MATCH_TYPE_CONTEXT:
                        // resolve against the context handed in by the
                        // caller rather than the instance field
                        return ctx.rules.getDefault();

                  default:
                        return rule.matchType;
            }
      } //}}}

      //{{{ checkHashString() method
      /**
       * Compares the characters of the line starting at the current
       * position, converted to upper case, with the rule's upper-case
       * hash string. The caller must make sure that enough characters
       * are available in the underlying array.
       *
       * @param rule the rule providing the hash string
       * @return true if every character of the hash string matches
       */
      private boolean checkHashString(ParserRule rule)
      {
            final CharSequence expected = rule.upHashChar;
            int i = 0;
            while (i < expected.length())
            {
                  final char actual = Character.toUpperCase(line.array[pos + i]);
                  if (actual != expected.charAt(i))
                  {
                        return false;
                  }
                  i++;
            }
            return true;
      } //}}}

      //}}}

      //{{{ LineContext class
      /**
       * Stores persistent per-line syntax parser state.
       */
00813       public static class LineContext
      {
            private static final Map<LineContext, LineContext> intern = new HashMap<LineContext, LineContext>();

            public LineContext parent;
            public ParserRule inRule;
            public ParserRuleSet rules;
            // used for SPAN_REGEXP rules; otherwise null
            public char[] spanEndSubst;
            public ParserRule escapeRule;

            //{{{ LineContext constructor
            public LineContext(ParserRuleSet rs, LineContext lc)
            {
                  rules = rs;
                  parent = (lc == null ? null : (LineContext)lc.clone());
                  /*
                   * SPANs with no delegate need to propagate the
                   * escape rule to the child context, so this is
                   * needed.
                   */
                  if (rs.getModeName() != null)
                        escapeRule = rules.getEscapeRule();
                  else
                        escapeRule = lc.escapeRule;
            } //}}}

            //{{{ LineContext constructor
            public LineContext()
            {
            } //}}}

            //{{{ intern() method
            public LineContext intern()
            {
                  LineContext obj = intern.get(this);
                  if(obj == null)
                  {
                        intern.put(this,this);
                        return this;
                  }
                  else
                        return obj;
            } //}}}

            //{{{ hashCode() method
            public int hashCode()
            {
                  if(inRule != null)
                        return inRule.hashCode();
                  else if(rules != null)
                        return rules.hashCode();
                  else
                        return 0;
            } //}}}

            //{{{ equals() method
            public boolean equals(Object obj)
            {
                  if(obj instanceof LineContext)
                  {
                        LineContext lc = (LineContext)obj;
                        return lc.inRule == inRule && lc.rules == rules
                              && StandardUtilities.objectsEqual(parent,lc.parent)
                              && charArraysEqual(spanEndSubst,lc.spanEndSubst);
                  }
                  else
                        return false;
            } //}}}

            //{{{ clone() method
            public Object clone()
            {
                  LineContext lc = new LineContext();
                  lc.inRule = inRule;
                  lc.rules = rules;
                  lc.parent = (parent == null) ? null : (LineContext) parent.clone();
                  lc.spanEndSubst = spanEndSubst;
                  lc.escapeRule = escapeRule;

                  return lc;
            } //}}}

            //{{{ charArraysEqual() method
            private static boolean charArraysEqual(char[] c1, char[] c2)
            {
                  if(c1 == null)
                        return c2 == null;

                  // c1 is not null
                  if(c2 == null)
                        return false;

                  if(c1.length != c2.length)
                        return false;

                  for(int i = 0; i < c1.length; i++)
                  {
                        if(c1[i] != c2[i])
                              return false;
                  }

                  return true;
            } //}}}

            //{{{ setInRule() method
            /**
             * Sets the current rule being processed and adjusts the
             * escape rule for the context based on the rule.
             */
00923             public void setInRule(ParserRule rule)
            {
                  inRule = rule;
                  if (rule != null && rule.escapeRule != null)
                        escapeRule = rule.escapeRule;
                  else if (rules != null && rules.getModeName() != null)
                        escapeRule = rules.getEscapeRule();
                  else if (parent != null)
                        escapeRule = parent.escapeRule;
                  else
                        escapeRule = null;
            } //}}}

      } //}}}
}

// Generated by  Doxygen 1.6.0   Back to index