1 /* Copyright (c) 2006, Ivan Sagalaev
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the name of highlight.js nor the names of its contributors
12 may be used to endorse or promote products derived from this software
13 without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 define([], function () {
28 var hljs
= new function() {
30 /* Utility functions */
/**
 * Escapes the HTML-significant characters `&`, `<` and `>` so that `value`
 * can be inserted into markup as literal text.
 *
 * @param {string} value - raw text.
 * @returns {string} `value` with the three characters replaced by entities.
 */
function escape(value) {
  // `&` is replaced first so the entities produced by the later
  // replacements are not escaped a second time.
  return value.replace(/&/gm, '&amp;').replace(/</gm, '&lt;').replace(/>/gm, '&gt;');
}
37 return node
.nodeName
.toLowerCase();
/**
 * Tests whether `re` matches `lexeme` starting at its very first character.
 *
 * @param {RegExp|undefined|null} re - expression to test; may be absent.
 * @param {string} lexeme - text to test against.
 * @returns {boolean|null|undefined} `true` when the match starts at index 0,
 *   `false` when it starts later, `null` when there is no match, and the
 *   falsy `re` itself when no expression was supplied. Callers are expected
 *   to use the result for its truthiness only.
 */
function testRe(re, lexeme) {
  var match = re && re.exec(lexeme);
  return match && match.index === 0;
}
/**
 * Determines the language requested for a code block from CSS class names.
 *
 * The class names of `block` and of its parent node (when there is one) are
 * collected, a leading `lang-` or `language-` prefix is removed from each,
 * and the first name that is either known to `getLanguage` or equal to
 * `'no-highlight'` is returned.
 *
 * @param {{className: string, parentNode: ?{className: string}}} block
 * @returns {string|undefined} the first matching class name, or `undefined`
 *   when none of the class names qualifies.
 */
function blockLanguage(block) {
  var parentClasses = block.parentNode ? block.parentNode.className : '';
  var classes = (block.className + ' ' + parentClasses).split(/\s+/);
  classes = classes.map(function(c) {
    return c.replace(/^lang(uage)?-/, '');
  });
  return classes.filter(function(c) {
    return getLanguage(c) || c == 'no-highlight';
  })[0];
}
51 function inherit(parent
, obj
) {
53 for (var key
in parent
)
54 result
[key] = parent
[key];
57 result
[key] = obj
[key];
63 function nodeStream(node
) {
65 (function _nodeStream(node
, offset
) {
66 for (var child
= node
.firstChild
; child
; child
= child
.nextSibling
) {
67 if (child
.nodeType
== 3)
68 offset
+= child
.nodeValue
.length
;
69 else if (tag(child
) == 'br')
71 else if (child
.nodeType
== 1) {
77 offset
= _nodeStream(child
, offset
);
90 function mergeStreams(original
, highlighted
, value
) {
95 function selectStream() {
96 if (!original
.length ||
!highlighted
.length
) {
97 return original
.length ? original
: highlighted
;
99 if (original
[0].offset
!= highlighted
[0].offset
) {
100 return (original
[0].offset
< highlighted
[0].offset
) ? original
: highlighted
;
104 To avoid starting the stream just before it should stop the order is
105 ensured that original always starts first and closes last:
107 if (event1 == 'start' && event2 == 'start')
109 if (event1 == 'start' && event2 == 'stop')
111 if (event1 == 'stop' && event2 == 'start')
113 if (event1 == 'stop' && event2 == 'stop')
116 ... which is collapsed to:
118 return highlighted
[0].event
== 'start' ? original
: highlighted
;
121 function open(node
) {
122 function attr_str(a
) {return ' ' + a
.nodeName
+ '="' + escape(a
.value
) + '"';}
123 result
+= '<' + tag(node
) + Array
.prototype.map
.call(node
.attributes
, attr_str
).join('') + '>';
126 function close(node
) {
127 result
+= '</' + tag(node
) + '>';
130 function render(event
) {
131 (event
.event
== 'start' ? open
: close
)(event
.node
);
134 while (original
.length || highlighted
.length
) {
135 var stream
= selectStream();
136 result
+= escape(value
.substr(processed
, stream
[0].offset
- processed
));
137 processed
= stream
[0].offset
;
138 if (stream
== original
) {
140 On any opening or closing tag of the original markup we first close
141 the entire highlighted node stack, then render the original tag along
142 with all the following original tags at the same offset and then
143 reopen all the tags on the highlighted stack.
145 nodeStack
.reverse().forEach(close
);
147 render(stream
.splice(0, 1)[0]);
148 stream
= selectStream();
149 } while (stream
== original
&& stream
.length
&& stream
[0].offset
== processed
);
150 nodeStack
.reverse().forEach(open
);
152 if (stream
[0].event
== 'start') {
153 nodeStack
.push(stream
[0].node
);
157 render(stream
.splice(0, 1)[0]);
160 return result
+ escape(value
.substr(processed
));
165 function compileLanguage(language
) {
168 return (re
&& re
.source
) || re
;
171 function langRe(value
, global
) {
174 'm' + (language
.case_insensitive ?
'i' : '') + (global ?
'g' : '')
178 function compileMode(mode
, parent
) {
181 mode
.compiled
= true;
183 mode
.keywords
= mode
.keywords || mode
.beginKeywords
;
185 var compiled_keywords
= {};
187 function flatten(className
, str
) {
188 if (language
.case_insensitive
) {
189 str
= str
.toLowerCase();
191 str
.split(' ').forEach(function(kw
) {
192 var pair
= kw
.split('|');
193 compiled_keywords
[pair
[0]] = [className
, pair
[1] ?
Number(pair
[1]) : 1];
197 if (typeof mode
.keywords
== 'string') { // string
198 flatten('keyword', mode
.keywords
);
200 Object
.keys(mode
.keywords
).forEach(function (className
) {
201 flatten(className
, mode
.keywords
[className]);
204 mode
.keywords
= compiled_keywords
;
206 mode
.lexemesRe
= langRe(mode
.lexemes ||
/\b[A
-Za
-z0
-9_
]+\b/, true);
209 if (mode
.beginKeywords
) {
210 mode
.begin
= '\\b(' + mode
.beginKeywords
.split(' ').join('|') + ')\\b';
213 mode
.begin
= /\B|
\b/;
214 mode
.beginRe
= langRe(mode
.begin
);
215 if (!mode
.end
&& !mode
.endsWithParent
)
218 mode
.endRe
= langRe(mode
.end
);
219 mode
.terminator_end
= reStr(mode
.end
) ||
'';
220 if (mode
.endsWithParent
&& parent
.terminator_end
)
221 mode
.terminator_end
+= (mode
.end ?
'|' : '') + parent
.terminator_end
;
224 mode
.illegalRe
= langRe(mode
.illegal
);
225 if (mode
.relevance
=== undefined
)
227 if (!mode
.contains
) {
230 var expanded_contains
= [];
231 mode
.contains
.forEach(function(c
) {
233 c
.variants
.forEach(function(v
) {expanded_contains
.push(inherit(c
, v
));});
235 expanded_contains
.push(c
== 'self' ? mode
: c
);
238 mode
.contains
= expanded_contains
;
239 mode
.contains
.forEach(function(c
) {compileMode(c
, mode
);});
242 compileMode(mode
.starts
, parent
);
246 mode
.contains
.map(function(c
) {
247 return c
.beginKeywords ?
'\\.?(' + c
.begin
+ ')\\.?' : c
.begin
;
249 .concat([mode
.terminator_end
, mode
.illegal
])
252 mode
.terminators
= terminators
.length ?
langRe(terminators
.join('|'), true) : {exec
: function(s
) {return null;}};
254 mode
.continuation
= {};
257 compileMode(language
);
261 Core highlighting function. Accepts a language name, or an alias, and a
262 string with the code to highlight. Returns an object with the following
266 - value (an HTML string with highlighting markup)
269 function highlight(name
, value
, ignore_illegals
, continuation
) {
/**
 * Finds the first mode in `mode.contains` whose compiled `beginRe` matches
 * `lexeme` at its start (as decided by `testRe`).
 *
 * @param {string} lexeme - text that was just matched by the terminators.
 * @param {{contains: Array<{beginRe: RegExp}>}} mode - mode being searched.
 * @returns {Object|undefined} the matching sub-mode, or `undefined` when no
 *   contained mode begins with `lexeme`.
 */
function subMode(lexeme, mode) {
  for (var i = 0; i < mode.contains.length; i++) {
    if (testRe(mode.contains[i].beginRe, lexeme)) {
      return mode.contains[i];
    }
  }
}
279 function endOfMode(mode
, lexeme
) {
280 if (testRe(mode
.endRe
, lexeme
)) {
283 if (mode
.endsWithParent
) {
284 return endOfMode(mode
.parent
, lexeme
);
/**
 * Reports whether `lexeme` is illegal in `mode`.
 *
 * The check is skipped whenever the enclosing `ignore_illegals` flag is
 * truthy; otherwise the lexeme is illegal when `mode.illegalRe` matches it
 * at its start (as decided by `testRe`).
 *
 * @param {string} lexeme - text to test.
 * @param {{illegalRe: (RegExp|undefined)}} mode - mode supplying the pattern.
 * @returns {*} a truthy value when the lexeme is illegal, falsy otherwise.
 */
function isIllegal(lexeme, mode) {
  return !ignore_illegals && testRe(mode.illegalRe, lexeme);
}
/**
 * Looks up the text of a lexeme match in the keyword table of `mode`.
 *
 * The matched text is lower-cased first when the enclosing `language` is
 * case-insensitive.
 *
 * @param {{keywords: Object}} mode - mode whose `keywords` table is consulted.
 * @param {Array<string>} match - regex match; `match[0]` is the lexeme.
 * @returns {*} the table entry stored under the lexeme, or `false` when the
 *   table has no own property with that name.
 */
function keywordMatch(mode, match) {
  var match_str = language.case_insensitive ? match[0].toLowerCase() : match[0];
  // Go through Object.prototype so a keyword literally named
  // "hasOwnProperty" in the table cannot shadow the method being called.
  return Object.prototype.hasOwnProperty.call(mode.keywords, match_str) && mode.keywords[match_str];
}
/**
 * Builds the markup for one highlighted `<span>`.
 *
 * @param {string} classname - class name to put on the span.
 * @param {string} insideSpan - markup placed inside the span, inserted as is.
 * @param {boolean} [leaveOpen] - when truthy the closing `</span>` is omitted.
 * @param {boolean} [noPrefix] - when truthy `options.classPrefix` is not
 *   prepended to `classname`.
 * @returns {string} the assembled markup.
 */
function buildSpan(classname, insideSpan, leaveOpen, noPrefix) {
  var classPrefix = noPrefix ? '' : options.classPrefix,
      openSpan    = '<span class="' + classPrefix,
      closeSpan   = leaveOpen ? '' : '</span>';

  openSpan += classname + '">';

  return openSpan + insideSpan + closeSpan;
}
307 function processKeywords() {
309 return escape(mode_buffer
);
312 top
.lexemesRe
.lastIndex
= 0;
313 var match
= top
.lexemesRe
.exec(mode_buffer
);
315 result
+= escape(mode_buffer
.substr(last_index
, match
.index
- last_index
));
316 var keyword_match
= keywordMatch(top
, match
);
318 relevance
+= keyword_match
[1];
319 result
+= buildSpan(keyword_match
[0], escape(match
[0]));
321 result
+= escape(match
[0]);
323 last_index
= top
.lexemesRe
.lastIndex
;
324 match
= top
.lexemesRe
.exec(mode_buffer
);
326 return result
+ escape(mode_buffer
.substr(last_index
));
329 function processSubLanguage() {
330 if (top
.subLanguage
&& !languages
[top
.subLanguage
]) {
331 return escape(mode_buffer
);
333 var result
= top
.subLanguage ?
highlight(top
.subLanguage
, mode_buffer
, true, top
.continuation
.top
) : highlightAuto(mode_buffer
);
334 // Counting embedded language score towards the host language may be disabled
335 // with zeroing the containing mode relevance. Usecase in point is Markdown that
336 // allows XML everywhere and makes every XML snippet to have a much larger Markdown
338 if (top
.relevance
> 0) {
339 relevance
+= result
.relevance
;
341 if (top
.subLanguageMode
== 'continuous') {
342 top
.continuation
.top
= result
.top
;
344 return buildSpan(result
.language
, result
.value
, false, true);
/**
 * Renders the text buffered for the current mode (`top`).
 *
 * Delegates to `processSubLanguage` whenever `top.subLanguage` is defined,
 * even if it is an empty value, and to `processKeywords` otherwise.
 *
 * @returns {string} whatever the selected processor returns.
 */
function processBuffer() {
  return top.subLanguage !== undefined ? processSubLanguage() : processKeywords();
}
351 function startNewMode(mode
, lexeme
) {
352 var markup
= mode
.className?
buildSpan(mode
.className
, '', true): '';
353 if (mode
.returnBegin
) {
356 } else if (mode
.excludeBegin
) {
357 result
+= escape(lexeme
) + markup
;
361 mode_buffer
= lexeme
;
363 top
= Object
.create(mode
, {parent
: {value
: top
}});
366 function processLexeme(buffer
, lexeme
) {
368 mode_buffer
+= buffer
;
369 if (lexeme
=== undefined
) {
370 result
+= processBuffer();
374 var new_mode
= subMode(lexeme
, top
);
376 result
+= processBuffer();
377 startNewMode(new_mode
, lexeme
);
378 return new_mode
.returnBegin ?
0 : lexeme
.length
;
381 var end_mode
= endOfMode(top
, lexeme
);
384 if (!(origin
.returnEnd || origin
.excludeEnd
)) {
385 mode_buffer
+= lexeme
;
387 result
+= processBuffer();
392 relevance
+= top
.relevance
;
394 } while (top
!= end_mode
.parent
);
395 if (origin
.excludeEnd
) {
396 result
+= escape(lexeme
);
399 if (end_mode
.starts
) {
400 startNewMode(end_mode
.starts
, '');
402 return origin
.returnEnd ?
0 : lexeme
.length
;
405 if (isIllegal(lexeme
, top
))
406 throw new Error('Illegal lexeme "' + lexeme
+ '" for mode "' + (top
.className ||
'<unnamed>') + '"');
409 Parser should not reach this point as all types of lexemes should be caught
410 earlier, but if it does due to some bug make sure it advances at least one
411 character forward to prevent infinite looping.
413 mode_buffer
+= lexeme
;
414 return lexeme
.length ||
1;
417 var language
= getLanguage(name
);
419 throw new Error('Unknown language: "' + name
+ '"');
422 compileLanguage(language
);
423 var top
= continuation || language
;
425 for(var current
= top
; current
!= language
; current
= current
.parent
) {
426 if (current
.className
) {
427 result
+= buildSpan(current
.className
, result
, true);
430 var mode_buffer
= '';
433 var match
, count
, index
= 0;
435 top
.terminators
.lastIndex
= index
;
436 match
= top
.terminators
.exec(value
);
439 count
= processLexeme(value
.substr(index
, match
.index
- index
), match
[0]);
440 index
= match
.index
+ count
;
442 processLexeme(value
.substr(index
));
443 for(var current
= top
; current
.parent
; current
= current
.parent
) { // close dangling modes
444 if (current
.className
) {
449 relevance
: relevance
,
455 if (e
.message
.indexOf('Illegal') != -1) {
467 Highlighting with language detection. Accepts a string with the code to
468 highlight. Returns an object with the following properties:
470 - language (detected language)
472 - value (an HTML string with highlighting markup)
473 - second_best (object with the same structure for second-best heuristically
474 detected language, may be absent)
477 function highlightAuto(text
, languageSubset
) {
478 languageSubset
= languageSubset || options
.languages || Object
.keys(languages
);
483 var second_best
= result
;
484 languageSubset
.forEach(function(name
) {
485 if (!getLanguage(name
)) {
488 var current
= highlight(name
, text
, false);
489 current
.language
= name
;
490 if (current
.relevance
> second_best
.relevance
) {
491 second_best
= current
;
493 if (current
.relevance
> result
.relevance
) {
494 second_best
= result
;
498 if (second_best
.language
) {
499 result
.second_best
= second_best
;
505 Post-processing of the highlighted markup:
507 - replace TABs with something more useful
508 - replace real line-breaks with '<br>' for non-pre containers
511 function fixMarkup(value
) {
512 if (options
.tabReplace
) {
513 value
= value
.replace(/^
((<[^
>]+>|
\t)+)/gm
, function(match
, p1
, offset
, s
) {
514 return p1
.replace(/\t/g
, options
.tabReplace
);
518 value
= value
.replace(/\n/g
, '<br>');
524 Applies highlighting to a DOM node containing code. Accepts a DOM node and
525 two optional parameters for fixMarkup.
527 function highlightBlock(block
) {
528 var text
= options
.useBR ? block
.innerHTML
529 .replace(/\n/g
,'').replace(/<br
>|
<br
[^
>]*>/g
, '\n').replace(/<[^
>]*>/g
,'')
531 var language
= blockLanguage(block
);
532 if (language
== 'no-highlight')
534 var result
= language ?
highlight(language
, text
, true) : highlightAuto(text
);
535 var original
= nodeStream(block
);
536 if (original
.length
) {
537 var pre
= document
.createElementNS('http://www.w3.org/1999/xhtml', 'pre');
538 pre
.innerHTML
= result
.value
;
539 result
.value
= mergeStreams(original
, nodeStream(pre
), text
);
541 result
.value
= fixMarkup(result
.value
);
543 block
.innerHTML
= result
.value
;
544 block
.className
+= ' hljs ' + (!language
&& result
.language ||
'');
546 language
: result
.language
,
549 if (result
.second_best
) {
550 block
.second_best
= {
551 language
: result
.second_best
.language
,
552 re
: result
.second_best
.relevance
558 classPrefix
: 'hljs-',
565 Updates highlight.js global options with values passed in the form of an object
/**
 * Updates the shared `options` object with the values in `user_options`.
 *
 * `options` is replaced by the result of `inherit(options, user_options)`
 * rather than being mutated in place.
 *
 * @param {Object} user_options - option values to apply.
 */
function configure(user_options) {
  options = inherit(options, user_options);
}
572 Applies highlighting to all <pre><code>..</code></pre> blocks on a page.
574 function initHighlighting() {
575 if (initHighlighting
.called
)
577 initHighlighting
.called
= true;
579 var blocks
= document
.querySelectorAll('pre code');
580 Array
.prototype.forEach
.call(blocks
, highlightBlock
);
584 Attaches highlighting to the page load event.
/**
 * Attaches highlighting to the page load events.
 *
 * Registers `initHighlighting` as a listener for both `DOMContentLoaded`
 * and `load`.
 */
function initHighlightingOnLoad() {
  addEventListener('DOMContentLoaded', initHighlighting, false);
  addEventListener('load', initHighlighting, false);
}
594 function registerLanguage(name
, language
) {
595 var lang
= languages
[name] = language(this);
597 lang
.aliases
.forEach(function(alias
) {aliases
[alias] = name
;});
/**
 * Lists the names under which languages are stored in `languages`.
 *
 * @returns {Array<string>} the own enumerable keys of `languages`.
 */
function listLanguages() {
  return Object.keys(languages);
}
/**
 * Looks up a language definition by name or by alias.
 *
 * `name` is tried as a key of `languages` first; if that yields nothing
 * truthy, it is resolved through `aliases` and looked up again.
 *
 * @param {string} name - language name or alias.
 * @returns {Object|undefined} the stored definition, or `undefined` when
 *   neither lookup finds one.
 */
function getLanguage(name) {
  return languages[name] || languages[aliases[name]];
}
/* Interface definition */

// Publish the functions defined above as properties of the object being
// constructed, each under the same name as the function itself.
this.highlight = highlight;
this.highlightAuto = highlightAuto;
this.fixMarkup = fixMarkup;
this.highlightBlock = highlightBlock;
this.configure = configure;
this.initHighlighting = initHighlighting;
this.initHighlightingOnLoad = initHighlightingOnLoad;
this.registerLanguage = registerLanguage;
this.listLanguages = listLanguages;
this.getLanguage = getLanguage;
this.inherit = inherit;
// Regular-expression sources shared by language definitions. They are kept
// as strings (not RegExp objects) so they can be concatenated into larger
// patterns, as CSS_NUMBER_MODE does with NUMBER_RE.
this.IDENT_RE = '[a-zA-Z][a-zA-Z0-9_]*';
this.UNDERSCORE_IDENT_RE = '[a-zA-Z_][a-zA-Z0-9_]*';
this.NUMBER_RE = '\\b\\d+(\\.\\d+)?';
this.C_NUMBER_RE = '(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)'; // 0x..., 0..., decimal, float
this.BINARY_NUMBER_RE = '\\b(0b[01]+)'; // 0b...
this.RE_STARTERS_RE = '!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~';

// Mode matching a backslash followed by any single character (including a
// newline); relevance 0 so it adds nothing to the relevance score.
this.BACKSLASH_ESCAPE = {
  begin: '\\\\[\\s\\S]', relevance: 0
};
635 this.APOS_STRING_MODE
= {
637 begin
: '\'', end
: '\'',
639 contains
: [this.BACKSLASH_ESCAPE
]
641 this.QUOTE_STRING_MODE
= {
643 begin
: '"', end
: '"',
645 contains
: [this.BACKSLASH_ESCAPE
]
// Mode matching common English words; the comment modes below list it in
// their `contains`.
this.PHRASAL_WORDS_MODE = {
  begin: /\b(a|an|the|are|I|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such)\b/
};
// `//` comment running to the end of the line.
this.C_LINE_COMMENT_MODE = {
  className: 'comment',
  begin: '//', end: '$',
  contains: [this.PHRASAL_WORDS_MODE]
};
// `/* ... */` comment.
this.C_BLOCK_COMMENT_MODE = {
  className: 'comment',
  begin: '/\\*', end: '\\*/',
  contains: [this.PHRASAL_WORDS_MODE]
};
// `#` comment running to the end of the line.
this.HASH_COMMENT_MODE = {
  className: 'comment',
  begin: '#', end: '$',
  contains: [this.PHRASAL_WORDS_MODE]
};
667 begin
: this.NUMBER_RE
,
670 this.C_NUMBER_MODE
= {
672 begin
: this.C_NUMBER_RE
,
675 this.BINARY_NUMBER_MODE
= {
677 begin
: this.BINARY_NUMBER_RE
,
680 this.CSS_NUMBER_MODE
= {
682 begin
: this.NUMBER_RE
+ '(' +
685 '|cm|mm|in|pt|pc|px' +
686 '|deg|grad|rad|turn' +
695 begin
: /\
//, end: /\/[gim]*/,
698 this.BACKSLASH_ESCAPE
,
700 begin
: /\
[/, end
: /\
]/,
702 contains
: [this.BACKSLASH_ESCAPE
]
708 begin
: this.IDENT_RE
,
711 this.UNDERSCORE_TITLE_MODE
= {
713 begin
: this.UNDERSCORE_IDENT_RE
,