Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
359 { | 359 { |
360 newSelector.push(selector.substring(i, pos.start)); | 360 newSelector.push(selector.substring(i, pos.start)); |
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
362 i = pos.end; | 362 i = pos.end; |
363 } | 363 } |
364 newSelector.push(selector.substring(i)); | 364 newSelector.push(selector.substring(i)); |
365 | 365 |
366 return newSelector.join(""); | 366 return newSelector.join(""); |
367 } | 367 } |
368 | 368 |
/**
 * Check if two strings are a close match
 *
 * This function returns an edit operation, one of "substitute", "delete", and
 * "insert", along with an index in the source string where the edit must occur
 * in order to arrive at the target string. If the strings are not a close
 * match, it returns null.
 *
 * Two strings are considered to be a close match if they are one edit
 * operation apart.
 *
 * Deletions or insertions of a contiguous range of characters from one string
 * into the other, at the same index, are treated as a single edit. For
 * example, "internal" and "international" are considered to be one edit apart
 * and therefore a close match. For such multiple character edits the result
 * additionally carries an endIndex, the exclusive end of the edited range in
 * the longer of the two strings.
 *
 * A few things to note:
 *
 *   1) This function does not care about the format of the input strings. For
 *   example, the caller may pass in regular expressions, where "[ab]" and
 *   "[bc]" could be considered to be a close match, since the order within the
 *   brackets doesn't matter. This function will still return null for this set
 *   of inputs since they are two edits apart.
 *
 *   2) To be friendly to calling code that might be passing in regular
 *   expressions, this function will simply return null if it encounters a
 *   special character (e.g. "\", "?", "+", "*", "^", etc.) in the delta. For
 *   a substitution the delta consists of both the replaced and the replacing
 *   character. For example, given "Hello" and "Hello, how are you?", it will
 *   return null, and so it will for "adx" and "ad.".
 *
 *   3) If the caller does indeed pass in regular expressions, it must make the
 *   important assumption that the parts where two such regular expressions may
 *   differ can always be treated as normal strings. For example,
 *   "^https?://example.com/ads" and "^https?://example.com/adv" differ only in
 *   the last character, therefore the regular expressions can safely be merged
 *   into "^https?://example.com/ad[sv]".
 *
 * @param {string} s The source string
 * @param {string} t The target string
 *
 * @returns {?object} An object describing the single edit operation that must
 *                    occur in the source string in order to arrive at the
 *                    target string, or null if the strings are not a close
 *                    match
 */
function closeMatch(s, t)
{
  // Characters that carry a meaning in a regular expression. An edit that
  // touches any of them cannot be expressed as a plain character class or an
  // optional character, so such strings are never reported as a close match.
  const specialCharacters = ".+$?{}()[]\\*^|";

  let diff = s.length - t.length;

  // If target is longer than source, swap them for the purpose of our
  // calculation. Note that diff keeps its original sign; it decides below
  // whether the edit is reported as a deletion or an insertion.
  let longer = s;
  let shorter = t;
  if (diff < 0)
  {
    longer = t;
    shorter = s;
  }

  // Start from the beginning and keep going until we hit a character that
  // doesn't match.
  let i = 0;
  while (i < longer.length && longer[i] == shorter[i])
    i++;

  // Now do exactly the same from the end, but also stop if we reach the
  // position where we terminated the previous loop.
  let j = 0;
  for (; j < shorter.length; j++)
  {
    if (shorter.length - j == i ||
        longer[longer.length - j - 1] != shorter[shorter.length - j - 1])
      break;
  }

  if (diff == 0)
  {
    // If the strings are equal in length and the delta isn't exactly one
    // character, it's not a close match.
    if (shorter.length - j - i != 1)
      return null;
  }
  else if (i != shorter.length - j)
  {
    // For strings of unequal length, if we haven't found a match for every
    // single character in the shorter string counting from both the beginning
    // and the end, it's not a close match.
    return null;
  }

  // If the delta contains any special characters, it's not a close match.
  for (let k = i; k < longer.length - j; k++)
  {
    if (specialCharacters.includes(longer[k]))
      return null;
  }

  if (diff == 0)
  {
    // For a substitution the character on the other side is part of the delta
    // too; it would otherwise end up inside a character class.
    if (specialCharacters.includes(shorter[i]))
      return null;

    return {type: "substitute", index: i};
  }

  let edit = {type: diff > 0 ? "delete" : "insert", index: i};

  // Only edits spanning more than one character carry an end index.
  if (Math.abs(diff) > 1)
    edit.endIndex = longer.length - j;

  return edit;
}
490 | |
/**
 * Drop rules whose URL filter is made redundant by a shorter URL filter
 *
 * A URL filter that begins with another, shorter URL filter can only match a
 * subset of what the shorter one matches, so the rule carrying the longer
 * filter is marked as redundant. Of two identical filters the earlier one is
 * marked.
 *
 * The filters are regular expressions, so the prefix test alone is not enough:
 * if the character following the shared prefix is a quantifier ("?", "*" or
 * "{"), it changes the meaning of the prefix's last character and the longer
 * filter may match more than the shorter one (e.g. "^https?://ads" also
 * matches "http://ads", which "^https" does not). Such pairs are left alone.
 *
 * Redundant entries get their "redundant" property set to true in place.
 *
 * @param {object[]} rulesInfo Entries of the form {rule}, each rule having a
 *                             trigger with a "url-filter" string
 *
 * @returns {object[]} A new array containing the entries not marked as
 *                     redundant, in their original order
 */
function eliminateRedundantRulesByURLFilter(rulesInfo)
{
  // Whether everything matched by the longer filter is also matched by the
  // shorter one, judged by prefix.
  let covers = (shorter, longer) =>
  {
    if (!longer.startsWith(shorter))
      return false;

    let next = longer[shorter.length];
    return next != "?" && next != "*" && next != "{";
  };

  for (let i = 0; i < rulesInfo.length; i++)
  {
    // If this rule is already marked as redundant, don't bother comparing it
    // with other rules.
    if (rulesInfo[i].redundant)
      continue;

    let source = rulesInfo[i].rule.trigger["url-filter"];

    for (let j = i + 1; j < rulesInfo.length; j++)
    {
      if (rulesInfo[j].redundant)
        continue;

      let target = rulesInfo[j].rule.trigger["url-filter"];

      if (source.length >= target.length)
      {
        if (covers(target, source))
        {
          // Nothing more to compare this rule against once it is redundant.
          rulesInfo[i].redundant = true;
          break;
        }
      }
      else if (covers(source, target))
      {
        rulesInfo[j].redundant = true;
      }
    }
  }

  return rulesInfo.filter(ruleInfo => !ruleInfo.redundant);
}
527 | |
/**
 * Merge rules whose URL filters are a single edit apart
 *
 * Every pair of rules within range is compared using closeMatch. Rules that
 * differ from a given rule by one character at the same index are folded into
 * that rule as a character class and/or an optional character (e.g. "ads",
 * "adv" and "ad" become "ad[sv]?"). Failing that, a single rule differing by
 * a contiguous run of characters is folded in as an optional group (e.g. "ad"
 * and "adserver" become "ad(server)?").
 *
 * The rules are modified in place: the surviving rule gets its "url-filter"
 * rewritten and its entry flagged "mergedInto", the entries folded into it
 * are flagged "merged". In exhaustive mode entries found to be redundant are
 * additionally flagged "redundant".
 *
 * @param {object[]} rulesInfo Entries of the form {rule}, expected to differ
 *                             only in their "url-filter"
 * @param {boolean} exhaustive Whether to compare every rule with every other
 *                             rule rather than only with its neighbors
 */
function mergeRulesByURLFilter(rulesInfo, exhaustive)
{
  // Closely matching rules are likely to be within a certain range. We only
  // look for matches within this range by default. If we increase this value,
  // it can give us more matches and a smaller resulting rule set, but possibly
  // at a significant performance cost.
  //
  // If the exhaustive option is true, we simply ignore this value and look for
  // matches throughout the rule set.
  const heuristicRange = 10;

  // Build a character class matching exactly the given characters. A hyphen
  // between two other characters would denote a range and a leading caret
  // would negate the whole class, so the hyphen always goes first and the
  // caret always goes last, where both are taken literally.
  let toCharacterClass = characters =>
  {
    let hyphen = characters.indexOf("-") != -1 ? "-" : "";
    let caret = characters.indexOf("^") != -1 ? "^" : "";
    let others = characters.filter(character => character != "-" &&
                                                character != "^");

    return "[" + hyphen + others.join("") + caret + "]";
  };

  if (exhaustive)
  {
    // Throw out obviously redundant rules.
    rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo);
  }

  if (rulesInfo.length <= 1)
    return;

  for (let i = 0; i < rulesInfo.length; i++)
  {
    let limit = exhaustive ? rulesInfo.length :
                Math.min(i + heuristicRange, rulesInfo.length);

    for (let j = i + 1; j < limit; j++)
    {
      let source = rulesInfo[i].rule.trigger["url-filter"];
      let target = rulesInfo[j].rule.trigger["url-filter"];

      let edit = closeMatch(source, target);

      if (edit)
      {
        let urlFilter, ruleInfo, match = {edit};

        if (edit.type == "insert")
        {
          // Convert the insertion into a deletion and stick it on the target
          // rule instead. We can only group deletions and substitutions;
          // therefore insertions must be treated as deletions on the target
          // rule.
          urlFilter = target;
          ruleInfo = rulesInfo[j];
          match.index = i;
          edit.type = "delete";
        }
        else
        {
          urlFilter = source;
          ruleInfo = rulesInfo[i];
          match.index = j;
        }

        // If the edit has an end index, it represents a multiple character
        // edit.
        let multiEdit = !!edit.endIndex;

        if (multiEdit)
        {
          // We only care about a single multiple character edit because the
          // number of characters for such a match doesn't matter, we can
          // only merge with one other rule.
          if (!ruleInfo.multiEditMatch)
            ruleInfo.multiEditMatch = match;
        }
        else
        {
          // For single character edits, multiple rules can be merged into
          // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
          if (!ruleInfo.matches)
            ruleInfo.matches = new Array(urlFilter.length);

          // Matches at a particular index. For example, for a source string
          // "ads", both target strings "ad" (deletion) and "adv"
          // (substitution) match at index 2, hence they are grouped together
          // to possibly be merged later into "ad[sv]?".
          let matchesForIndex = ruleInfo.matches[edit.index];

          if (matchesForIndex)
          {
            matchesForIndex.push(match);
          }
          else
          {
            matchesForIndex = [match];
            ruleInfo.matches[edit.index] = matchesForIndex;
          }

          // Keep track of the best set of matches. We later sort by this to
          // get best results.
          if (!ruleInfo.bestMatches ||
              matchesForIndex.length > ruleInfo.bestMatches.length)
            ruleInfo.bestMatches = matchesForIndex;
        }
      }
    }
  }

  // Filter out rules that have no matches at all.
  let candidateRulesInfo = rulesInfo.filter(ruleInfo =>
  {
    return ruleInfo.bestMatches || ruleInfo.multiEditMatch;
  });

  // For best results, we have to sort the candidates by the largest set of
  // matches.
  //
  // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
  // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
  // "[ab]dx" (3 rules).
  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
  {
    let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
                  ruleInfo1.multiEditMatch ? 1 : 0;
    let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
                  ruleInfo2.multiEditMatch ? 1 : 0;

    return weight2 - weight1;
  });

  for (let ruleInfo of candidateRulesInfo)
  {
    let rule = ruleInfo.rule;

    // If this rule has already been merged into another rule, we skip it.
    if (ruleInfo.merged)
      continue;

    // Find the best set of rules to group, which is simply the largest set.
    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
    {
      matchesForIndex = (matchesForIndex || []).filter(match =>
      {
        // Filter out rules that have either already been merged into other
        // rules or have had other rules merged into them.
        return !rulesInfo[match.index].merged &&
               !rulesInfo[match.index].mergedInto;
      });

      return matchesForIndex.length > best.length ? matchesForIndex : best;
    },
    []);

    let multiEdit = false;

    // If we couldn't find a single rule to merge with, let's see if we have a
    // multiple character edit. e.g. we could merge "ad" and "adserver" into
    // "ad(server)?".
    if (best.length == 0 && ruleInfo.multiEditMatch &&
        !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
        !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
    {
      best = [ruleInfo.multiEditMatch];
      multiEdit = true;
    }

    if (best.length > 0)
    {
      let urlFilter = rule.trigger["url-filter"];

      let editIndex = best[0].edit.index;

      if (!multiEdit)
      {
        // Merge all the matching rules into this one.

        let characters = [];
        let quantifier = "";

        for (let match of best)
        {
          if (match.edit.type == "delete")
          {
            quantifier = "?";
          }
          else
          {
            let character = rulesInfo[match.index].rule
                                                  .trigger["url-filter"][editIndex];
            characters.push(character);
          }

          // Mark the target rule as merged so other rules don't try to merge
          // it again.
          rulesInfo[match.index].merged = true;
        }

        // The character at the edit index either stays as it is or becomes a
        // character class listing all the alternatives; the quantifier, if
        // any, applies to whichever of the two it is.
        let atom = urlFilter[editIndex];
        if (characters.length > 0)
          atom = toCharacterClass([atom].concat(characters));

        urlFilter = urlFilter.substring(0, editIndex) + atom + quantifier +
                    urlFilter.substring(editIndex + 1);
      }
      else
      {
        let editEndIndex = best[0].edit.endIndex;

        // Mark the target rule as merged so other rules don't try to merge it
        // again.
        rulesInfo[best[0].index].merged = true;

        urlFilter = urlFilter.substring(0, editIndex) + "(" +
                    urlFilter.substring(editIndex, editEndIndex) + ")?" +
                    urlFilter.substring(editEndIndex);
      }

      rule.trigger["url-filter"] = urlFilter;

      // Mark this rule as one that has had other rules merged into it.
      ruleInfo.mergedInto = true;
    }
  }
}
745 | |
/**
 * Merge a group of rules that differ only in one array property
 *
 * All rules are folded into the first one, whose array property becomes the
 * union of the values found across the group. The first entry is flagged
 * "mergedInto" and every other entry is flagged "merged". Groups of fewer
 * than two entries are left untouched.
 *
 * A rule that lacks the property altogether is not restricted by it (e.g. a
 * trigger without "if-domain" applies regardless of the domain). If any rule
 * in the group is unrestricted, the merged rule must be unrestricted as well,
 * so the property is removed from it rather than set to the union; otherwise
 * the unrestricted rule would silently be narrowed down.
 *
 * @param {object[]} rulesInfo Entries of the form {rule}
 * @param {string} propertyType The part of the rule holding the property,
 *                              e.g. "trigger"
 * @param {string} property The name of the array property, e.g. "if-domain"
 */
function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
{
  if (rulesInfo.length <= 1)
    return;

  let values = new Set();
  let unrestricted = false;

  rulesInfo.forEach((ruleInfo, index) =>
  {
    let list = ruleInfo.rule[propertyType][property];

    if (list)
    {
      for (let value of list)
        values.add(value);
    }
    else
    {
      unrestricted = true;
    }

    if (index > 0)
      ruleInfo.merged = true;
  });

  let container = rulesInfo[0].rule[propertyType];

  if (unrestricted)
    delete container[property];
  else if (values.size > 0)
    container[property] = Array.from(values);

  rulesInfo[0].mergedInto = true;
}
770 | |
/**
 * Group rules that are identical except for one property
 *
 * For each rule a shallow copy of its trigger and action is made, the given
 * property is removed from the copy, and the JSON serialization of what
 * remains is used as the group key. The rules themselves are not modified.
 *
 * @param {object[]} rulesInfo Entries of the form {rule}
 * @param {string} propertyType The part of the rule holding the property,
 *                              either "trigger" or "action"
 * @param {string} property The name of the property to disregard
 *
 * @returns {Map.<string,object[]>} The entries of rulesInfo keyed by group,
 *                                  in order of first appearance
 */
function groupRulesByMergeableProperty(rulesInfo, propertyType, property)
{
  let groups = new Map();

  for (let ruleInfo of rulesInfo)
  {
    let remainder = {
      trigger: Object.assign({}, ruleInfo.rule.trigger),
      action: Object.assign({}, ruleInfo.rule.action)
    };
    delete remainder[propertyType][property];

    let key = JSON.stringify(remainder);

    if (!groups.has(key))
      groups.set(key, []);

    groups.get(key).push(ruleInfo);
  }

  return groups;
}
796 | |
/**
 * Reduce a set of content blocker rules by merging rules where possible
 *
 * First, rules that are identical except for their "url-filter" are merged by
 * URL filter. Then, for "resource-type" and "if-domain" in turn, rules that
 * are identical except for that property are merged into one. The rule
 * objects passed in may be modified in the process.
 *
 * @param {object[]} rules The rules to merge
 * @param {object} [options] Options; "exhaustive" (default false) is passed
 *                           on to mergeRulesByURLFilter
 *
 * @returns {object[]} The rules that remain after merging
 */
function mergeRules(rules, options)
{
  // NOTE(review): a reviewer (kzar, 2017/05/09) suggested having the default
  // options be a property on ContentBlockerList instead; the author's reply
  // is truncated in the review this code was taken from, so the outcome of
  // that discussion is unconfirmed.
  let settings = Object.assign({exhaustive: false}, options);

  let isNotMerged = ruleInfo => !ruleInfo.merged;

  let rulesInfo = rules.map(rule => ({rule}));

  let groupsByURLFilter = groupRulesByMergeableProperty(rulesInfo, "trigger",
                                                        "url-filter");
  for (let group of groupsByURLFilter.values())
  {
    if (group.length > 1)
      mergeRulesByURLFilter(group, settings.exhaustive);
  }

  // Filter out rules that are redundant or have been merged into other rules.
  rulesInfo = rulesInfo.filter(ruleInfo => !(ruleInfo.redundant ||
                                             ruleInfo.merged));

  for (let arrayProperty of ["resource-type", "if-domain"])
  {
    let groups = groupRulesByMergeableProperty(rulesInfo, "trigger",
                                               arrayProperty);
    for (let group of groups.values())
    {
      if (group.length > 1)
        mergeRulesByArrayProperty(group, "trigger", arrayProperty);
    }

    rulesInfo = rulesInfo.filter(isNotMerged);
  }

  return rulesInfo.map(ruleInfo => ruleInfo.rule);
}
830 | |
369 let ContentBlockerList = | 831 let ContentBlockerList = |
370 /** | 832 /** |
371 * Create a new Adblock Plus filter to content blocker list converter | 833 * Create a new Adblock Plus filter to content blocker list converter |
372 * | 834 * |
835 * @param {object} options Options for content blocker list generation | |
836 * | |
373 * @constructor | 837 * @constructor |
374 */ | 838 */ |
375 exports.ContentBlockerList = function () | 839 exports.ContentBlockerList = function(options) |
376 { | 840 { |
841 const defaultOptions = { | |
842 merge: false, | |
843 exhaustiveMerge: false | |
844 }; | |
845 | |
846 this.options = Object.assign({}, defaultOptions, options); | |
847 | |
377 this.requestFilters = []; | 848 this.requestFilters = []; |
378 this.requestExceptions = []; | 849 this.requestExceptions = []; |
379 this.elemhideFilters = []; | 850 this.elemhideFilters = []; |
380 this.elemhideExceptions = []; | 851 this.elemhideExceptions = []; |
381 this.elemhideSelectorExceptions = new Map(); | 852 this.elemhideSelectorExceptions = new Map(); |
382 }; | 853 }; |
383 | 854 |
384 /** | 855 /** |
385 * Add Adblock Plus filter to be converted | 856 * Add Adblock Plus filter to be converted |
386 * | 857 * |
(...skipping 27 matching lines...) Expand all Loading... | |
414 let domains = this.elemhideSelectorExceptions[filter.selector]; | 885 let domains = this.elemhideSelectorExceptions[filter.selector]; |
415 if (!domains) | 886 if (!domains) |
416 domains = this.elemhideSelectorExceptions[filter.selector] = []; | 887 domains = this.elemhideSelectorExceptions[filter.selector] = []; |
417 | 888 |
418 parseDomains(filter.domains, domains, []); | 889 parseDomains(filter.domains, domains, []); |
419 } | 890 } |
420 }; | 891 }; |
421 | 892 |
422 /** | 893 /** |
423 * Generate content blocker list for all filters that were added | 894 * Generate content blocker list for all filters that were added |
424 * | |
425 * @returns {Filter} filter Filter to convert | |
426 */ | 895 */ |
427 ContentBlockerList.prototype.generateRules = function(filter) | 896 ContentBlockerList.prototype.generateRules = function() |
428 { | 897 { |
429 let rules = []; | 898 let rules = []; |
430 | 899 |
431 let groupedElemhideFilters = new Map(); | 900 let groupedElemhideFilters = new Map(); |
432 for (let filter of this.elemhideFilters) | 901 for (let filter of this.elemhideFilters) |
433 { | 902 { |
434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 903 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
435 if (!result) | 904 if (!result) |
436 continue; | 905 continue; |
437 | 906 |
(...skipping 27 matching lines...) Expand all Loading... | |
465 } | 934 } |
466 }); | 935 }); |
467 | 936 |
468 for (let filter of this.elemhideExceptions) | 937 for (let filter of this.elemhideExceptions) |
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 938 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
470 for (let filter of this.requestFilters) | 939 for (let filter of this.requestFilters) |
471 convertFilterAddRules(rules, filter, "block", true); | 940 convertFilterAddRules(rules, filter, "block", true); |
472 for (let filter of this.requestExceptions) | 941 for (let filter of this.requestExceptions) |
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 942 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
474 | 943 |
475 return rules.filter(rule => !hasNonASCI(rule)); | 944 rules = rules.filter(rule => !hasNonASCI(rule)); |
945 | |
946 if (this.options.merge) | |
947 rules = mergeRules(rules, {exhaustive: this.options.exhaustiveMerge}); | |
kzar
2017/05/09 10:05:47
Why wrap the exhaustiveMerge option in an Object h
Manish Jethani
2017/05/09 15:52:47
This is because mergeRules takes an option called
kzar
2017/05/09 16:50:58
Well if you passed through the value of exhaustive
Manish Jethani
2017/05/09 17:32:11
Done.
| |
948 | |
949 return rules; | |
476 }; | 950 }; |
OLD | NEW |