OLD | NEW |
(Empty) | |
| 1 #include <emscripten.h> |
| 2 |
| 3 #include "RegExpFilter.h" |
| 4 #include "WhiteListFilter.h" |
| 5 #include "InvalidFilter.h" |
| 6 |
| 7 namespace |
| 8 { |
| 9 int GenerateRegExp(const std::u16string& source) |
| 10 { |
| 11 // Note: This doesn't remove trailing wildcards, otherwise the result should |
| 12 // be identical to Filter.toRegExp(). |
| 13 std::u16string result; |
| 14 char16_t prevChar = u'*'; |
| 15 for (size_t i = 0, l = source.length(); i < l; ++i) |
| 16 { |
| 17 char16_t currChar = source[i]; |
| 18 switch (currChar) |
| 19 { |
| 20 case u'*': |
| 21 if (prevChar != u'*') |
| 22 result += u".*"; |
| 23 break; |
| 24 case u'^': |
| 25 result += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60
\\x7B-\\x7F]|$)"; |
| 26 break; |
| 27 case u'|': |
| 28 if (i == 0) |
| 29 { |
| 30 // Anchor at expression start, maybe extended anchor? |
| 31 if (i + 1 < l && source[i + 1] == u'|') |
| 32 { |
| 33 result += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"; |
| 34 ++i; |
| 35 } |
| 36 else |
| 37 result += u"^"; |
| 38 } |
| 39 else if (i == l - 1) |
| 40 { |
| 41 // Anchor at expression end, ignore if following separator placehold
er |
| 42 if (prevChar != u'^') |
| 43 result += u"$"; |
| 44 } |
| 45 else |
| 46 { |
| 47 // Not actually an anchor, escape it |
| 48 result += u"\\|"; |
| 49 } |
| 50 break; |
| 51 default: |
| 52 if ((currChar >= u'a' && currChar <= u'z') || |
| 53 (currChar >= u'A' && currChar <= u'Z') || |
| 54 (currChar >= u'0' && currChar <= u'9') || |
| 55 currChar >= 128) |
| 56 { |
| 57 result += currChar; |
| 58 } |
| 59 else |
| 60 { |
| 61 result += u"\\"; |
| 62 result.append(1, currChar); |
| 63 } |
| 64 } |
| 65 prevChar = currChar; |
| 66 } |
| 67 return EM_ASM_INT(return regexps.create($0, $1), &result, false); |
| 68 } |
| 69 } |
| 70 |
| 71 RegExpFilter::RegExpFilter(const std::u16string& text, |
| 72 const std::u16string& pattern, const std::u16string& options) |
| 73 : ActiveFilter(text), regexpId(0) |
| 74 { |
| 75 size_t len = pattern.length(); |
| 76 if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/') |
| 77 { |
| 78 std::u16string param = pattern.substr(1, len - 2); |
| 79 regexpId = EM_ASM_INT(return regexps.create($0, $1), ¶m, false); |
| 80 |
| 81 std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return
regexps.getError($0), regexpId)); |
| 82 if (error) |
| 83 { |
| 84 EM_ASM_ARGS(regexps.delete($0), regexpId); |
| 85 throw std::u16string(*error); |
| 86 } |
| 87 } |
| 88 else |
| 89 regexpSource = pattern; |
| 90 } |
| 91 |
| 92 RegExpFilter::~RegExpFilter() |
| 93 { |
| 94 if (regexpId) |
| 95 EM_ASM_ARGS(regexps.delete($0), regexpId); |
| 96 } |
| 97 |
| 98 Filter* RegExpFilter::Create(const std::u16string& text) |
| 99 { |
| 100 bool blocking = true; |
| 101 size_t patternStart = 0; |
| 102 if (!text.compare(0, 2, u"@@")) |
| 103 { |
| 104 blocking = false; |
| 105 patternStart = 2; |
| 106 } |
| 107 |
| 108 size_t patternEnd = text.find(u'$', patternStart); |
| 109 size_t patternLength = (patternEnd != std::u16string::npos ? |
| 110 patternEnd - patternStart : patternEnd); |
| 111 std::u16string pattern(text.substr(patternStart, patternLength)); |
| 112 std::u16string options(patternEnd != std::u16string::npos ? |
| 113 text.substr(patternEnd) : u""); |
| 114 |
| 115 try |
| 116 { |
| 117 if (blocking) |
| 118 return new RegExpFilter(text, pattern, options); |
| 119 else |
| 120 return new WhiteListFilter(text, pattern, options); |
| 121 } |
| 122 catch (const std::u16string& reason) |
| 123 { |
| 124 return new InvalidFilter(text, reason); |
| 125 } |
| 126 } |
| 127 |
| 128 Filter::Type RegExpFilter::GetType() const |
| 129 { |
| 130 return Type::BLOCKING; |
| 131 } |
| 132 |
| 133 bool RegExpFilter::Matches(const std::u16string& location) |
| 134 { |
| 135 if (!regexpId) |
| 136 { |
| 137 regexpId = GenerateRegExp(regexpSource); |
| 138 regexpSource.resize(0); |
| 139 } |
| 140 return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location); |
| 141 } |
OLD | NEW |