Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: Shared/PluginFilter.cpp

Issue 9998007: Initial libadblockplus integration (Closed)
Patch Set: Subscription changes and filter management cleanup Created April 5, 2013, 1:56 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Shared/PluginFilter.h ('k') | Shared/PluginSettings.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 #include "PluginStdAfx.h" 1 #include "PluginStdAfx.h"
2 2
3 #include "PluginFilter.h" 3 #include "PluginFilter.h"
4 4
5 #if (defined PRODUCT_ADBLOCKPLUS) 5 #if (defined PRODUCT_ADBLOCKPLUS)
6 #include "PluginSettings.h" 6 #include "PluginSettings.h"
7 #include "PluginClient.h" 7 #include "PluginClient.h"
8 #include "PluginClientFactory.h" 8 #include "PluginClientFactory.h"
9 #endif 9 #endif
10 10
(...skipping 806 matching lines...) Expand 10 before | Expand all | Expand 10 after
817 s_criticalSectionFilterMap.Lock(); 817 s_criticalSectionFilterMap.Lock();
818 { 818 {
819 for (std::vector<std::string>::iterator it = filters.begin(); it < filters.e nd(); ++it) 819 for (std::vector<std::string>::iterator it = filters.begin(); it < filters.e nd(); ++it)
820 { 820 {
821 CString filter((*it).c_str()); 821 CString filter((*it).c_str());
822 // If the line is not commented out 822 // If the line is not commented out
823 if (!filter.Trim().IsEmpty() && filter.GetAt(0) != '!' && filter.GetAt(0) != '[') 823 if (!filter.Trim().IsEmpty() && filter.GetAt(0) != '!' && filter.GetAt(0) != '[')
824 { 824 {
825 int filterType = 0; 825 int filterType = 0;
826 826
827 // We need to categorize the filters
828 // We have three options, whitelist, block or element hiding
829 // See http://adblockplus.org/en/filters for further documentation 827 // See http://adblockplus.org/en/filters for further documentation
830 828
831 // @@ indicates white listing rule
832 if (filter.Find(L"@@") == 0)
833 {
834 filterType = CFilter::filterTypeWhiteList;
835
836 filter.Delete(0, 2);
837 }
838 // If a filter contains ## then it is a element hiding rule 829 // If a filter contains ## then it is a element hiding rule
839 else if (filter.Find(L"#") >= 0) 830 if (filter.Find(L"#") >= 0)
840 { 831 {
841 filterType = CFilter::filterTypeElementHide; 832 filterType = CFilter::filterTypeElementHide;
842 } 833 }
843 //Anything we do not support here
844 else if (filter.Find(L"*") == 0)
845 {
846 filterType = CFilter::filterTypeUnknown;
847 }
848 // Else, it is a general rule
849 else
850 {
851 filterType = CFilter::filterTypeBlocking;
852 }
853 834
854 try 835 try
855 { 836 {
856 if (filterType == CFilter::filterTypeElementHide) 837 if (filterType == CFilter::filterTypeElementHide)
857 { 838 {
858 AddFilterElementHide(filter); 839 AddFilterElementHide(filter);
859 } 840 }
860 } 841 }
861 catch(...) 842 catch(...)
862 { 843 {
863 //just ignore all errors we might get when adding filters 844 #ifdef ENABLE_DEBUG_RESULT
845 CPluginDebug::DebugResult(L"Error loading hide filter: " + filter);
846 #endif
864 } 847 }
865 } 848 }
866 } 849 }
867 } 850 }
868 s_criticalSectionFilterMap.Unlock(); 851 s_criticalSectionFilterMap.Unlock();
869 852
870 return isRead; 853 return isRead;
871 } 854 }
872 855
873 void CPluginFilter::ClearFilters() 856 void CPluginFilter::ClearFilters()
(...skipping 12 matching lines...) Expand all
886 869
887 m_elementHideTags.clear(); 870 m_elementHideTags.clear();
888 m_elementHideTagsId.clear(); 871 m_elementHideTagsId.clear();
889 m_elementHideTagsClass.clear(); 872 m_elementHideTagsClass.clear();
890 m_elementHideDomains.clear(); 873 m_elementHideDomains.clear();
891 } 874 }
892 s_criticalSectionFilterMap.Unlock(); 875 s_criticalSectionFilterMap.Unlock();
893 } 876 }
894 877
895 878
896 bool CPluginFilter::IsMatchFilter(const CFilter& filter, CString src, const CStr ing& srcDomain, const CString& domain) const
897 {
898 // Initial checks
899
900 // $match_case
901 if (!filter.m_isMatchCase)
902 {
903 src.MakeLower();
904 }
905
906 // $domain
907 if (!filter.m_domains.empty())
908 {
909 bool bFound = false;
910
911 for (std::set<CString>::const_iterator it = filter.m_domains.begin(); !bFoun d && it != filter.m_domains.end(); ++it)
912 {
913 bFound = domain == *(it) || IsSubdomain(domain, *it);
914 }
915
916 if (!bFound)
917 {
918 return false;
919 }
920 }
921
922 // $domain ~
923 if (!filter.m_domainsNot.empty())
924 {
925 for (std::set<CString>::const_iterator it = filter.m_domainsNot.begin(); it != filter.m_domainsNot.end(); ++it)
926 {
927 if (domain == *(it) || IsSubdomain(domain, *it))
928 {
929 return false;
930 }
931 }
932 }
933
934 // $third_party
935 if (filter.m_isThirdParty)
936 {
937 if (srcDomain == domain || IsSubdomain(srcDomain, domain))
938 {
939 return false;
940 }
941 }
942
943 // $third_party ~
944 if (filter.m_isFirstParty)
945 {
946 if (srcDomain != domain && !IsSubdomain(srcDomain, domain))
947 {
948 return false;
949 }
950 }
951
952 // "regex" checks
953
954 int startPos = 0;
955 int srcLength = src.GetLength();
956 UINT indexEnd = filter.m_stringElements.size() - 1;
957
958 for (UINT index = 0; index <= indexEnd; index++)
959 {
960 if (index == 0 && filter.m_isFromStartDomain)
961 {
962 CString loweredDomain = srcDomain;
963 int domainPos = src.Find(loweredDomain.MakeLower());
964 int lastPos = src.Find('/', domainPos);
965
966 bool bFoundDomain = false;
967 bool bContinueDomainSearch = true;
968
969 while (bContinueDomainSearch)
970 {
971 if (domainPos == FindMatch(src, filter.m_stringElements[index]))
972 {
973 bContinueDomainSearch = false;
974 bFoundDomain = true;
975 }
976 else
977 {
978 domainPos = src.Find('.', domainPos + 1) + 1;
979 if (domainPos == 0 || (domainPos >= lastPos && lastPos >= 0))
980 {
981 bContinueDomainSearch = false;
982 }
983 }
984 }
985
986 if (!bFoundDomain)
987 {
988 return false;
989 }
990 }
991
992 startPos = FindMatch(src, filter.m_stringElements[index], startPos);
993 if (startPos < 0)
994 {
995 return false;
996 }
997
998 int length = filter.m_stringElements[index].GetLength();
999
1000 // Check from start
1001 if (index == 0 && filter.m_isFromStart && startPos > 0)
1002 {
1003 return false;
1004 }
1005
1006 // Check from end
1007 if (index == indexEnd && filter.m_isFromEnd && startPos + length != srcLengt h)
1008 {
1009 return false;
1010 }
1011
1012 startPos += length;
1013 }
1014
1015 return true;
1016 }
1017
1018
1019 const CFilter* CPluginFilter::MatchFilter(int filterType, const CString& src, in t contentType, const CString& domain) const
1020 {
1021 const CFilter* filter = NULL;
1022
1023 int startCharacter = 0;
1024 int keyLength = 4;
1025
1026 CString srcLower = src;
1027 srcLower.MakeLower();
1028 int srcLowerLength = srcLower.GetLength();
1029
1030 // Extract src domain
1031 DWORD length = 2048;
1032 CString srcDomain;
1033
1034 if (SUCCEEDED(::UrlGetPart(src, srcDomain.GetBufferSetLength(2048), &length, U RL_PART_HOSTNAME, 0)))
1035 {
1036 srcDomain.ReleaseBuffer();
1037
1038 if (srcDomain.Left(4) == L"www.")
1039 {
1040 srcDomain = srcDomain.Right(srcDomain.GetLength() - 4);
1041 }
1042 else if (srcDomain.Left(5) == L"www2." || srcDomain.Left(5) == L"www3.")
1043 {
1044 srcDomain = srcDomain.Right(srcDomain.GetLength() - 5);
1045 }
1046 }
1047 else
1048 {
1049 srcDomain.ReleaseBuffer();
1050 srcDomain.Empty();
1051 }
1052
1053 // Search in filter map
1054 s_criticalSectionFilterMap.Lock();
1055 {
1056 const TFilterMap* filterMap = m_filterMap[filterType];
1057
1058 if (srcLowerLength >= 7)
1059 {
1060 if (srcLower.Find(L"http://") == 0)
1061 {
1062 startCharacter = 7;
1063 }
1064 else if (srcLower.Find(L"https://") == 0)
1065 {
1066 startCharacter = 8;
1067 }
1068 }
1069
1070 DWORD dwKey = 0;
1071
1072 while (filter == NULL && srcLowerLength >= startCharacter + keyLength)
1073 {
1074 if (dwKey == 0)
1075 {
1076 dwKey = (srcLower.GetAt(startCharacter) << 24) | (srcLower.GetAt(startCh aracter+1) << 16) | (srcLower.GetAt(startCharacter+2) << 8) | srcLower.GetAt(sta rtCharacter+3);
1077 }
1078 else
1079 {
1080 dwKey <<= 8;
1081 dwKey |= srcLower.GetAt(startCharacter+3);
1082 }
1083
1084 TFilterMap::const_iterator foundEntry = filterMap[0].find(dwKey);
1085 if (foundEntry != filterMap[0].end())
1086 {
1087 if (((foundEntry->second.m_contentType & contentType) || foundEntry->sec ond.m_contentType == CFilter::contentTypeAny) && IsMatchFilter(foundEntry->secon d, src, srcDomain, domain))
1088 {
1089 filter = &(foundEntry->second);
1090 break;
1091 }
1092 }
1093
1094 // No match - increment the start character
1095 startCharacter++;
1096 }
1097
1098 // Second list
1099 if (filter == NULL)
1100 {
1101 dwKey = 0;
1102 startCharacter = 0;
1103
1104 if (srcLowerLength >= 7)
1105 {
1106 if (srcLower.Find(L"http://") == 0)
1107 {
1108 startCharacter = 7;
1109 }
1110 else if (srcLower.Find(L"https://") == 0)
1111 {
1112 startCharacter = 8;
1113 }
1114 }
1115
1116 while (filter == NULL && srcLowerLength >= startCharacter + keyLength)
1117 {
1118 if (dwKey == 0)
1119 {
1120 dwKey = (srcLower.GetAt(startCharacter) << 24) | (srcLower.GetAt(start Character+1) << 16) | (srcLower.GetAt(startCharacter+2) << 8) | srcLower.GetAt(s tartCharacter+3);
1121 }
1122 else
1123 {
1124 dwKey <<= 8;
1125 dwKey |= srcLower.GetAt(startCharacter+3);
1126 }
1127
1128 TFilterMap::const_iterator foundEntry = filterMap[1].find(dwKey);
1129 if (foundEntry != filterMap[1].end())
1130 {
1131 if (((foundEntry->second.m_contentType & contentType) || foundEntry->s econd.m_contentType == CFilter::contentTypeAny) && IsMatchFilter(foundEntry->sec ond, src, srcDomain, domain))
1132 {
1133 filter = &(foundEntry->second);
1134 break;
1135 }
1136 }
1137
1138 // No match - increment the start character
1139 startCharacter++;
1140 }
1141 }
1142
1143 // Search in default filter map (try all filters)
1144 if (filter == NULL)
1145 {
1146 for (TFilterMapDefault::const_iterator it = m_filterMapDefault[filterType] .begin(); it != m_filterMapDefault[filterType].end(); ++it)
1147 {
1148 if (((it->m_contentType & contentType) || it->m_contentType == CFilter:: contentTypeAny) && IsMatchFilter(*it, src, srcDomain, domain))
1149 {
1150 filter = &(*it);
1151 break;
1152 }
1153 }
1154 }
1155
1156 }
1157 s_criticalSectionFilterMap.Unlock();
1158
1159 return filter;
1160 }
1161
1162
1163 bool CPluginFilter::ShouldWhiteList(CString src) const 879 bool CPluginFilter::ShouldWhiteList(CString src) const
1164 { 880 {
1165 // We should not block the empty string, so all filtering does not make sense 881 // We should not block the empty string, so all filtering does not make sense
1166 // Therefore we just return 882 // Therefore we just return
1167 if (src.Trim().IsEmpty()) 883 if (src.Trim().IsEmpty())
1168 { 884 {
1169 return false; 885 return false;
1170 } 886 }
1171 887
1172 const CFilter* filter = MatchFilter(CFilter::filterTypeWhiteList, src, CFilter ::contentTypeDocument, ""); 888 //TODO: Implement whitelisting check from libadblockplus here
1173 889 return false;
1174 return filter ? true : false;
1175 } 890 }
1176 891
1177 892
1178 bool CPluginFilter::ShouldBlock(CString src, int contentType, const CString& dom ain, bool addDebug) const 893 bool CPluginFilter::ShouldBlock(CString src, int contentType, const CString& dom ain, bool addDebug) const
1179 { 894 {
1180 // We should not block the empty string, so all filtering does not make sense 895 // We should not block the empty string, so all filtering does not make sense
1181 // Therefore we just return 896 // Therefore we just return
1182 if (src.Trim().IsEmpty()) 897 if (src.Trim().IsEmpty())
1183 { 898 {
1184 return false; 899 return false;
1185 } 900 }
1186 901
1187 CPluginSettings* settings = CPluginSettings::GetInstance(); 902 CPluginSettings* settings = CPluginSettings::GetInstance();
1188 903
1189 CString type; 904 CString type;
1190 if (addDebug) 905 if (addDebug)
1191 { 906 {
1192 type = "???"; 907 type = "???";
1193 908
1194 std::map<int,CString>::const_iterator it = m_contentMapText.find(contentType ); 909 std::map<int,CString>::const_iterator it = m_contentMapText.find(contentType );
1195 if (it != m_contentMapText.end()) 910 if (it != m_contentMapText.end())
1196 { 911 {
1197 type = it->second; 912 type = it->second;
1198 } 913 }
1199 } 914 }
1200 915
1201 CPluginClient* client = CPluginClient::GetInstance(); 916 CPluginClient* client = CPluginClient::GetInstance();
1202 AdblockPlus::FilterEngine* filterEngine = client->GetFilterEngine(); 917 AdblockPlus::FilterEngine* filterEngine = client->GetFilterEngine();
1203 918
1204 // src.OemToCharA(); 919 //TODO: Make sure if the content type names are in sync with libadblockplus
920 std::string contentTypeString = CT2A(type, CP_UTF8);
1205 921
1206 std::string contentTypeString = ""; 922 CT2CA srcAnsi(src, CP_UTF8);
923 std::string url(srcAnsi);
1207 924
1208 CT2CA srcAnsi(src); 925 //TODO: figure out domain passing for whitelisting
1209 std::string url(srcAnsi);
1210 if (filterEngine->Matches(url, contentTypeString)) 926 if (filterEngine->Matches(url, contentTypeString))
1211 { 927 {
1212 if (addDebug) 928 if (addDebug)
1213 { 929 {
1214 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES") 930 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES")
1215 931
1216 #ifdef ENABLE_DEBUG_RESULT 932 #ifdef ENABLE_DEBUG_RESULT
1217 CPluginDebug::DebugResultBlocking(type, src); 933 CPluginDebug::DebugResultBlocking(type, src);
1218 #endif 934 #endif
1219 } 935 }
1220 return true; 936 return true;
1221 } 937 }
1222 return false; 938 return false;
1223
1224 //The following is for reference only
1225
1226 const CFilter* blockFilter = MatchFilter(CFilter::filterTypeBlocking, src, con tentType, domain);
1227 if (blockFilter)
1228 {
1229 const CFilter* whiteFilter = MatchFilter(CFilter::filterTypeWhiteList, src, contentType, domain);
1230 if (whiteFilter)
1231 {
1232 if (addDebug)
1233 {
1234 DEBUG_FILTER("Filter::ShouldBlock " + type + " NO src:" + src + " - whi telist:\"" + whiteFilter->m_filterText + "\"");
1235 }
1236 blockFilter = NULL;
1237 }
1238 else if (addDebug)
1239 {
1240 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES src:" + src + " - \"" + blockFilter->m_filterText + "\"")
1241
1242 #ifdef ENABLE_DEBUG_RESULT
1243 CPluginDebug::DebugResultBlocking(type, src);
1244 #endif
1245 }
1246 }
1247 else if (addDebug)
1248 {
1249 DEBUG_FILTER("Filter::ShouldBlock " + type + " NO src:" + src)
1250 }
1251
1252 return blockFilter ? true : false;
1253 } 939 }
1254 940
1255 int CPluginFilter::FindMatch(const CString& src, CString filterPart, int srcStar tPos) const 941 int CPluginFilter::FindMatch(const CString& src, CString filterPart, int srcStar tPos) const
1256 { 942 {
1257 int filterCurrentPos = filterPart.Find('^'); 943 int filterCurrentPos = filterPart.Find('^');
1258 if (filterCurrentPos >= 0) 944 if (filterCurrentPos >= 0)
1259 { 945 {
1260 int srcLength = src.GetLength(); 946 int srcLength = src.GetLength();
1261 int srcFilterPos = -1; 947 int srcFilterPos = -1;
1262 int srcCurrentPos = srcStartPos; 948 int srcCurrentPos = srcStartPos;
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
1396 if (pos > 0 && domain.GetLength() + pos == subdomain.GetLength()) 1082 if (pos > 0 && domain.GetLength() + pos == subdomain.GetLength())
1397 { 1083 {
1398 if (subdomain.GetAt(pos - 1) == '.') 1084 if (subdomain.GetAt(pos - 1) == '.')
1399 { 1085 {
1400 return true; 1086 return true;
1401 } 1087 }
1402 } 1088 }
1403 1089
1404 return false; 1090 return false;
1405 } 1091 }
OLD | NEW
« no previous file with comments | « Shared/PluginFilter.h ('k') | Shared/PluginSettings.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld