OLD | NEW |
| (Empty) |
1 /* | |
2 * Simplified by Andrey Novikov for AdBlock Plus | |
3 */ | |
4 | |
5 /* | |
6 * Licensed to the Apache Software Foundation (ASF) under one or more | |
7 * contributor license agreements. See the NOTICE file distributed with | |
8 * this work for additional information regarding copyright ownership. | |
9 * The ASF licenses this file to You under the Apache License, Version 2.0 | |
10 * (the "License"); you may not use this file except in compliance with | |
11 * the License. You may obtain a copy of the License at | |
12 * | |
13 * http://www.apache.org/licenses/LICENSE-2.0 | |
14 * | |
15 * Unless required by applicable law or agreed to in writing, software | |
16 * distributed under the License is distributed on an "AS IS" BASIS, | |
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
18 * See the License for the specific language governing permissions and | |
19 * limitations under the License. | |
20 */ | |
21 package org.apache.commons.lang; | |
22 | |
23 import java.io.IOException; | |
24 import java.io.StringWriter; | |
25 import java.io.Writer; | |
26 | |
27 /** | |
28 * <p>Escapes and unescapes <code>String</code>s for | |
29 * Java, JavaScript, SQL, and CSV.</p> | |
30 * | |
31 * @author Apache Jakarta Turbine | |
32 * @author Purple Technology | |
33 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> | |
34 * @author Antony Riley | |
35 * @author Helge Tesgaard | |
36 * @author <a href="sean@boohai.com">Sean Brown</a> | |
37 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> | |
38 * @author Phil Steitz | |
39 * @author Pete Gieser | |
40 * @since 2.0 | |
41 * @version $Id: StringEscapeUtils.java 612880 2008-01-17 17:34:43Z ggregory $ | |
42 */ | |
43 public class StringEscapeUtils { | |
44 | |
45 private static final char CSV_DELIMITER = ','; | |
46 private static final char CSV_QUOTE = '"'; | |
47 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); | |
48 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CS
V_QUOTE, CharUtils.CR, CharUtils.LF}; | |
49 | |
50 /** | |
51 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in | |
52 * standard programming.</p> | |
53 * | |
54 * <p>Instead, the class should be used as: | |
55 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p> | |
56 * | |
57 * <p>This constructor is public to permit tools that require a JavaBean | |
58 * instance to operate.</p> | |
59 */ | |
60 public StringEscapeUtils() { | |
61 super(); | |
62 } | |
63 | |
64 // Java and JavaScript | |
65 //-------------------------------------------------------------------------- | |
66 /** | |
67 * <p>Escapes the characters in a <code>String</code> using Java String rule
s.</p> | |
68 * | |
69 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff,
etc.) </p> | |
70 * | |
71 * <p>So a tab becomes the characters <code>'\\'</code> and | |
72 * <code>'t'</code>.</p> | |
73 * | |
74 * <p>The only difference between Java strings and JavaScript strings | |
75 * is that in JavaScript, a single quote must be escaped.</p> | |
76 * | |
77 * <p>Example: | |
78 * <pre> | |
79 * input string: He didn't say, "Stop!" | |
80 * output string: He didn't say, \"Stop!\" | |
81 * </pre> | |
82 * </p> | |
83 * | |
84 * @param str String to escape values in, may be null | |
85 * @return String with escaped values, <code>null</code> if null string inpu
t | |
86 */ | |
87 public static String escapeJava(String str) { | |
88 return escapeJavaStyleString(str, false); | |
89 } | |
90 | |
91 /** | |
92 * <p>Escapes the characters in a <code>String</code> using Java String rule
s to | |
93 * a <code>Writer</code>.</p> | |
94 * | |
95 * <p>A <code>null</code> string input has no effect.</p> | |
96 * | |
97 * @see #escapeJava(java.lang.String) | |
98 * @param out Writer to write escaped string into | |
99 * @param str String to escape values in, may be null | |
100 * @throws IllegalArgumentException if the Writer is <code>null</code> | |
101 * @throws IOException if error occurs on underlying Writer | |
102 */ | |
103 public static void escapeJava(Writer out, String str) throws IOException { | |
104 escapeJavaStyleString(out, str, false); | |
105 } | |
106 | |
107 /** | |
108 * <p>Escapes the characters in a <code>String</code> using JavaScript Strin
g rules.</p> | |
109 * <p>Escapes any values it finds into their JavaScript String form. | |
110 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, et
c.) </p> | |
111 * | |
112 * <p>So a tab becomes the characters <code>'\\'</code> and | |
113 * <code>'t'</code>.</p> | |
114 * | |
115 * <p>The only difference between Java strings and JavaScript strings | |
116 * is that in JavaScript, a single quote must be escaped.</p> | |
117 * | |
118 * <p>Example: | |
119 * <pre> | |
120 * input string: He didn't say, "Stop!" | |
121 * output string: He didn\'t say, \"Stop!\" | |
122 * </pre> | |
123 * </p> | |
124 * | |
125 * @param str String to escape values in, may be null | |
126 * @return String with escaped values, <code>null</code> if null string inpu
t | |
127 */ | |
128 public static String escapeJavaScript(String str) { | |
129 return escapeJavaStyleString(str, true); | |
130 } | |
131 | |
132 /** | |
133 * <p>Escapes the characters in a <code>String</code> using JavaScript Strin
g rules | |
134 * to a <code>Writer</code>.</p> | |
135 * | |
136 * <p>A <code>null</code> string input has no effect.</p> | |
137 * | |
138 * @see #escapeJavaScript(java.lang.String) | |
139 * @param out Writer to write escaped string into | |
140 * @param str String to escape values in, may be null | |
141 * @throws IllegalArgumentException if the Writer is <code>null</code> | |
142 * @throws IOException if error occurs on underlying Writer | |
143 **/ | |
144 public static void escapeJavaScript(Writer out, String str) throws IOExcepti
on { | |
145 escapeJavaStyleString(out, str, true); | |
146 } | |
147 | |
148 /** | |
149 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> | |
150 * | |
151 * @param str String to escape values in, may be null | |
152 * @param escapeSingleQuotes escapes single quotes if <code>true</code> | |
153 * @return the escaped string | |
154 */ | |
155 private static String escapeJavaStyleString(String str, boolean escapeSingle
Quotes) { | |
156 if (str == null) { | |
157 return null; | |
158 } | |
159 try { | |
160 StringWriter writer = new StringWriter(str.length() * 2); | |
161 escapeJavaStyleString(writer, str, escapeSingleQuotes); | |
162 return writer.toString(); | |
163 } catch (IOException ioe) { | |
164 // this should never ever happen while writing to a StringWriter | |
165 ioe.printStackTrace(); | |
166 return null; | |
167 } | |
168 } | |
169 | |
170 /** | |
171 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> | |
172 * | |
173 * @param out write to receieve the escaped string | |
174 * @param str String to escape values in, may be null | |
175 * @param escapeSingleQuote escapes single quotes if <code>true</code> | |
176 * @throws IOException if an IOException occurs | |
177 */ | |
178 private static void escapeJavaStyleString(Writer out, String str, boolean es
capeSingleQuote) throws IOException { | |
179 if (out == null) { | |
180 throw new IllegalArgumentException("The Writer must not be null"); | |
181 } | |
182 if (str == null) { | |
183 return; | |
184 } | |
185 int sz; | |
186 sz = str.length(); | |
187 for (int i = 0; i < sz; i++) { | |
188 char ch = str.charAt(i); | |
189 | |
190 // handle unicode | |
191 if (ch > 0xfff) { | |
192 out.write("\\u" + hex(ch)); | |
193 } else if (ch > 0xff) { | |
194 out.write("\\u0" + hex(ch)); | |
195 } else if (ch > 0x7f) { | |
196 out.write("\\u00" + hex(ch)); | |
197 } else if (ch < 32) { | |
198 switch (ch) { | |
199 case '\b': | |
200 out.write('\\'); | |
201 out.write('b'); | |
202 break; | |
203 case '\n': | |
204 out.write('\\'); | |
205 out.write('n'); | |
206 break; | |
207 case '\t': | |
208 out.write('\\'); | |
209 out.write('t'); | |
210 break; | |
211 case '\f': | |
212 out.write('\\'); | |
213 out.write('f'); | |
214 break; | |
215 case '\r': | |
216 out.write('\\'); | |
217 out.write('r'); | |
218 break; | |
219 default : | |
220 if (ch > 0xf) { | |
221 out.write("\\u00" + hex(ch)); | |
222 } else { | |
223 out.write("\\u000" + hex(ch)); | |
224 } | |
225 break; | |
226 } | |
227 } else { | |
228 switch (ch) { | |
229 case '\'': | |
230 if (escapeSingleQuote) { | |
231 out.write('\\'); | |
232 } | |
233 out.write('\''); | |
234 break; | |
235 case '"': | |
236 out.write('\\'); | |
237 out.write('"'); | |
238 break; | |
239 case '\\': | |
240 out.write('\\'); | |
241 out.write('\\'); | |
242 break; | |
243 case '/': | |
244 out.write('\\'); | |
245 out.write('/'); | |
246 break; | |
247 default : | |
248 out.write(ch); | |
249 break; | |
250 } | |
251 } | |
252 } | |
253 } | |
254 | |
255 /** | |
256 * <p>Returns an upper case hexadecimal <code>String</code> for the given | |
257 * character.</p> | |
258 * | |
259 * @param ch The character to convert. | |
260 * @return An upper case hexadecimal <code>String</code> | |
261 */ | |
262 private static String hex(char ch) { | |
263 return Integer.toHexString(ch).toUpperCase(); | |
264 } | |
265 | |
266 /** | |
267 * <p>Unescapes any Java literals found in the <code>String</code>. | |
268 * For example, it will turn a sequence of <code>'\'</code> and | |
269 * <code>'n'</code> into a newline character, unless the <code>'\'</code> | |
270 * is preceded by another <code>'\'</code>.</p> | |
271 * | |
272 * @param str the <code>String</code> to unescape, may be null | |
273 * @return a new unescaped <code>String</code>, <code>null</code> if null st
ring input | |
274 */ | |
275 public static String unescapeJava(String str) { | |
276 if (str == null) { | |
277 return null; | |
278 } | |
279 try { | |
280 StringWriter writer = new StringWriter(str.length()); | |
281 unescapeJava(writer, str); | |
282 return writer.toString(); | |
283 } catch (IOException ioe) { | |
284 // this should never ever happen while writing to a StringWriter | |
285 ioe.printStackTrace(); | |
286 return null; | |
287 } | |
288 } | |
289 | |
290 /** | |
291 * <p>Unescapes any Java literals found in the <code>String</code> to a | |
292 * <code>Writer</code>.</p> | |
293 * | |
294 * <p>For example, it will turn a sequence of <code>'\'</code> and | |
295 * <code>'n'</code> into a newline character, unless the <code>'\'</code> | |
296 * is preceded by another <code>'\'</code>.</p> | |
297 * | |
298 * <p>A <code>null</code> string input has no effect.</p> | |
299 * | |
300 * @param out the <code>Writer</code> used to output unescaped characters | |
301 * @param str the <code>String</code> to unescape, may be null | |
302 * @throws IllegalArgumentException if the Writer is <code>null</code> | |
303 * @throws IOException if error occurs on underlying Writer | |
304 */ | |
305 public static void unescapeJava(Writer out, String str) throws IOException { | |
306 if (out == null) { | |
307 throw new IllegalArgumentException("The Writer must not be null"); | |
308 } | |
309 if (str == null) { | |
310 return; | |
311 } | |
312 int sz = str.length(); | |
313 StringBuffer unicode = new StringBuffer(4); | |
314 boolean hadSlash = false; | |
315 boolean inUnicode = false; | |
316 for (int i = 0; i < sz; i++) { | |
317 char ch = str.charAt(i); | |
318 if (inUnicode) { | |
319 // if in unicode, then we're reading unicode | |
320 // values in somehow | |
321 unicode.append(ch); | |
322 if (unicode.length() == 4) { | |
323 // unicode now contains the four hex digits | |
324 // which represents our unicode character | |
325 try { | |
326 int value = Integer.parseInt(unicode.toString(), 16); | |
327 out.write((char) value); | |
328 unicode.setLength(0); | |
329 inUnicode = false; | |
330 hadSlash = false; | |
331 } catch (NumberFormatException nfe) { | |
332 throw (IOException) new IOException("Unable to parse uni
code value: " + unicode).initCause(nfe); | |
333 } | |
334 } | |
335 continue; | |
336 } | |
337 if (hadSlash) { | |
338 // handle an escaped value | |
339 hadSlash = false; | |
340 switch (ch) { | |
341 case '\\': | |
342 out.write('\\'); | |
343 break; | |
344 case '\'': | |
345 out.write('\''); | |
346 break; | |
347 case '\"': | |
348 out.write('"'); | |
349 break; | |
350 case 'r': | |
351 out.write('\r'); | |
352 break; | |
353 case 'f': | |
354 out.write('\f'); | |
355 break; | |
356 case 't': | |
357 out.write('\t'); | |
358 break; | |
359 case 'n': | |
360 out.write('\n'); | |
361 break; | |
362 case 'b': | |
363 out.write('\b'); | |
364 break; | |
365 case 'u': | |
366 { | |
367 // uh-oh, we're in unicode country.... | |
368 inUnicode = true; | |
369 break; | |
370 } | |
371 default : | |
372 out.write(ch); | |
373 break; | |
374 } | |
375 continue; | |
376 } else if (ch == '\\') { | |
377 hadSlash = true; | |
378 continue; | |
379 } | |
380 out.write(ch); | |
381 } | |
382 if (hadSlash) { | |
383 // then we're in the weird case of a \ at the end of the | |
384 // string, let's output it anyway. | |
385 out.write('\\'); | |
386 } | |
387 } | |
388 | |
389 /** | |
390 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p
> | |
391 * | |
392 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'
</code> | |
393 * into a newline character, unless the <code>'\'</code> is preceded by anot
her | |
394 * <code>'\'</code>.</p> | |
395 * | |
396 * @see #unescapeJava(String) | |
397 * @param str the <code>String</code> to unescape, may be null | |
398 * @return A new unescaped <code>String</code>, <code>null</code> if null st
ring input | |
399 */ | |
400 public static String unescapeJavaScript(String str) { | |
401 return unescapeJava(str); | |
402 } | |
403 | |
404 /** | |
405 * <p>Unescapes any JavaScript literals found in the <code>String</code> to
a | |
406 * <code>Writer</code>.</p> | |
407 * | |
408 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'
</code> | |
409 * into a newline character, unless the <code>'\'</code> is preceded by anot
her | |
410 * <code>'\'</code>.</p> | |
411 * | |
412 * <p>A <code>null</code> string input has no effect.</p> | |
413 * | |
414 * @see #unescapeJava(Writer,String) | |
415 * @param out the <code>Writer</code> used to output unescaped characters | |
416 * @param str the <code>String</code> to unescape, may be null | |
417 * @throws IllegalArgumentException if the Writer is <code>null</code> | |
418 * @throws IOException if error occurs on underlying Writer | |
419 */ | |
420 public static void unescapeJavaScript(Writer out, String str) throws IOExcep
tion { | |
421 unescapeJava(out, str); | |
422 } | |
423 | |
424 //----------------------------------------------------------------------- | |
425 /** | |
426 * <p>Escapes the characters in a <code>String</code> to be suitable to pass
to | |
427 * an SQL query.</p> | |
428 * | |
429 * <p>For example, | |
430 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" + | |
431 * StringEscapeUtils.escapeSql("McHale's Navy") + | |
432 * "'");</pre> | |
433 * </p> | |
434 * | |
435 * <p>At present, this method only turns single-quotes into doubled single-q
uotes | |
436 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does
not | |
437 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses
.</p> | |
438 * | |
439 * see http://www.jguru.com/faq/view.jsp?EID=8881 | |
440 * @param str the string to escape, may be null | |
441 * @return a new String, escaped for SQL, <code>null</code> if null string i
nput | |
442 */ | |
443 public static String escapeSql(String str) { | |
444 if (str == null) { | |
445 return null; | |
446 } | |
447 return StringUtils.replace(str, "'", "''"); | |
448 } | |
449 | |
450 //----------------------------------------------------------------------- | |
451 | |
452 /** | |
453 * <p>Returns a <code>String</code> value for a CSV column enclosed in doubl
e quotes, | |
454 * if required.</p> | |
455 * | |
456 * <p>If the value contains a comma, newline or double quote, then the | |
457 * String value is returned enclosed in double quotes.</p> | |
458 * </p> | |
459 * | |
460 * <p>Any double quote characters in the value are escaped with another doub
le quote.</p> | |
461 * | |
462 * <p>If the value does not contain a comma, newline or double quote, then t
he | |
463 * String value is returned unchanged.</p> | |
464 * </p> | |
465 * | |
466 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikiped
ia</a> and | |
467 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. | |
468 * | |
469 * @param str the input CSV column String, may be null | |
470 * @return the input String, enclosed in double quotes if the value contains
a comma, | |
471 * newline or double quote, <code>null</code> if null string input | |
472 * @since 2.4 | |
473 */ | |
474 public static String escapeCsv(String str) { | |
475 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { | |
476 return str; | |
477 } | |
478 try { | |
479 StringWriter writer = new StringWriter(); | |
480 escapeCsv(writer, str); | |
481 return writer.toString(); | |
482 } catch (IOException ioe) { | |
483 // this should never ever happen while writing to a StringWriter | |
484 ioe.printStackTrace(); | |
485 return null; | |
486 } | |
487 } | |
488 | |
489 /** | |
490 * <p>Writes a <code>String</code> value for a CSV column enclosed in double
quotes, | |
491 * if required.</p> | |
492 * | |
493 * <p>If the value contains a comma, newline or double quote, then the | |
494 * String value is written enclosed in double quotes.</p> | |
495 * </p> | |
496 * | |
497 * <p>Any double quote characters in the value are escaped with another doub
le quote.</p> | |
498 * | |
499 * <p>If the value does not contain a comma, newline or double quote, then t
he | |
500 * String value is written unchanged (null values are ignored).</p> | |
501 * </p> | |
502 * | |
503 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikiped
ia</a> and | |
504 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. | |
505 * | |
506 * @param str the input CSV column String, may be null | |
507 * @param out Writer to write input string to, enclosed in double quotes if
it contains | |
508 * a comma, newline or double quote | |
509 * @throws IOException if error occurs on underlying Writer | |
510 * @since 2.4 | |
511 */ | |
512 public static void escapeCsv(Writer out, String str) throws IOException { | |
513 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { | |
514 if (str != null) { | |
515 out.write(str); | |
516 } | |
517 return; | |
518 } | |
519 out.write(CSV_QUOTE); | |
520 for (int i = 0; i < str.length(); i++) { | |
521 char c = str.charAt(i); | |
522 if (c == CSV_QUOTE) { | |
523 out.write(CSV_QUOTE); // escape double quote | |
524 } | |
525 out.write(c); | |
526 } | |
527 out.write(CSV_QUOTE); | |
528 } | |
529 | |
530 /** | |
531 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> | |
532 * | |
533 * <p>If the value is enclosed in double quotes, and contains a comma, newli
ne | |
534 * or double quote, then quotes are removed. | |
535 * </p> | |
536 * | |
537 * <p>Any double quote escaped characters (a pair of double quotes) are unes
caped | |
538 * to just one double quote. </p> | |
539 * | |
540 * <p>If the value is not enclosed in double quotes, or is and does not cont
ain a | |
541 * comma, newline or double quote, then the String value is returned unch
anged.</p> | |
542 * </p> | |
543 * | |
544 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikiped
ia</a> and | |
545 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. | |
546 * | |
547 * @param str the input CSV column String, may be null | |
548 * @return the input String, with enclosing double quotes removed and embedd
ed double | |
549 * quotes unescaped, <code>null</code> if null string input | |
550 * @since 2.4 | |
551 */ | |
552 public static String unescapeCsv(String str) { | |
553 if (str == null) { | |
554 return null; | |
555 } | |
556 try { | |
557 StringWriter writer = new StringWriter(); | |
558 unescapeCsv(writer, str); | |
559 return writer.toString(); | |
560 } catch (IOException ioe) { | |
561 // this should never ever happen while writing to a StringWriter | |
562 ioe.printStackTrace(); | |
563 return null; | |
564 } | |
565 } | |
566 | |
567 /** | |
568 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> | |
569 * | |
570 * <p>If the value is enclosed in double quotes, and contains a comma, newli
ne | |
571 * or double quote, then quotes are removed. | |
572 * </p> | |
573 * | |
574 * <p>Any double quote escaped characters (a pair of double quotes) are unes
caped | |
575 * to just one double quote. </p> | |
576 * | |
577 * <p>If the value is not enclosed in double quotes, or is and does not cont
ain a | |
578 * comma, newline or double quote, then the String value is returned unch
anged.</p> | |
579 * </p> | |
580 * | |
581 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikiped
ia</a> and | |
582 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. | |
583 * | |
584 * @param str the input CSV column String, may be null | |
585 * @param out Writer to write the input String to, with enclosing double quo
tes | |
586 * removed and embedded double quotes unescaped, <code>null</code> if null s
tring input | |
587 * @throws IOException if error occurs on underlying Writer | |
588 * @since 2.4 | |
589 */ | |
590 public static void unescapeCsv(Writer out, String str) throws IOException { | |
591 if (str == null) { | |
592 return; | |
593 } | |
594 if (str.length() < 2) { | |
595 out.write(str); | |
596 return; | |
597 } | |
598 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_Q
UOTE ) { | |
599 out.write(str); | |
600 return; | |
601 } | |
602 | |
603 // strip quotes | |
604 String quoteless = str.substring(1, str.length() - 1); | |
605 | |
606 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { | |
607 // deal with escaped quotes; ie) "" | |
608 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR,
CSV_QUOTE_STR); | |
609 } | |
610 | |
611 out.write(str); | |
612 } | |
613 | |
614 } | |
OLD | NEW |