/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.imaging.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Map;

import org.apache.commons.imaging.ImagingException;
import org.apache.commons.lang3.StringUtils;

/**
 * A rudimentary preprocessor and parser for the C programming language.
 *
 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
 */
public class BasicCParser {
    /**
     * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}. Exactly two hexadecimal digits are expected after the {@code x}.
     * </p>
     *
     * @param i             the index of the escape-sequence in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @throws ImagingException if fewer than two characters follow the {@code x}, or either of them is not a hexadecimal digit
     * @since 1.0-alpha3
     */
    private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
        if (i + 2 >= string.length()) {
            throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
        }
        // Validate each digit individually: Integer.parseInt would also accept a leading '+' or '-',
        // which is not a legal part of a hex escape.
        final int high = Character.digit(string.charAt(i + 1), 16);
        final int low = Character.digit(string.charAt(i + 2), 16);
        if (high < 0 || low < 0) {
            throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid");
        }
        stringBuilder.append((char) ((high << 4) | low));
        return i + 2;
    }

    /**
     * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}. Between one and three <em>contiguous</em> octal digits are consumed, starting with the digit at index
     * {@code i}.
     * </p>
     *
     * @param i             the index of the escape-sequence in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @since 1.0-alpha3
     */
    private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
        // At most three digits belong to the escape; never look past the end of the string.
        final int limit = Math.min(i + 3, string.length());
        int constant = string.charAt(i) - '0';
        int last = i;
        // Stop at the first non-octal character so that it is not swallowed into the constant.
        while (last + 1 < limit && isOctalDigit(string.charAt(last + 1))) {
            ++last;
            constant = constant * 8 + (string.charAt(last) - '0');
        }
        stringBuilder.append((char) constant);
        return last;
    }

    /**
     * Records a single preprocessor directive (the text after the {@code #}) in {@code defines}.
     *
     * <p>
     * Helper-function for {@code preprocess()}. Only {@code define} with a name and an optional value is supported.
     * </p>
     *
     * @param directive the directive text, without the leading {@code #} and without the line terminator
     * @param defines   the map receiving the defined name and its value ({@code null} when no value is given)
     * @throws ImagingException if the directive does not have two or three tokens, or is not a {@code define}
     */
    private static void applyDirective(final String directive, final Map<String, String> defines) throws ImagingException {
        final String[] tokens = tokenizeRow(directive);
        if (tokens.length < 2 || tokens.length > 3) {
            throw new ImagingException("Bad preprocessor directive");
        }
        if (!tokens[0].equals("define")) {
            throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
        }
        defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
    }

    /**
     * Tests whether {@code c} is one of the characters {@code '0'} to {@code '7'}.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is an octal digit
     */
    private static boolean isOctalDigit(final char c) {
        return '0' <= c && c <= '7';
    }

    /**
     * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}.
     * </p>
     *
     * @param i             the index of the escape-char in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @throws ImagingException if the escape-char is not recognized, or a hex escape is malformed
     * @since 1.0-alpha3
     */
    private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
        final char c = string.charAt(i);
        switch (c) {
        case '\\':
            stringBuilder.append('\\');
            break;
        case '"':
            stringBuilder.append('"');
            break;
        case '\'':
            stringBuilder.append('\'');
            break;
        case 'x':
            i = appendHex(i, stringBuilder, string);
            break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            i = appendOct(i, stringBuilder, string);
            break;
        case 'a':
            stringBuilder.append((char) 0x07);
            break;
        case 'b':
            stringBuilder.append((char) 0x08);
            break;
        case 'f':
            stringBuilder.append((char) 0x0c);
            break;
        case 'n':
            stringBuilder.append((char) 0x0a);
            break;
        case 'r':
            stringBuilder.append((char) 0x0d);
            break;
        case 't':
            stringBuilder.append((char) 0x09);
            break;
        case 'v':
            stringBuilder.append((char) 0x0b);
            break;
        default:
            throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
        }
        return i;
    }

    /**
     * Strips block comments and preprocessor directives from C source text.
     *
     * <p>
     * String and character literals are copied through unchanged (including their escape sequences). Each {@code #define} directive is removed from the
     * output and recorded in {@code defines}.
     * </p>
     *
     * @param is           the stream to read the source from, one byte at a time
     * @param firstComment if not {@code null}, receives the text of the first block comment; ordinary non-whitespace source characters seen before any
     *                     comment disable the capture
     * @param defines      receives each defined name mapped to its value ({@code null} when the define has no value); may be {@code null}, in which case
     *                     any directive is rejected
     * @return the source text with comments and directives removed
     * @throws IOException      if reading from {@code is} fails
     * @throws ImagingException if a literal or comment is unterminated, or a directive is malformed, unsupported or unexpected
     */
    public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
            throws IOException, ImagingException {
        boolean inSingleQuotes = false;
        boolean inString = false;
        boolean inComment = false;
        boolean inDirective = false;
        boolean hadSlash = false;
        boolean hadStar = false;
        boolean hadBackSlash = false;
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        boolean seenFirstComment = firstComment == null;
        final StringBuilder directiveBuffer = new StringBuilder();
        for (int c = is.read(); c != -1; c = is.read()) {
            if (inComment) {
                if (c == '*') {
                    // A previous star turned out not to close the comment, so it is comment text.
                    if (hadStar && !seenFirstComment) {
                        firstComment.append('*');
                    }
                    hadStar = true;
                } else if (c == '/') {
                    if (hadStar) {
                        hadStar = false;
                        inComment = false;
                        seenFirstComment = true;
                    } else if (!seenFirstComment) {
                        firstComment.append((char) c);
                    }
                } else {
                    if (hadStar && !seenFirstComment) {
                        firstComment.append('*');
                    }
                    hadStar = false;
                    if (!seenFirstComment) {
                        firstComment.append((char) c);
                    }
                }
            } else if (inSingleQuotes) {
                switch (c) {
                case '\\':
                    if (hadBackSlash) {
                        out.write('\\');
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        hadBackSlash = true;
                    }
                    break;
                case '\'':
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        inSingleQuotes = false;
                    }
                    out.write('\'');
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated single quote in file");
                default:
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    }
                    out.write(c);
                    break;
                }
            } else if (inString) {
                switch (c) {
                case '\\':
                    if (hadBackSlash) {
                        out.write('\\');
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        hadBackSlash = true;
                    }
                    break;
                case '"':
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        inString = false;
                    }
                    out.write('"');
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated string in file");
                default:
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    }
                    out.write(c);
                    break;
                }
            } else if (inDirective) {
                if (c == '\r' || c == '\n') {
                    inDirective = false;
                    applyDirective(directiveBuffer.toString(), defines);
                    directiveBuffer.setLength(0);
                } else {
                    directiveBuffer.append((char) c);
                }
            } else {
                switch (c) {
                case '/':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = true;
                    break;
                case '*':
                    if (hadSlash) {
                        inComment = true;
                        hadSlash = false;
                    } else {
                        out.write(c);
                    }
                    break;
                case '\'':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    inSingleQuotes = true;
                    break;
                case '"':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    inString = true;
                    break;
                case '#':
                    if (defines == null) {
                        throw new ImagingException("Unexpected preprocessor directive");
                    }
                    inDirective = true;
                    break;
                default:
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    // Only whitespace allowed before first comment:
                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
                        seenFirstComment = true;
                    }
                    break;
                }
            }
        }
        // End of input: every construct that was still open is an error, same as when a line ends inside it.
        if (inSingleQuotes) {
            throw new ImagingException("Unterminated single quote at the end of file");
        }
        if (inString) {
            throw new ImagingException("Unterminated string at the end of file");
        }
        if (inComment) {
            throw new ImagingException("Unterminated comment at the end of file");
        }
        if (inDirective) {
            // The last line may lack a trailing line terminator; its directive still counts.
            applyDirective(directiveBuffer.toString(), defines);
        }
        if (hadSlash) {
            out.write('/');
        }
        return out;
    }

    /**
     * Splits a row into its tokens, using spaces and tabs as separators.
     *
     * @param row the text to split
     * @return the non-empty tokens of {@code row}, in order of appearance
     */
    public static String[] tokenizeRow(final String row) {
        final String[] tokens = row.split("[ \t]");
        // Consecutive separators produce empty tokens; count the real ones first so the result can be sized exactly.
        int numLiveTokens = 0;
        for (final String token : tokens) {
            if (StringUtils.isNotEmpty(token)) {
                ++numLiveTokens;
            }
        }
        final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
        int next = 0;
        for (final String token : tokens) {
            if (StringUtils.isNotEmpty(token)) {
                liveTokens[next++] = token;
            }
        }
        return liveTokens;
    }

    /**
     * Decodes a double-quoted C string literal and appends the decoded characters to {@code stringBuilder}.
     *
     * @param stringBuilder the stringBuilder to append the decoded characters to
     * @param string        the literal to decode, including its surrounding double quotes
     * @throws ImagingException if {@code string} is not surrounded by double quotes, contains an unescaped double quote, or contains an invalid or
     *                          unterminated escape sequence
     */
    public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
        if (string.length() < 2) {
            throw new ImagingException("Parsing XPM file failed, " + "string is too short");
        }
        if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
            throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
        }
        boolean hadBackSlash = false;
        // Walk the characters between the surrounding quotes.
        for (int i = 1; i < string.length() - 1; i++) {
            final char c = string.charAt(i);
            if (hadBackSlash) {
                // parseEscape may consume more than one character and returns the index of the last one.
                i = parseEscape(i, stringBuilder, string);
                hadBackSlash = false;
            } else if (c == '\\') {
                hadBackSlash = true;
            } else if (c == '"') {
                throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
            } else {
                stringBuilder.append(c);
            }
        }
        if (hadBackSlash) {
            throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
        }
    }

    private final PushbackInputStream is;

    /**
     * Constructs a parser reading tokens from the given stream.
     *
     * @param is the stream holding the source text to tokenize
     */
    public BasicCParser(final ByteArrayInputStream is) {
        this.is = new PushbackInputStream(is);
    }

    /**
     * Reads the next token from the input.
     *
     * <p>
     * A token is a string literal (returned with its quotes and escape sequences intact), an identifier made of letters, digits and underscores, or one of
     * the single characters <code>{ } [ ] * ; = ,</code>. Spaces, tabs and line terminators between tokens are skipped.
     * </p>
     *
     * @return the next token, or {@code null} if the end of the input was reached
     * @throws IOException      if reading from the underlying stream fails
     * @throws ImagingException if a string literal is unterminated, or an unsupported character is found
     */
    public String nextToken() throws IOException, ImagingException {
        // I don't know how complete the C parsing in an XPM file
        // is meant to be, this is just the very basics...

        boolean inString = false;
        boolean inIdentifier = false;
        boolean hadBackSlash = false;
        final StringBuilder token = new StringBuilder();
        for (int c = is.read(); c != -1; c = is.read()) {
            if (inString) {
                switch (c) {
                case '\\':
                    token.append('\\');
                    hadBackSlash = !hadBackSlash;
                    break;
                case '"':
                    token.append('"');
                    if (!hadBackSlash) {
                        return token.toString();
                    }
                    hadBackSlash = false;
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated string in XPM file");
                default:
                    token.append((char) c);
                    hadBackSlash = false;
                    break;
                }
            } else if (inIdentifier) {
                if (!Character.isLetterOrDigit(c) && c != '_') {
                    // The character belongs to the next token; push it back for the next call.
                    is.unread(c);
                    return token.toString();
                }
                token.append((char) c);
            } else if (c == '"') {
                token.append('"');
                inString = true;
            } else if (Character.isLetterOrDigit(c) || c == '_') {
                token.append((char) c);
                inIdentifier = true;
            } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
                token.append((char) c);
                return token.toString();
            } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { // NOPMD
                // ignore
            } else {
                throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
            }
        }

        if (inIdentifier) {
            return token.toString();
        }
        if (inString) {
            throw new ImagingException("Unterminated string ends XPM file");
        }
        return null;
    }

}