001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.imaging.common;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Map;
025
026import org.apache.commons.imaging.ImagingException;
027import org.apache.commons.lang3.StringUtils;
028
029/**
030 * A rudimentary preprocessor and parser for the C programming language.
031 *
032 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
033 */
034public class BasicCParser {
035    /**
036     * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
037     *
038     * <p>
039     * Helper-function for {@code unescapeString()}.
040     * </p>
041     *
042     * @param i             the index of the escape-sequence in the string
043     * @param stringBuilder the stringBuilder to append the escape-char to
044     * @param string        the string whose chars are parsed
045     * @return the new index i
046     * @since 1.0-alpha3
047     */
048    private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
049        if (i + 2 >= string.length()) {
050            throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
051        }
052        final char hex1 = string.charAt(i + 1);
053        final char hex2 = string.charAt(i + 2);
054        i += 2;
055        final int constant;
056        try {
057            constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
058        } catch (final NumberFormatException nfe) {
059            throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid", nfe);
060        }
061        stringBuilder.append((char) constant);
062        return i;
063    }
064
065    /**
066     * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
067     *
068     * <p>
069     * Helper-function for {@code unescapeString()}.
070     * </p>
071     *
072     * @param i             the index of the escape-sequence in the string
073     * @param stringBuilder the stringBuilder to append the escape-char to
074     * @param string        the string whose chars are parsed
075     * @return the new index i
076     * @since 1.0-alpha3
077     */
078    private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
079        int length = 1;
080        if (i + 1 < string.length() && '0' <= string.charAt(i + 1) && string.charAt(i + 1) <= '7') {
081            ++length;
082        }
083        if (i + 2 < string.length() && '0' <= string.charAt(i + 2) && string.charAt(i + 2) <= '7') {
084            ++length;
085        }
086        int constant = 0;
087        for (int j = 0; j < length; j++) {
088            constant *= 8;
089            constant += string.charAt(i + j) - '0';
090        }
091        i += length - 1;
092        stringBuilder.append((char) constant);
093        return i;
094    }
095
096    /**
097     * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
098     *
099     * <p>
100     * Helper-function for {@code unescapeString()}.
101     * </p>
102     *
103     * @param i             the index of the escape-char in the string
104     * @param stringBuilder the stringBuilder to append the escape-char to
105     * @param string        the string whose chars are parsed
106     * @return the new index i
107     * @since 1.0-alpha3
108     */
109    private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
110        final char c = string.charAt(i);
111        switch (c) {
112        case '\\':
113            stringBuilder.append('\\');
114            break;
115        case '"':
116            stringBuilder.append('"');
117            break;
118        case '\'':
119            stringBuilder.append('\'');
120            break;
121        case 'x':
122            i = appendHex(i, stringBuilder, string);
123            break;
124        case '0':
125        case '1':
126        case '2':
127        case '3':
128        case '4':
129        case '5':
130        case '6':
131        case '7':
132            i = appendOct(i, stringBuilder, string);
133            break;
134        case 'a':
135            stringBuilder.append((char) 0x07);
136            break;
137        case 'b':
138            stringBuilder.append((char) 0x08);
139            break;
140        case 'f':
141            stringBuilder.append((char) 0x0c);
142            break;
143        case 'n':
144            stringBuilder.append((char) 0x0a);
145            break;
146        case 'r':
147            stringBuilder.append((char) 0x0d);
148            break;
149        case 't':
150            stringBuilder.append((char) 0x09);
151            break;
152        case 'v':
153            stringBuilder.append((char) 0x0b);
154            break;
155        default:
156            throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
157        }
158        return i;
159
160    }
161
162    public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
163            throws IOException, ImagingException {
164        boolean inSingleQuotes = false;
165        boolean inString = false;
166        boolean inComment = false;
167        boolean inDirective = false;
168        boolean hadSlash = false;
169        boolean hadStar = false;
170        boolean hadBackSlash = false;
171        final ByteArrayOutputStream out = new ByteArrayOutputStream();
172        boolean seenFirstComment = firstComment == null;
173        final StringBuilder directiveBuffer = new StringBuilder();
174        for (int c = is.read(); c != -1; c = is.read()) {
175            if (inComment) {
176                if (c == '*') {
177                    if (hadStar && !seenFirstComment) {
178                        firstComment.append('*');
179                    }
180                    hadStar = true;
181                } else if (c == '/') {
182                    if (hadStar) {
183                        hadStar = false;
184                        inComment = false;
185                        seenFirstComment = true;
186                    } else if (!seenFirstComment) {
187                        firstComment.append((char) c);
188                    }
189                } else {
190                    if (hadStar && !seenFirstComment) {
191                        firstComment.append('*');
192                    }
193                    hadStar = false;
194                    if (!seenFirstComment) {
195                        firstComment.append((char) c);
196                    }
197                }
198            } else if (inSingleQuotes) {
199                switch (c) {
200                case '\\':
201                    if (hadBackSlash) {
202                        out.write('\\');
203                        out.write('\\');
204                        hadBackSlash = false;
205                    } else {
206                        hadBackSlash = true;
207                    }
208                    break;
209                case '\'':
210                    if (hadBackSlash) {
211                        out.write('\\');
212                        hadBackSlash = false;
213                    } else {
214                        inSingleQuotes = false;
215                    }
216                    out.write('\'');
217                    break;
218                case '\r':
219                case '\n':
220                    throw new ImagingException("Unterminated single quote in file");
221                default:
222                    if (hadBackSlash) {
223                        out.write('\\');
224                        hadBackSlash = false;
225                    }
226                    out.write(c);
227                    break;
228                }
229            } else if (inString) {
230                switch (c) {
231                case '\\':
232                    if (hadBackSlash) {
233                        out.write('\\');
234                        out.write('\\');
235                        hadBackSlash = false;
236                    } else {
237                        hadBackSlash = true;
238                    }
239                    break;
240                case '"':
241                    if (hadBackSlash) {
242                        out.write('\\');
243                        hadBackSlash = false;
244                    } else {
245                        inString = false;
246                    }
247                    out.write('"');
248                    break;
249                case '\r':
250                case '\n':
251                    throw new ImagingException("Unterminated string in file");
252                default:
253                    if (hadBackSlash) {
254                        out.write('\\');
255                        hadBackSlash = false;
256                    }
257                    out.write(c);
258                    break;
259                }
260            } else if (inDirective) {
261                if (c == '\r' || c == '\n') {
262                    inDirective = false;
263                    final String[] tokens = tokenizeRow(directiveBuffer.toString());
264                    if (tokens.length < 2 || tokens.length > 3) {
265                        throw new ImagingException("Bad preprocessor directive");
266                    }
267                    if (!tokens[0].equals("define")) {
268                        throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
269                    }
270                    defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
271                    directiveBuffer.setLength(0);
272                } else {
273                    directiveBuffer.append((char) c);
274                }
275            } else {
276                switch (c) {
277                case '/':
278                    if (hadSlash) {
279                        out.write('/');
280                    }
281                    hadSlash = true;
282                    break;
283                case '*':
284                    if (hadSlash) {
285                        inComment = true;
286                        hadSlash = false;
287                    } else {
288                        out.write(c);
289                    }
290                    break;
291                case '\'':
292                    if (hadSlash) {
293                        out.write('/');
294                    }
295                    hadSlash = false;
296                    out.write(c);
297                    inSingleQuotes = true;
298                    break;
299                case '"':
300                    if (hadSlash) {
301                        out.write('/');
302                    }
303                    hadSlash = false;
304                    out.write(c);
305                    inString = true;
306                    break;
307                case '#':
308                    if (defines == null) {
309                        throw new ImagingException("Unexpected preprocessor directive");
310                    }
311                    inDirective = true;
312                    break;
313                default:
314                    if (hadSlash) {
315                        out.write('/');
316                    }
317                    hadSlash = false;
318                    out.write(c);
319                    // Only whitespace allowed before first comment:
320                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
321                        seenFirstComment = true;
322                    }
323                    break;
324                }
325            }
326        }
327        if (hadSlash) {
328            out.write('/');
329        }
330        if (hadStar) {
331            out.write('*');
332        }
333        if (inString) {
334            throw new ImagingException("Unterminated string at the end of file");
335        }
336        if (inComment) {
337            throw new ImagingException("Unterminated comment at the end of file");
338        }
339        return out;
340    }
341
342    public static String[] tokenizeRow(final String row) {
343        final String[] tokens = row.split("[ \t]");
344        int numLiveTokens = 0;
345        for (final String token : tokens) {
346            if (StringUtils.isNotEmpty(token)) {
347                ++numLiveTokens;
348            }
349        }
350        final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
351        int next = 0;
352        for (final String token : tokens) {
353            if (StringUtils.isNotEmpty(token)) {
354                liveTokens[next++] = token;
355            }
356        }
357        return liveTokens;
358    }
359
360    public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
361        if (string.length() < 2) {
362            throw new ImagingException("Parsing XPM file failed, " + "string is too short");
363        }
364        if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
365            throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
366        }
367        boolean hadBackSlash = false;
368        for (int i = 1; i < string.length() - 1; i++) {
369            final char c = string.charAt(i);
370            if (hadBackSlash) {
371                i = parseEscape(i, stringBuilder, string);
372                hadBackSlash = false;
373            } else if (c == '\\') {
374                hadBackSlash = true;
375            } else if (c == '"') {
376                throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
377            } else {
378                stringBuilder.append(c);
379            }
380        }
381        if (hadBackSlash) {
382            throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
383        }
384    }
385
386    private final PushbackInputStream is;
387
388    public BasicCParser(final ByteArrayInputStream is) {
389        this.is = new PushbackInputStream(is);
390    }
391
392    public String nextToken() throws IOException, ImagingException {
393        // I don't know how complete the C parsing in an XPM file
394        // is meant to be, this is just the very basics...
395
396        boolean inString = false;
397        boolean inIdentifier = false;
398        boolean hadBackSlash = false;
399        final StringBuilder token = new StringBuilder();
400        for (int c = is.read(); c != -1; c = is.read()) {
401            if (inString) {
402                switch (c) {
403                case '\\':
404                    token.append('\\');
405                    hadBackSlash = !hadBackSlash;
406                    break;
407                case '"':
408                    token.append('"');
409                    if (!hadBackSlash) {
410                        return token.toString();
411                    }
412                    hadBackSlash = false;
413                    break;
414                case '\r':
415                case '\n':
416                    throw new ImagingException("Unterminated string in XPM file");
417                default:
418                    token.append((char) c);
419                    hadBackSlash = false;
420                    break;
421                }
422            } else if (inIdentifier) {
423                if (!Character.isLetterOrDigit(c) && c != '_') {
424                    is.unread(c);
425                    return token.toString();
426                }
427                token.append((char) c);
428            } else if (c == '"') {
429                token.append('"');
430                inString = true;
431            } else if (Character.isLetterOrDigit(c) || c == '_') {
432                token.append((char) c);
433                inIdentifier = true;
434            } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
435                token.append((char) c);
436                return token.toString();
437            } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { // NOPMD
438                // ignore
439            } else {
440                throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
441            }
442        }
443
444        if (inIdentifier) {
445            return token.toString();
446        }
447        if (inString) {
448            throw new ImagingException("Unterminated string ends XMP file");
449        }
450        return null;
451    }
452
453}