// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Transformations on dictionary words.

#ifndef BROTLI_ENC_TRANSFORM_H_
#define BROTLI_ENC_TRANSFORM_H_

#include <string>

#include "./dictionary.h"

namespace brotli {

// Kinds of edit that can be applied to a dictionary word.
//
// The numeric values are load-bearing: ApplyTransform() subtracts a
// kOmitLastN value directly from the word length, and derives the number of
// leading bytes to skip for kOmitFirstN as (value - (kOmitFirst1 - 1)).
// Keep each group contiguous and in ascending order.
enum WordTransformType {
  // Word is copied unchanged.
  kIdentity = 0,

  // The last N bytes of the word are dropped; the value equals N.
  kOmitLast1 = 1,
  kOmitLast2 = 2,
  kOmitLast3 = 3,
  kOmitLast4 = 4,
  kOmitLast5 = 5,
  kOmitLast6 = 6,
  kOmitLast7 = 7,
  kOmitLast8 = 8,
  kOmitLast9 = 9,

  // ToUpperCase() is applied once at the start of the word (First) or
  // repeatedly across the whole word (All).
  kUppercaseFirst = 10,
  kUppercaseAll = 11,

  // The first N bytes of the word are dropped; N == value - 11.
  kOmitFirst1 = 12,
  kOmitFirst2 = 13,
  kOmitFirst3 = 14,
  kOmitFirst4 = 15,
  kOmitFirst5 = 16,
  kOmitFirst6 = 17,
  kOmitFirst7 = 18,
  kOmitFirst8 = 19,
  kOmitFirst9 = 20,
};

// One entry of the transform table. ApplyTransform() emits `prefix`, then the
// dictionary word edited according to `word_transform`, then `suffix`.
// Member order matters: kTransforms initializes entries positionally.
struct Transform {
  // NUL-terminated bytes placed before the word (may be empty).
  const char* prefix;
  // Edit applied to the dictionary word itself.
  WordTransformType word_transform;
  // NUL-terminated bytes placed after the word (may be empty).
  const char* suffix;
};

// Table of all available word transforms, as {prefix, word edit, suffix}.
//
// The position of an entry is its transform index:
// GetTransformedDictionaryWord() selects an entry with kTransforms[t], so
// entries must not be reordered, inserted, or removed casually.
// NOTE(review): presumably this order must also match the table used by the
// decoder / the format definition -- confirm before changing anything here.
static const Transform kTransforms[] = {
     {         "", kIdentity,       ""           },
     {         "", kIdentity,       " "          },
     {        " ", kIdentity,       " "          },
     {         "", kOmitFirst1,     ""           },
     {         "", kUppercaseFirst, " "          },
     {         "", kIdentity,       " the "      },
     {        " ", kIdentity,       ""           },
     {       "s ", kIdentity,       " "          },
     {         "", kIdentity,       " of "       },
     {         "", kUppercaseFirst, ""           },
     {         "", kIdentity,       " and "      },
     {         "", kOmitFirst2,     ""           },
     {         "", kOmitLast1,      ""           },
     {       ", ", kIdentity,       " "          },
     {         "", kIdentity,       ", "         },
     {        " ", kUppercaseFirst, " "          },
     {         "", kIdentity,       " in "       },
     {         "", kIdentity,       " to "       },
     {       "e ", kIdentity,       " "          },
     {         "", kIdentity,       "\""         },
     {         "", kIdentity,       "."          },
     {         "", kIdentity,       "\">"        },
     {         "", kIdentity,       "\n"         },
     {         "", kOmitLast3,      ""           },
     {         "", kIdentity,       "]"          },
     {         "", kIdentity,       " for "      },
     {         "", kOmitFirst3,     ""           },
     {         "", kOmitLast2,      ""           },
     {         "", kIdentity,       " a "        },
     {         "", kIdentity,       " that "     },
     {        " ", kUppercaseFirst, ""           },
     {         "", kIdentity,       ". "         },
     {        ".", kIdentity,       ""           },
     {        " ", kIdentity,       ", "         },
     {         "", kOmitFirst4,     ""           },
     {         "", kIdentity,       " with "     },
     {         "", kIdentity,       "'"          },
     {         "", kIdentity,       " from "     },
     {         "", kIdentity,       " by "       },
     {         "", kOmitFirst5,     ""           },
     {         "", kOmitFirst6,     ""           },
     {    " the ", kIdentity,       ""           },
     {         "", kOmitLast4,      ""           },
     {         "", kIdentity,       ". The "     },
     {         "", kUppercaseAll,   ""           },
     {         "", kIdentity,       " on "       },
     {         "", kIdentity,       " as "       },
     {         "", kIdentity,       " is "       },
     {         "", kOmitLast7,      ""           },
     {         "", kOmitLast1,      "ing "       },
     {         "", kIdentity,       "\n\t"       },
     {         "", kIdentity,       ":"          },
     {        " ", kIdentity,       ". "         },
     {         "", kIdentity,       "ed "        },
     {         "", kOmitFirst9,     ""           },
     {         "", kOmitFirst7,     ""           },
     {         "", kOmitLast6,      ""           },
     {         "", kIdentity,       "("          },
     {         "", kUppercaseFirst, ", "         },
     {         "", kOmitLast8,      ""           },
     {         "", kIdentity,       " at "       },
     {         "", kIdentity,       "ly "        },
     {    " the ", kIdentity,       " of "       },
     {         "", kOmitLast5,      ""           },
     {         "", kOmitLast9,      ""           },
     {        " ", kUppercaseFirst, ", "         },
     {         "", kUppercaseFirst, "\""         },
     {        ".", kIdentity,       "("          },
     {         "", kUppercaseAll,   " "          },
     {         "", kUppercaseFirst, "\">"        },
     {         "", kIdentity,       "=\""        },
     {        " ", kIdentity,       "."          },
     {    ".com/", kIdentity,       ""           },
     {    " the ", kIdentity,       " of the "   },
     {         "", kUppercaseFirst, "'"          },
     {         "", kIdentity,       ". This "    },
     {         "", kIdentity,       ","          },
     {        ".", kIdentity,       " "          },
     {         "", kUppercaseFirst, "("          },
     {         "", kUppercaseFirst, "."          },
     {         "", kIdentity,       " not "      },
     {        " ", kIdentity,       "=\""        },
     {         "", kIdentity,       "er "        },
     {        " ", kUppercaseAll,   " "          },
     {         "", kIdentity,       "al "        },
     {        " ", kUppercaseAll,   ""           },
     {         "", kIdentity,       "='"         },
     {         "", kUppercaseAll,   "\""         },
     {         "", kUppercaseFirst, ". "         },
     {        " ", kIdentity,       "("          },
     {         "", kIdentity,       "ful "       },
     {        " ", kUppercaseFirst, ". "         },
     {         "", kIdentity,       "ive "       },
     {         "", kIdentity,       "less "      },
     {         "", kUppercaseAll,   "'"          },
     {         "", kIdentity,       "est "       },
     {        " ", kUppercaseFirst, "."          },
     {         "", kUppercaseAll,   "\">"        },
     {        " ", kIdentity,       "='"         },
     {         "", kUppercaseFirst, ","          },
     {         "", kIdentity,       "ize "       },
     {         "", kUppercaseAll,   "."          },
     { "\xc2\xa0", kIdentity,       ""           },
     {        " ", kIdentity,       ","          },
     {         "", kUppercaseFirst, "=\""        },
     {         "", kUppercaseAll,   "=\""        },
     {         "", kIdentity,       "ous "       },
     {         "", kUppercaseAll,   ", "         },
     {         "", kUppercaseFirst, "='"         },
     {        " ", kUppercaseFirst, ","          },
     {        " ", kUppercaseAll,   "=\""        },
     {        " ", kUppercaseAll,   ", "         },
     {         "", kUppercaseAll,   ","          },
     {         "", kUppercaseAll,   "("          },
     {         "", kUppercaseAll,   ". "         },
     {        " ", kUppercaseAll,   "."          },
     {         "", kUppercaseAll,   "='"         },
     {        " ", kUppercaseAll,   ". "         },
     {        " ", kUppercaseFirst, "=\""        },
     {        " ", kUppercaseAll,   "='"         },
     {        " ", kUppercaseFirst, "='"         },
};

// Number of entries in kTransforms, i.e. one past the largest valid
// transform index. Derived from the array so it cannot drift from the table.
static const int kNumTransforms =
    static_cast<int>(sizeof(kTransforms) / sizeof(*kTransforms));

// Uppercases, in place, the character that starts at p[0] and returns the
// number of bytes the caller should advance to reach the next character.
//
// The lead byte p[0] selects one of three cases:
//   * p[0] < 0xc0, or only one byte is available (len == 1): a byte in
//     'a'..'z' has bit 0x20 flipped, turning it into 'A'..'Z'; any other
//     byte is left untouched. Returns 1.
//   * 0xc0 <= p[0] < 0xe0: p[1] is XORed with 32. Returns 2.
//   * p[0] >= 0xe0: if exactly two bytes are available nothing is modified
//     and 2 is returned; otherwise p[2] is XORed with 5 and 3 is returned.
// NOTE(review): the 0xc0 / 0xe0 thresholds look like UTF-8 lead-byte ranges
// for 2-byte and 3+-byte sequences; the XOR constants are a cheap
// approximation of case folding, not a real Unicode mapping -- confirm
// against the format definition before altering them.
//
// `p` must point at `len` writable bytes and `len` is expected to be >= 1
// (ApplyTransform() only calls this with a positive length).
//
// Declared `inline` rather than `static`: this header-defined helper is
// called from the external-linkage inline ApplyTransform(), and referring to
// an internal-linkage function from there makes every translation unit's
// copy of ApplyTransform() name a different entity (a One Definition Rule
// violation). `inline` also avoids unused-function warnings in translation
// units that include this header without calling the helper.
inline int ToUpperCase(uint8_t *p, int len) {
  const uint8_t lead = p[0];

  // Single-byte case: plain ASCII range, or no room for a continuation byte.
  if (len == 1 || lead < 0xc0) {
    const bool is_ascii_lower = (lead >= 'a') && (lead <= 'z');
    if (is_ascii_lower) {
      p[0] ^= 32;
    }
    return 1;
  }

  // Three-byte case: needs a third byte to modify; with only two available
  // the bytes are skipped unchanged.
  if (lead >= 0xe0) {
    if (len == 2) {
      return 2;
    }
    p[2] ^= 5;
    return 3;
  }

  // Two-byte case (0xc0 <= lead < 0xe0).
  p[1] ^= 32;
  return 2;
}

inline std::string ApplyTransform(
    const Transform& t, const uint8_t* word, int len) {
  std::string ret(t.prefix);
  if (t.word_transform <= kOmitLast9) {
    len -= t.word_transform;
  }
  if (len > 0) {
    if (t.word_transform >= kOmitFirst1) {
      const int skip = t.word_transform - (kOmitFirst1 - 1);
      if (len > skip) {
        ret += std::string(word + skip, word + len);
      }
    } else {
      ret += std::string(word, word + len);
      uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[ret.size() - len]);
      if (t.word_transform == kUppercaseFirst) {
        ToUpperCase(uppercase, len);
      } else if (t.word_transform == kUppercaseAll) {
        while (len > 0) {
          int step = ToUpperCase(uppercase, len);
          uppercase += step;
          len -= step;
        }
      }
    }
  }
  ret += std::string(t.suffix);
  return ret;
}

// Returns the transformed dictionary word identified by (len_code, word_id).
//
// `len_code` is used both as the index into the per-length dictionary tables
// and as the byte length of each word in that bucket. `word_id` packs two
// values: with num = 1 << kBrotliDictionarySizeBitsByLength[len_code],
// word_id % num picks the word inside the bucket and word_id / num picks the
// entry of kTransforms to apply.
//
// No range checking is performed here.
// NOTE(review): callers are presumably required to pass a len_code that is
// valid for the dictionary tables and a non-negative word_id smaller than
// num * kNumTransforms -- confirm against the call sites.
inline std::string GetTransformedDictionaryWord(int len_code, int word_id) {
  const int words_in_bucket = 1 << kBrotliDictionarySizeBitsByLength[len_code];
  const int transform_index = word_id / words_in_bucket;
  const int index_in_bucket = word_id % words_in_bucket;

  // Start of the bucket, then step over the preceding words; every word in
  // this bucket is len_code bytes long.
  int byte_offset = kBrotliDictionaryOffsetsByLength[len_code];
  byte_offset += len_code * index_in_bucket;

  const uint8_t* const word_bytes = &kBrotliDictionary[byte_offset];
  return ApplyTransform(kTransforms[transform_index], word_bytes, len_code);
}

}  // namespace brotli

#endif  // BROTLI_ENC_TRANSFORM_H_
