{
  "problem": {
    "name": "molecular_translation",
    "description": "In this competition, you'll interpret old chemical images. With access to a large set of synthetic image data generated by Bristol-Myers Squibb, you'll convert images back to the underlying chemical structure annotated as InChI text. Results are evaluated on the mean Levenshtein distance between the InChi strings you submit and the ground truth InChi values. **Data Overview:** `train.csv`, `valid.csv`, and `test.csv` each contain three columns: `image_id`, `InChI`, and `SMILES`. The corresponding images are stored in the `images` folder, with filenames matching the `image_id`.",
    "metric": "1 - mean Levenshtein distance",
    "interface": "deepevolve_interface.py"
  },
  "initial_idea": {
    "title": "ResNet+GRU",
    "content": "The method casts molecular translation from structure images into InChI captions as an image-to-sequence task: a deep convolutional backbone processes each image to produce a fixed-length feature vector, which initializes a recurrent decoder that generates character tokens of the InChI string. We build a character-level vocabulary with special start, end, and padding markers, and train the network end-to-end by minimizing cross-entropy loss between predicted and true token sequences. To maintain stable training we apply decoder dropout, clip gradients, and use a cosine learning-rate schedule, selecting the best model via validation edit distance between predicted and reference strings. At test time the decoder uses greedy decoding until the end marker, yielding a complete InChI. This approach brings together visual feature extraction and sequential modeling to produce accurate chemical identifiers directly from images.",
    "supplement": "https://www.kaggle.com/code/yasufuminakama/inchi-resnet-lstm-with-attention-starter/notebook"
  }
}
