# exploit.py

import string
import json
from pwn import *
from ast import literal_eval
from collections import Counter, defaultdict

import sys

sys.path.append('./')
from chall import derive_markov_model


def filter_model(model, total_samples=None, *, relative_cutoff=0.25, absolute_cutoff=0.1):
    """Return a copy of ``model`` with rare transitions removed.

    ``model`` maps each prefix to a ``Counter`` of ``suffix -> count``.  For
    every prefix the suffixes are visited from most to least frequent, and
    the scan stops at the first suffix whose count is below BOTH of these
    thresholds:

    * ``relative_cutoff`` times the highest count for that prefix, and
    * ``absolute_cutoff`` times ``total_samples``.

    Counts only decrease along the scan, so every later suffix would fail
    the same test; that is why stopping early is safe.

    Args:
        model: mapping of prefix -> Counter of suffix counts.
        total_samples: number of samples the counts were gathered from.
            Defaults to ``len(ALL_FLAGS)`` so existing one-argument calls
            keep working.
        relative_cutoff: fraction of the per-prefix maximum (default 1/4).
        absolute_cutoff: fraction of ``total_samples`` (default 10 %).

    Returns:
        A ``defaultdict(Counter)`` holding only the surviving transitions.
        Prefixes with no surviving (or no) suffixes are absent.
    """
    if total_samples is None:
        # Fall back to the module-level sample list used by the script.
        total_samples = len(ALL_FLAGS)
    absolute_floor = absolute_cutoff * total_samples

    filtered_model = defaultdict(Counter)
    for prefix, options in model.items():
        ranked = options.most_common()
        if not ranked:
            continue
        # most_common() is sorted descending, so the first entry is the max.
        relative_floor = ranked[0][1] * relative_cutoff
        for suffix, count in ranked:
            if count < relative_floor and count < absolute_floor:
                break
            filtered_model[prefix][suffix] = count
    return filtered_model

def recover_substrings(model):
    """Grow every model key along its forced (single-option) transitions.

    Each key of ``model`` starts as a substring.  As long as the state named
    by the substring's last character has exactly one continuation, that
    continuation is appended.  A substring is finished when its last
    character has zero or several continuations, or when following the
    forced transition would revisit a state already expanded for that
    substring (a deterministic cycle, which would otherwise never end).

    The model is only read: missing states are treated as having no
    continuation rather than being inserted or raising ``KeyError``.

    Args:
        model: mapping of state -> mapping of continuation -> count.

    Returns:
        List of finished substrings, in the order they finished (shorter
        chains first).
    """
    # Pair every substring with the set of states it has already expanded.
    pending = [(key, frozenset()) for key in model]
    finished = []
    while pending:
        still_growing = []
        for substring, expanded in pending:
            state = substring[-1]
            options = list(model.get(state, ()))
            if len(options) == 1 and state not in expanded:
                still_growing.append((substring + options[0], expanded | {state}))
            else:
                finished.append(substring)
        pending = still_growing
    return finished

def search_tree(model, prefix, depth=0):
    """Enumerate candidate strings that extend ``prefix`` up to a ``'}'``.

    The state is the last character of the current string.  Forced
    transitions (states with exactly one continuation) are followed without
    consuming depth; each genuine branch point increases ``depth`` by one.
    Exploration is abandoned once ``depth`` exceeds 10.

    A run of forced transitions that returns to a state it has already
    visited can never reach ``'}'``, so it yields no candidates instead of
    recursing until ``RecursionError``.  Unknown states are treated as dead
    ends; the model is never modified.

    Args:
        model: mapping of state -> mapping of continuation -> count.
        prefix: non-empty string to extend.
        depth: number of branch points already taken on this path.

    Returns:
        List of completed strings (each ending in ``'}'``), in model
        iteration order.
    """
    if depth > 10:
        return []

    # Walk the forced part of the path iteratively.
    visited = set()
    while True:
        state = prefix[-1]
        if state == '}':
            return [prefix]
        options = model.get(state, {})
        if len(options) != 1:
            break
        if state in visited:
            # Deterministic cycle: no terminator is reachable from here.
            return []
        visited.add(state)
        prefix += next(iter(options))

    # Branch point (or dead end when there are no options at all).
    result = []
    for suffix in options:
        result.extend(search_tree(model, prefix + suffix, depth + 1))
    return result


# Flag prefix used to seed the search; it is assigned again (same value)
# right before search_tree is called further down.
prefix = 'ictf{'

def get_flags(num_flags=10000):
    """Fetch ``num_flags`` generated flags from the remote challenge service.

    Opens one connection, answers the service's prompts (100 training
    samples, 20 % real flags), requests ``num_flags`` flags and reads them
    one per line.  The connection is closed even if the exchange fails
    part-way.

    Args:
        num_flags: how many flags to request and read (default 10000).

    Returns:
        List of ``num_flags`` flags as stripped UTF-8 strings.
    """
    # For a local run, swap the next line for:
    #   r = process(['python3', 'chall.py'])
    r = remote('0.cloud.chals.io', 25927)
    try:
        # pwntools tubes work on bytes, so prompts and answers are bytes.
        r.recvuntil(b"How many training samples would you like?\n")
        r.sendline(b'100')
        r.recvuntil(b"What percentage of training flags would you like to be included to make the flags look real? (max 20%)\n")
        r.sendline(b'20')
        r.recvuntil(b"Understood, training the model...")
        r.recvuntil(b"Done! Now, how many flags would you like to generate?")
        r.sendline(str(num_flags).encode())
        r.recvuntil(b"Here you go:\n")
        return [r.recvline().decode('utf-8').strip() for _ in range(num_flags)]
    finally:
        r.close()

# Pool five batches of generated flags into a single sample list.
ALL_FLAGS = []
for _ in range(5):
    ALL_FLAGS.extend(get_flags())

# Derive the model from the pooled flags (derive_markov_model is imported
# from chall), prune it, and show what survived.
model = derive_markov_model(ALL_FLAGS)
filtered_model = filter_model(model)
print(filtered_model)

# Forced chains reachable from each state of the pruned model.
substrings = recover_substrings(filtered_model)
print(substrings)

# Enumerate every candidate flag starting from the known prefix.
prefix = 'ictf{'
possible_flags = search_tree(filtered_model, prefix)
for flag in possible_flags:
    print(flag)