Expressiveness Benchmark

Task: Convert strings with different formats to numbers

Specification: Convert the string value in each row into a number, removing commas if format is "comma_sep" and underscores if format is "under_sep".

Input: numbers

format	value
`comma_sep`	`12,337,800`
`under_sep`	`80_999`

Output: [12337800, 80999]

Python - Imperative

def strings_to_numbers(numbers):
  """Turn each row's separator-formatted string into an int.

  Args:
    numbers: iterable of mappings with a "format" key and a "value" key.
      A format of 'comma_sep' means commas are stripped from the value;
      any other format means underscores are stripped.

  Returns:
    A list with one int per row, in input order.
  """
  result = []
  for entry in numbers:
    # Pick the character to strip based on the row's declared format.
    separator = "," if entry["format"] == 'comma_sep' else "_"
    digits = entry["value"].replace(separator, "")
    result.append(int(digits))
  return result

Python - Functional

def strings_to_numbers(numbers):
  """Map every row to the int encoded by its separator-formatted value.

  Args:
    numbers: iterable of mappings with a "format" key and a "value" key.

  Returns:
    A list of ints, one per row, in input order.
  """
  def separator_for(fmt):
    # 'comma_sep' strips commas; every other format strips underscores.
    return "," if fmt == "comma_sep" else "_"

  def parse(row):
    return int(row["value"].replace(separator_for(row["format"]), ""))

  return list(map(parse, numbers))

Python - Pandas

def strings_to_numbers(numbers):
  """Convert each row's separator-formatted string into an int.

  Args:
    numbers: pandas DataFrame with string columns "format" and "value".
      A format of 'comma_sep' strips commas; any other format strips
      underscores.

  Returns:
    A list of ints, one per row, in row order. An empty frame yields [].
  """
  # With no rows, apply(axis=1) does not produce a Series for this callback
  # (pandas hands back a frame instead), so .tolist() would raise.
  if numbers.empty:
    return []

  def convert(row):
    # Bracket indexing looks the column label up directly, so it cannot be
    # shadowed by a Series attribute of the same name.
    sep = "," if row["format"] == 'comma_sep' else "_"
    return int(row["value"].replace(sep, ""))
  return numbers.apply(convert, axis=1).tolist()

R - Tidyverse

# Strip the separator implied by each row's `format` from `value` and return
# the resulting numbers as a numeric vector (one element per row).
# Rows with format "comma_sep" lose commas; all other rows lose underscores.
strings_to_numbers <- function(numbers) {
  with_output <- mutate(
    numbers,
    output = as.numeric(
      str_replace_all(value, ifelse(format == "comma_sep", ",", "_"), "")
    )
  )
  with_output %>% pull(output)
}

SQL - SQLite

-- For every row of numbers, remove the separator that matches the row's
-- format from value and cast what remains to an integer.
-- Only 'comma_sep' and 'under_sep' have a CASE branch; there is no ELSE.
-- String literals are single-quoted: SQLite treats double-quoted tokens as
-- identifiers first and only falls back to strings as a legacy quirk that
-- can be disabled.
SELECT
  CAST(
    REPLACE(
      value,
      CASE format
        WHEN 'comma_sep' THEN ','
        WHEN 'under_sep' THEN '_'
      END, '')
    AS integer)
FROM numbers

Datalog - Souffle

// clean(Format, Inp, I, Outp): Outp is what remains of the first I characters
// of Inp after dropping every occurrence of the separator for Format
// ("," for "comma_sep", "_" for "under_sep").
.decl clean(Format:symbol, Inp:symbol, I:number, Outp:symbol)
// Base case: nothing consumed yet, so the cleaned prefix is empty.
clean(Format, Inp, 0, "") :- numbers(Format, Inp).
// Step: consume the character at position I and extend the cleaned prefix.
clean(Format, Inp, I+1, Outp) :-
  clean(Format, Inp, I, Outp_rec),
  // Positions run from 0 to strlen(Inp)-1; stopping here keeps substr inside
  // the string and makes I = strlen(Inp) the final state.
  I < strlen(Inp),
  Chr = substr(Inp, I, 1),
  ((Format = "comma_sep", Sep = ",");
   (Format = "under_sep", Sep = "_")),
  // Skip the separator, keep every other character.
  ((Chr  = Sep, Outp = Outp_rec);
   (Chr != Sep, Outp = cat(Outp_rec, Chr))).

// strings_to_numbers(N): N is the number parsed from a fully cleaned input,
// i.e. the clean tuple whose position equals the length of the input string.
strings_to_numbers(N) :-
  clean(_, Inp, Len, Digits),
  Len = strlen(Inp),
  N = to_number(Digits).

Q - kdb+

/ convert: remove the separator that corresponds to format from the string
/ val, then parse the remaining characters as a long.
convert: {[val; format] 
  sep: (("comma_sep"; "under_sep") ! (","; "_")) format; / dictionary lookup: format name -> separator character
  "J" $ ssr[val; sep; ""]} / ssr swaps every sep in val for the empty string before the cast
  
/ strings_to_numbers: result of applying convert to each (value, format) pair
/ drawn from the value and format columns of numbers.
strings_to_numbers:
  convert'[numbers[`value]; numbers[`format]]