verbs module

selecting / filtering / ordering

select(df, *args[, regex, like, include, …])

select dataframe columns

where(df, *args, **kwargs)

where/filter dataframe rows

order_by(df, *args, **kwargs)

order/sequence dataframe

data analysis / review

count(df[, columns, totals_name, percent, …])

show column/category/factor frequency

head(df[, n, shape, tablefmt, precision])

show first n records of a dataframe.

info(df[, n_dupes, fillna, memory_info])

show dataframe metadata

memory(df)

show dataframe consumed memory (MB)

sample(df[, n, shape])

show sample data

tail(df[, n, shape, tablefmt, precision])

show last n records of a dataframe

column management

clean_names(df[, case, title])

Clean column names, strip blanks, lowercase, snake_case.

names(df[, regex, astype])

show dataframe column information

drop(df, *args, **kwargs)

drop column(s)

drop_if(df[, value, how])

drop columns containing blanks, zeros or na

duplicate_names(columns[, sep, info])

identify and de-duplicate dataframe column names

flatten_names(df[, join_char, remove_prefix])

Flatten multi-index column headings

relocate(df[, column, loc, ref_column, index])

move column(s) in a dataframe

rename(df, *args, **kwargs)

rename dataframe col(s)

replace_names(df, dict_[, info])

replace column names (or partially) with dictionary values

rows_to_names(df[, start, end, delimitter, …])

promote row(s) to column name(s)

set_names(df[, columns])

set dataframe column names

data cleaning

distinct(df, *args[, shape])

select distinct/unique rows

duplicated(df[, subset, keep, sort, column, …])

locate duplicate data

fillna(df, *args, **kwargs)

fill na values in column(s)

overlaps(df[, unique_key, start, end, overlaps])

Analyse dataframe rows with overlapping date periods

non_alpha(df, col_name)

check for non-alphanumeric characters

assign/update column(s)

across(df[, columns, function, series_obj])

Apply function across multiple columns

assign(df, *args, **kwargs)

Assign new columns to a DataFrame.

string functions

str_clean_number(series[, decimal, dtype])

clean number values held as text (e.g. currency or price strings) in a series

str_join(df[, columns, column, sep, loc, drop])

join or combine columns with a separator

str_split(df[, column, columns, pat, n, …])

split column

str_squish(df[, str_columns])

reduce repeated whitespace inside a string.

str_trim(df[, str_columns])

strip leading/trailing blanks

joining data

inner_join(df, *args, **kwargs)

inner join: df (All) | df2 (All) - only records matching in both are returned

left_join(df, *args, **kwargs)

left join: df (All) | df2 (All/na) - all df rows are always returned

right_join(df, *args, **kwargs)

right join: df (All/na) | df2 (All) - all df2 rows are always returned

outer_join(df, *args, **kwargs)

outer join: df (All/na) | df2 (All/na) - all rows from both are returned

aggregation

adorn_totals(df[, columns, fillna, name, …])

add totals to a dataframe

group_by(df, *args[, freq])

Group by dataframe

summarise(df, *args, **kwargs)

summarise or aggregate data.

transform(df[, index])

Add a group calculation to a grouped DataFrame

reshaping data

explode(df, *args, **kwargs)

Transform list-like column values to rows

explode_lists(df[, delimitter, replace_nans])

explode list-like dataframe column(s)

pivot_longer(df, *args, **kwargs)

pivot dataframe wide to long

pivot_table(df, *args[, freq, format_date])

create Excel-like pivot table

split_dataframe(df[, chunk_size])

Split dataframe by chunk_size rows, returning multiple dataframes

stack(df, *args, **kwargs)

stack dataframe

summary_df(datasets[, title, col_total, …])

Summarise a dictionary of dataframes.

unstack(df, *args, **kwargs)

unstack dataframe

index management

fmt_dateidx(df[, freq])

format dataframe datelike index

rename_axis(df, *args, **kwargs)

rename dataframe axis

reset_index(df, *args, **kwargs)

reset dataframe index

set_index(df, *args, **kwargs)

set dataframe index