import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, PercentFormatter
from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
import seaborn as sns
"datafantic.mplstyle") plt.style.use(
Exploring Airbnb Fees
This notebook explores the scraped data I collected from Airbnb. To view the notebook where I scraped the data, see 01 - Data Collection.
You can view the written analysis that results from this code on datafantic.com where I’m building with data, one project at a time. Sign up to get notified when a new project drops!
Now that the listings are scraped let’s read them in and do some analysis. We are only getting a single time snapshot so we will be a bit limited on big understanding. However, these are the questions I’m thinking of:
What is the average Airbnb cleaning fee?
Do certain cities have higher cleaning fees than others? Are there country differences?
What percentage of the cleaning fee is the total cost of the stay? This will require a look at the one-day listings and flexible (multi-day) listings. If cleaning fees are static for each property, then we can make a good assumption on percentages.
Is there any commonality for listings with higher cleaning fees? Larger houses or something else?
= pd.read_csv("data/flexible_listings.csv")
flexible_listings = pd.read_csv("data/one_day_listings.csv") one_day_listings
Cleanup the data
I scraped the data as is from the site, so this means we get lots of strings. I’ll convert these to ints or datetimes where appropriate and clean the data up.
Clean flexible_listings
flexible_listings
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | $100 | $82 | $600 | Entire condo hosted by Kevin | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/73539817214550286... | Dallas--TX--United-States | 2022-10-28 | 2022-11-03 |
1 | NaN | NaN | $30 | $54 | $350 | Tiny home hosted by Grady | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/15342315?adults=1... | Dallas--TX--United-States | 2022-10-30 | 2022-11-04 |
2 | NaN | NaN | $110 | $131 | $824 | Entire rental unit hosted by Jen | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/46581123?adults=1... | Dallas--TX--United-States | 2022-10-19 | 2022-10-26 |
3 | NaN | NaN | NaN | $24 | $196 | Private room in home hosted by R And R Hostel | 1 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/73090942173103176... | Dallas--TX--United-States | 2022-10-20 | 2022-10-25 |
4 | NaN | NaN | $59 | $61 | $464 | Entire rental unit hosted by Carpediem | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/69420558176578361... | Dallas--TX--United-States | 2022-10-24 | 2022-10-30 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3386 | NaN | NaN | $49 | $0 | $953 | Entire rental unit hosted by Marie | 3 | 2.0 | 2.0 | 1 | https://www.airbnb.com/rooms/65582697587414494... | Paris | 2023-02-01 | 2023-02-06 |
3387 | NaN | NaN | NaN | $116 | $822 | Entire rental unit hosted by Vanessa | 4 | 2.0 | 2.0 | 1 | https://www.airbnb.com/rooms/34830963?adults=1... | Paris | 2022-12-13 | 2022-12-18 |
3388 | NaN | NaN | NaN | $22 | $157 | Private room in townhouse hosted by Marie | 1 | 1.0 | NaN | 1.5 | https://www.airbnb.com/rooms/45496636?adults=1... | Paris | 2023-08-01 | 2023-08-06 |
3389 | NaN | NaN | $39 | $82 | $538 | Entire rental unit hosted by Margaux | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/18545424?adults=1... | Paris | 2023-01-04 | 2023-01-09 |
3390 | NaN | NaN | $15 | $108 | $753 | Entire rental unit hosted by Adeline | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/47823298?adults=1... | Paris | 2022-11-06 | 2022-11-13 |
3391 rows × 14 columns
Get all the columns in the right format.
'check_in'] = pd.to_datetime(flexible_listings['check_in'])
flexible_listings['check_out'] = pd.to_datetime(flexible_listings['check_out']) flexible_listings[
'cleaning_fee'] = flexible_listings['cleaning_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
flexible_listings['service_fee'] = flexible_listings['service_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
flexible_listings['price_minus_fees'] = flexible_listings['price_minus_fees'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int") flexible_listings[
/tmp/ipykernel_3847/456426875.py:1: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
flexible_listings['cleaning_fee'] = flexible_listings['cleaning_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
/tmp/ipykernel_3847/456426875.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
flexible_listings['service_fee'] = flexible_listings['service_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
/tmp/ipykernel_3847/456426875.py:3: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
flexible_listings['price_minus_fees'] = flexible_listings['price_minus_fees'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
Get number of days for each stay.
'stay_days'] = (flexible_listings['check_out'] - flexible_listings['check_in']).dt.days flexible_listings[
See the stay duration.
'stay_days'].mean() flexible_listings[
5.539368917723386
'stay_days'].hist() flexible_listings[
The flexible listings scraping was about between 5 and 7 days. But it seems only one listing had a long stay discount and none had a weekly discount. Could have been my scraper but I don’t think so.
~flexible_listings['long_stay_discount'].isna()] flexible_listings[
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | stay_days | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1017 | NaN | -$181 | 0 | 0 | 1205 | Room in hotel hosted by Row NYC | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/32678720?adults=1... | New-York-City--Manhattan--United-States | 2023-05-01 | 2023-05-06 | 5 |
~flexible_listings['weekly_discount'].isna()] flexible_listings[
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | stay_days |
---|
Finally let’s clean up the city name.
'city'] = flexible_listings['location'].str.split("--").str.get(0)
flexible_listings['city'] = flexible_listings['city'].str.replace("-", " ") flexible_listings[
Clean one_day_listings
Ok now let’s do the same thing for the one day listings.
one_day_listings
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | $30 | $12 | $58 | Entire rental unit hosted by Leonard | 3 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/16353509?check_in... | Dallas--TX--United-States | 2022-11-02 | 2022-11-03 |
1 | NaN | NaN | $40 | $14 | $58 | Entire loft hosted by Simaiya | 4 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/67089461123550275... | Dallas--TX--United-States | 2022-11-02 | 2022-11-03 |
2 | NaN | NaN | $35 | $15 | $69 | Entire rental unit hosted by Frontdesk | 4 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/50029383?check_in... | Dallas--TX--United-States | 2022-11-02 | 2022-11-03 |
3 | NaN | NaN | $30 | $13 | $65 | Entire loft hosted by Leonard | 6 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/39844580?check_in... | Dallas--TX--United-States | 2022-11-02 | 2022-11-03 |
4 | NaN | NaN | NaN | $14 | $100 | Entire rental unit hosted by Dante | 3 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/69933821236595629... | Dallas--TX--United-States | 2022-11-02 | 2022-11-03 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3561 | NaN | NaN | $80 | $0 | $177 | Entire rental unit hosted by Jean-Charles | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/50883915?check_in... | Paris | 2022-11-02 | 2022-11-03 |
3562 | NaN | NaN | NaN | NaN | $108 | Room in boutique hotel hosted by Hotel De L’Aq... | 1 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/35145340?check_in... | Paris | 2022-11-02 | 2022-11-03 |
3563 | NaN | NaN | $64 | $0 | $173 | Entire rental unit hosted by Jérémy | 3 | NaN | 2.0 | 1 | https://www.airbnb.com/rooms/47417486?check_in... | Paris | 2022-11-02 | 2022-11-03 |
3564 | NaN | NaN | $72 | $0 | $143 | Entire rental unit hosted by Karim | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/68604321515452884... | Paris | 2022-11-02 | 2022-11-03 |
3565 | NaN | NaN | $10 | $12 | $75 | Private room in rental unit hosted by Warda | 1 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/65036327067970031... | Paris | 2022-11-02 | 2022-11-03 |
3566 rows × 14 columns
'check_in'] = pd.to_datetime(one_day_listings['check_in'])
one_day_listings['check_out'] = pd.to_datetime(one_day_listings['check_out']) one_day_listings[
'cleaning_fee'] = one_day_listings['cleaning_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
one_day_listings['service_fee'] = one_day_listings['service_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
one_day_listings['price_minus_fees'] = one_day_listings['price_minus_fees'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int") one_day_listings[
/tmp/ipykernel_3847/322107567.py:1: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
one_day_listings['cleaning_fee'] = one_day_listings['cleaning_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
/tmp/ipykernel_3847/322107567.py:2: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
one_day_listings['service_fee'] = one_day_listings['service_fee'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
/tmp/ipykernel_3847/322107567.py:3: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.
one_day_listings['price_minus_fees'] = one_day_listings['price_minus_fees'].str.replace("$","").str.replace(",","").fillna(value=0).astype("int")
'stay_days'] = (one_day_listings['check_out'] - one_day_listings['check_in']).dt.days one_day_listings[
Well it looks like the scraper worked properly on the days.
'stay_days'].mean() one_day_listings[
1.0
'city'] = one_day_listings['location'].str.split("--").str.get(0)
one_day_listings['city'] = one_day_listings['city'].str.replace("-", " ") one_day_listings[
What is the average Airbnb cleaning fee?
First let’s look at the difference in cleaning fees for listings.
'cleaning_fee'].mean() flexible_listings[
50.206723680330285
'cleaning_fee'].mean() one_day_listings[
35.62450925406618
Almost 28% of one day listings have no cleaning fees.
'cleaning_fee'] == 0].shape[0] / one_day_listings.shape[0] one_day_listings[one_day_listings[
0.2781828379136287
Only 13% of multi day listings have no cleaning fees.
'cleaning_fee'] == 0].shape[0] / flexible_listings.shape[0] flexible_listings[flexible_listings[
0.1338838100855205
The service fee is for Airbnb. They seem to charge right at 15%.
'service_fee'] / flexible_listings['price_minus_fees']).mean() (flexible_listings[
0.14593970653802638
= plt.subplots()
fig, ax 'cleaning_fee'],
ax.hist(flexible_listings[=range(0, 400, 25),
bins=np.ones(len(flexible_listings['cleaning_fee'])) / len(flexible_listings['cleaning_fee']))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .33)
ax.set_ylim(
# Add in title and subtitle
"""Histogram of cleaning fees for stays longer than 1 day""")
ax.set_title(=.08, y=.87,
ax.text(x="Sample of ~3,300 Airbnb listings from 12 cities.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.22)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/multi_day_histogram.png') plt.savefig(
= plt.subplots()
fig, ax 'cleaning_fee'],
ax.hist(one_day_listings[=range(0, 400, 25),
bins=np.ones(len(one_day_listings['cleaning_fee'])) / len(one_day_listings['cleaning_fee']))
weights
0, 225)
ax.set_xlim(0, 1750)
ax.set_ylim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .53)
ax.set_ylim(
# Add in title and subtitle
"""Histogram of cleaning fees for one day stays.""")
ax.set_title(=.08, y=.87,
ax.text(x="Sample of ~3,300 Airbnb listings from 12 cities.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/one_day_histogram.png') plt.savefig(
'cleaning_fee'] > 50].shape[0] / one_day_listings.shape[0] * 100 one_day_listings[one_day_listings[
26.275939427930457
Do certain cities have higher cleaning fees than others? Are there country differences?
= flexible_listings.groupby(by="city").mean()['cleaning_fee'].reset_index() fees_city
= fees_city.sort_values('cleaning_fee', ascending=False) fees_city
= plt.subplots()
fig, ax 'city'], fees_city['cleaning_fee'])
ax.barh(fees_city[=False, axis='y')
ax.grid(visible=True, axis='x')
ax.grid(visible
'${x:1.0f}')
ax.xaxis.set_major_formatter(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Land of the fees, home of the bnb""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Average Airbnb cleaning fee by city. Stays longer than 1 day.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.08), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/fees_by_city.png') plt.savefig(
= pd.concat([flexible_listings,one_day_listings]) all_listings
= flexible_listings[flexible_listings['city'] == 'Austin']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .32)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Everything is bigger in Austin, Texas""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Histogram of Airbnb cleaning fees in Austin. Stays longer than 1 day.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/austin_histogram.png') plt.savefig(
= flexible_listings[flexible_listings['city'] == 'New York City']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .32)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""New York City cleaning fee histogram""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)
= flexible_listings[flexible_listings['city'] == 'Los Angeles']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .32)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Los Angeles cleaning fee histogram""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)
= flexible_listings[flexible_listings['city'] == 'San Francisco']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .32)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""San Francisco cleaning fee histogram""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)
= flexible_listings[flexible_listings['city'] == 'Dallas']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .32)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Dallas cleaning fee histogram""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)
= flexible_listings[flexible_listings['city'] == 'Paris']['cleaning_fee']
data = plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim('${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .55)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Paris cleaning fee histogram""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)
'city'] == 'Austin') & (flexible_listings['cleaning_fee'] > 150)] flexible_listings[(flexible_listings[
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | stay_days | city | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
299 | NaN | NaN | 175 | 85 | 506 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 2.0 | 2 | https://www.airbnb.com/rooms/50197119?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-31 | 7 | Austin |
303 | NaN | NaN | 175 | 81 | 397 | Entire rental unit hosted by Shirlene | 2 | NaN | 1.0 | 1 | https://www.airbnb.com/rooms/50659893?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-30 | 6 | Austin |
310 | NaN | NaN | 175 | 67 | 300 | Entire rental unit hosted by Shirlene | 3 | NaN | 1.0 | 1 | https://www.airbnb.com/rooms/46338510?adults=1... | Austin--TX--United-States | 2022-10-23 | 2022-10-28 | 5 | Austin |
313 | NaN | NaN | 175 | 78 | 380 | Entire rental unit hosted by Shirlene | 4 | 2.0 | 2.0 | 1 | https://www.airbnb.com/rooms/54041209?adults=1... | Austin--TX--United-States | 2022-10-23 | 2022-10-29 | 6 | Austin |
314 | NaN | NaN | 175 | 63 | 274 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/32441667?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-29 | 5 | Austin |
316 | NaN | NaN | 175 | 81 | 397 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/62016700224951610... | Austin--TX--United-States | 2022-10-23 | 2022-10-29 | 6 | Austin |
317 | NaN | NaN | 175 | 105 | 668 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 3.0 | 1 | https://www.airbnb.com/rooms/62018601208919202... | Austin--TX--United-States | 2022-10-30 | 2022-11-06 | 7 | Austin |
318 | NaN | NaN | 175 | 95 | 497 | Entire rental unit hosted by Shirlene | 4 | 2.0 | 2.0 | 2 | https://www.airbnb.com/rooms/54041377?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-30 | 6 | Austin |
319 | NaN | NaN | 175 | 93 | 569 | Entire rental unit hosted by Shirlene | 3 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/46420312?adults=1... | Austin--TX--United-States | 2022-10-25 | 2022-11-01 | 7 | Austin |
320 | NaN | NaN | 175 | 66 | 290 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 3.0 | 1 | https://www.airbnb.com/rooms/69299110895293755... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin |
321 | NaN | NaN | 175 | 62 | 261 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/52706100?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-29 | 5 | Austin |
322 | NaN | NaN | 175 | 65 | 282 | Entire rental unit hosted by Shirlene | 3 | NaN | 1.0 | 1 | https://www.airbnb.com/rooms/73183764329805170... | Austin--TX--United-States | 2022-10-24 | 2022-10-29 | 5 | Austin |
328 | NaN | NaN | 180 | 107 | 575 | Entire home hosted by Christopher | 3 | NaN | 2.0 | 1 | https://www.airbnb.com/rooms/42616935?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-29 | 5 | Austin |
333 | NaN | NaN | 175 | 102 | 544 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 2.0 | 2 | https://www.airbnb.com/rooms/73183762401014390... | Austin--TX--United-States | 2022-10-24 | 2022-10-31 | 7 | Austin |
335 | NaN | NaN | 175 | 70 | 319 | Entire home hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/53813791?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-29 | 5 | Austin |
340 | NaN | NaN | 200 | 134 | 750 | Entire home hosted by Jack | 3 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/72742999684461264... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin |
344 | NaN | NaN | 175 | 82 | 475 | Entire rental unit hosted by Shirlene | 5 | 1.0 | 3.0 | 1 | https://www.airbnb.com/rooms/61288848383833086... | Austin--TX--United-States | 2022-10-24 | 2022-10-31 | 7 | Austin |
354 | NaN | NaN | 175 | 66 | 289 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/52772517?adults=1... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin |
373 | NaN | NaN | 175 | 74 | 347 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 4.0 | 2 | https://www.airbnb.com/rooms/63106532084411512... | Austin--TX--United-States | 2022-10-23 | 2022-10-28 | 5 | Austin |
377 | NaN | NaN | 175 | 97 | 513 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/53549137?adults=1... | Austin--TX--United-States | 2022-10-25 | 2022-10-31 | 6 | Austin |
394 | NaN | NaN | 175 | 88 | 451 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/55364974057973039... | Austin--TX--United-States | 2022-10-24 | 2022-10-30 | 6 | Austin |
399 | NaN | NaN | 175 | 104 | 660 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 2.0 | 2 | https://www.airbnb.com/rooms/49214415?adults=1... | Austin--TX--United-States | 2022-10-28 | 2022-11-04 | 7 | Austin |
409 | NaN | NaN | 175 | 79 | 386 | Entire rental unit hosted by Shirlene | 3 | NaN | 1.0 | 1 | https://www.airbnb.com/rooms/46338658?adults=1... | Austin--TX--United-States | 2022-10-25 | 2022-10-30 | 5 | Austin |
419 | NaN | NaN | 250 | 129 | 665 | Entire condo hosted by Christine | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/71450917591726026... | Austin--TX--United-States | 2022-10-31 | 2022-11-07 | 7 | Austin |
425 | NaN | NaN | 175 | 106 | 574 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 4.0 | 2 | https://www.airbnb.com/rooms/63075781730046754... | Austin--TX--United-States | 2022-10-23 | 2022-10-29 | 6 | Austin |
426 | NaN | NaN | 175 | 179 | 1090 | Entire condo hosted by Barclé Group LLC | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/68918069924803923... | Austin--TX--United-States | 2022-11-05 | 2022-11-11 | 6 | Austin |
456 | NaN | NaN | 175 | 109 | 600 | Entire rental unit hosted by Shirlene | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/50105920?adults=1... | Austin--TX--United-States | 2022-10-31 | 2022-11-06 | 6 | Austin |
484 | NaN | NaN | 175 | 122 | 688 | Entire rental unit hosted by Shirlene | 3 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/46758726?adults=1... | Austin--TX--United-States | 2022-11-03 | 2022-11-09 | 6 | Austin |
490 | NaN | NaN | 450 | 376 | 2457 | Entire home hosted by Tray | 12 | 4.0 | 8.0 | 2 | https://www.airbnb.com/rooms/61974749821930955... | Austin--TX--United-States | 2022-10-24 | 2022-10-31 | 7 | Austin |
497 | NaN | NaN | 175 | 117 | 655 | Entire rental unit hosted by Shirlene | 3 | 1.0 | 3.0 | 1 | https://www.airbnb.com/rooms/62018602401955123... | Austin--TX--United-States | 2022-11-04 | 2022-11-10 | 6 | Austin |
501 | NaN | NaN | 168 | 131 | 758 | Entire home hosted by Vacasa Texas | 7 | 2.0 | 4.0 | 2.5 | https://www.airbnb.com/rooms/72233480082994855... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin |
526 | NaN | NaN | 200 | 302 | 2043 | Entire home hosted by Jen | 10 | 4.0 | 5.0 | 2 | https://www.airbnb.com/rooms/48487519?adults=1... | Austin--TX--United-States | 2022-10-26 | 2022-11-02 | 7 | Austin |
536 | NaN | NaN | 175 | 97 | 513 | Entire rental unit hosted by Shirlene | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/52687391?adults=1... | Austin--TX--United-States | 2022-10-27 | 2022-11-02 | 6 | Austin |
553 | NaN | NaN | 175 | 158 | 945 | Entire home hosted by Susan | 5 | 1.0 | 4.0 | 1 | https://www.airbnb.com/rooms/51527840?adults=1... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin |
= flexible_listings[flexible_listings['city'] == 'Berlin']['cleaning_fee']
data
= plt.subplots()
fig, ax
ax.hist(data, =range(0, 400, 25),
bins=np.ones(len(data)) / len(data))
weights
0, 225)
ax.set_xlim(#ax.set_ylim(0, 1700)
'${x:1.0f}')
ax.xaxis.set_major_formatter(25))
ax.xaxis.set_major_locator(MultipleLocator(1, decimals=0))
ax.yaxis.set_major_formatter(PercentFormatter(0, .64)
ax.set_ylim(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Thinner Berliner""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Histogram of Airbnb cleaning fees in Berlin. Stays longer than 1 day.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.99,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/berlin_histogram.png') plt.savefig(
What percentage of the cleaning fee is the total cost of the stay?
'id'] = flexible_listings['url'].str.split("?").str.get(0).str.split("rooms/").str.get(1)
flexible_listings['id'] = one_day_listings['url'].str.split("?").str.get(0).str.split("rooms/").str.get(1) one_day_listings[
= flexible_listings.merge(one_day_listings, how='inner', on='id') matched_listings
matched_listings.columns
Index(['weekly_discount_x', 'long_stay_discount_x', 'cleaning_fee_x',
'service_fee_x', 'price_minus_fees_x', 'title_x', 'guest_x',
'bedrooms_x', 'beds_x', 'baths_x', 'url_x', 'location_x', 'check_in_x',
'check_out_x', 'stay_days_x', 'city_x', 'id', 'weekly_discount_y',
'long_stay_discount_y', 'cleaning_fee_y', 'service_fee_y',
'price_minus_fees_y', 'title_y', 'guest_y', 'bedrooms_y', 'beds_y',
'baths_y', 'url_y', 'location_y', 'check_in_y', 'check_out_y',
'stay_days_y', 'city_y'],
dtype='object')
'cleaning_fee_x','cleaning_fee_y']].mean() matched_listings[[
cleaning_fee_x 37.980630
cleaning_fee_y 34.886199
dtype: float64
Note this price total is exclusive of discounts.
'price_total'] = flexible_listings[['cleaning_fee','service_fee','price_minus_fees']].sum('columns')
flexible_listings['cleaning_fee_ratio'] = (flexible_listings['cleaning_fee'] / flexible_listings['price_total']) flexible_listings[
'price_total'] = one_day_listings[['cleaning_fee','service_fee','price_minus_fees']].sum('columns')
one_day_listings['cleaning_fee_ratio'] = (one_day_listings['cleaning_fee'] / one_day_listings['price_total']) one_day_listings[
= one_day_listings[one_day_listings['cleaning_fee_ratio'] > 0]['cleaning_fee_ratio'].mean()
one_day_avg one_day_avg
0.2567421095914405
'cleaning_fee_ratio'] > 0].groupby(by='stay_days').mean()['cleaning_fee_ratio'] flexible_listings[flexible_listings[
stay_days
5 0.091761
6 0.080735
7 0.070738
Name: cleaning_fee_ratio, dtype: float64
= list(flexible_listings[flexible_listings['cleaning_fee_ratio'] > 0].groupby(by='stay_days').mean()['cleaning_fee_ratio'].values) cleaning_ratios
Let’s calculate this separately for US cities so I can compare it in the article.
= 'Austin|Dallas|Angeles|Francisco|York' us_cities
'city'].str.contains(us_cities)]['city'].unique() flexible_listings[flexible_listings[
array(['Dallas', 'Austin', 'Los Angeles', 'New York City',
'San Francisco'], dtype=object)
'cleaning_fee_ratio'] > 0) & (flexible_listings['city'].str.contains(us_cities))].groupby(by='stay_days').mean()['cleaning_fee_ratio'] flexible_listings[(flexible_listings[
stay_days
5 0.104057
6 0.103691
7 0.088404
Name: cleaning_fee_ratio, dtype: float64
'cleaning_fee_ratio'] > 0) & (one_day_listings['city'].str.contains(us_cities))]['cleaning_fee_ratio'].mean() one_day_listings[(one_day_listings[
0.27876845657082067
What about non-us cities?
'cleaning_fee_ratio'] > 0) & (~flexible_listings['city'].str.contains(us_cities))].groupby(by='stay_days').mean()['cleaning_fee_ratio'] flexible_listings[(flexible_listings[
stay_days
5 0.081278
6 0.063245
7 0.056643
Name: cleaning_fee_ratio, dtype: float64
'cleaning_fee_ratio'] > 0) & (~one_day_listings['city'].str.contains(us_cities))]['cleaning_fee_ratio'].mean() one_day_listings[(one_day_listings[
0.23558809318550125
Separate question, what percentage of listings outside the US have no cleaning fee.
'cleaning_fee_ratio'] == 0) & (~one_day_listings['city'].str.contains(us_cities))].shape[0] one_day_listings[(one_day_listings[
763
'cleaning_fee_ratio'] > 0) & (~one_day_listings['city'].str.contains(us_cities))].shape[0] one_day_listings[(one_day_listings[
1313
'cleaning_fee_ratio'] == 0) & (~one_day_listings['city'].str.contains(us_cities))].shape[0] /
(one_day_listings[(one_day_listings['cleaning_fee_ratio'] > 0) & (~one_day_listings['city'].str.contains(us_cities))].shape[0]) one_day_listings[(one_day_listings[
0.5811119573495811
'cleaning_fee_ratio'] == 0) & (~flexible_listings['city'].str.contains(us_cities))].shape[0] /
(flexible_listings[(flexible_listings['cleaning_fee_ratio'] > 0) & (~flexible_listings['city'].str.contains(us_cities))].shape[0]) flexible_listings[(flexible_listings[
0.20509633312616532
And the opposite, inside the US
'cleaning_fee_ratio'] == 0) & (one_day_listings['city'].str.contains(us_cities))].shape[0] /
(one_day_listings[(one_day_listings['cleaning_fee_ratio'] > 0) & (one_day_listings['city'].str.contains(us_cities))].shape[0]) one_day_listings[(one_day_listings[
0.18160190325138778
'cleaning_fee_ratio'] == 0) & (flexible_listings['city'].str.contains(us_cities))].shape[0] /
(flexible_listings[(flexible_listings['cleaning_fee_ratio'] > 0) & (flexible_listings['city'].str.contains(us_cities))].shape[0]) flexible_listings[(flexible_listings[
0.09337349397590361
Ok let’s move back to the full data and make the chart.
0, one_day_avg) cleaning_ratios.insert(
= [round(x * 100, 1) for x in cleaning_ratios]
cleaning_ratios cleaning_ratios
[25.7, 9.2, 8.1, 7.1]
= plt.subplots()
fig, ax '1 day', '5 days', '6 days', '7 days'], cleaning_ratios, width=.6)
ax.bar([
'{x:1.0f}%')
ax.yaxis.set_major_formatter("Cleaning fee share of total price")
ax.set_ylabel("Stay duration")
ax.set_xlabel(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Staying longer pays""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Share of cleaning fee in total price by stay duration.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.98,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/stay_duration.png') plt.savefig(
Is there any commonality for listings with higher cleaning fees?
= plt.subplots()
fig, ax =flexible_listings[flexible_listings['beds'] < 7],
sns.boxplot(data='beds', y='cleaning_fee',
x=ax,
ax='#1879CE',
color=.5, linewidth=2)
width0, 320)
ax.set_ylim(
"Cleaning fee")
ax.set_ylabel("Number of beds")
ax.set_xlabel(
'${x:1.0f}')
ax.yaxis.set_major_formatter(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Messy beds""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Boxplots of cleaning fees by the number of beds.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.98,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab
ax.add_artist(ab)
# Export plot as high resolution PNG
'images/fees_beds.png') plt.savefig(
'beds'] < 7) & (flexible_listings['city'] == 'Austin')] flexible_listings[(flexible_listings[
weekly_discount | long_stay_discount | cleaning_fee | service_fee | price_minus_fees | title | guest | bedrooms | beds | baths | url | location | check_in | check_out | stay_days | city | id | price_total | cleaning_fee_ratio | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
295 | NaN | NaN | 50 | 54 | 330 | Tiny home hosted by Trish | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/4906733?adults=1&... | Austin--TX--United-States | 2022-12-18 | 2022-12-23 | 5 | Austin | 4906733 | 434 | 0.115207 |
296 | NaN | NaN | 70 | 88 | 550 | Entire rental unit hosted by Leon And Stella | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/13658208?adults=1... | Austin--TX--United-States | 2022-10-25 | 2022-10-30 | 5 | Austin | 13658208 | 708 | 0.098870 |
297 | NaN | NaN | 19 | 49 | 329 | Private room in guest suite hosted by Nate | 1 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/20036438?adults=1... | Austin--TX--United-States | 2022-10-25 | 2022-10-31 | 6 | Austin | 20036438 | 397 | 0.047859 |
298 | NaN | NaN | 50 | 104 | 687 | Entire rental unit hosted by Ana Maria | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/48071617?adults=1... | Austin--TX--United-States | 2022-11-01 | 2022-11-08 | 7 | Austin | 48071617 | 841 | 0.059453 |
299 | NaN | NaN | 175 | 85 | 506 | Entire rental unit hosted by Shirlene | 6 | 2.0 | 2.0 | 2 | https://www.airbnb.com/rooms/50197119?adults=1... | Austin--TX--United-States | 2022-10-24 | 2022-10-31 | 7 | Austin | 50197119 | 766 | 0.228460 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
582 | NaN | NaN | 60 | 114 | 750 | Entire rental unit hosted by Diamantina | 3 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/8390607?adults=1&... | Austin--TX--United-States | 2022-10-25 | 2022-10-30 | 5 | Austin | 8390607 | 924 | 0.064935 |
583 | NaN | NaN | 95 | 114 | 710 | Entire bungalow hosted by Roman | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/plus/890163?adult... | Austin--TX--United-States | 2022-10-30 | 2022-11-04 | 5 | Austin | plus/890163 | 919 | 0.103373 |
584 | NaN | NaN | 85 | 90 | 550 | Entire home hosted by Rainboat | 4 | 1.0 | 2.0 | 1 | https://www.airbnb.com/rooms/41983088?adults=1... | Austin--TX--United-States | 2022-11-06 | 2022-11-11 | 5 | Austin | 41983088 | 725 | 0.117241 |
585 | NaN | NaN | 65 | 118 | 770 | Entire guesthouse hosted by Nina | 2 | 1.0 | 1.0 | 1 | https://www.airbnb.com/rooms/50851667?adults=1... | Austin--TX--United-States | 2022-11-04 | 2022-11-10 | 6 | Austin | 50851667 | 953 | 0.068206 |
586 | NaN | NaN | 130 | 140 | 865 | Entire condo hosted by Austin | 4 | 2.0 | 2.0 | 1.5 | https://www.airbnb.com/rooms/50126061465762952... | Austin--TX--United-States | 2022-10-24 | 2022-10-30 | 6 | Austin | 501260614657629525 | 1135 | 0.114537 |
286 rows × 19 columns
= plt.subplots()
fig, ax =flexible_listings[(flexible_listings['beds'] < 7) & (flexible_listings['city'] == 'Austin')],
sns.boxplot(data='beds', y='cleaning_fee',
x=ax,
ax='#1879CE',
color=.5, linewidth=2)
width0, 320)
ax.set_ylim(
"Cleaning fee")
ax.set_ylabel("Number of beds")
ax.set_xlabel(
'${x:1.0f}')
ax.yaxis.set_major_formatter(
# Add in title and subtitle
=.08, y=.93,
ax.text(x="""Messy beds in Austin""",
s=fig.transFigure,
transform='left',
ha=28,
fontsize='bold')
weight=.08, y=.89,
ax.text(x="Boxplots of cleaning fees by the number of beds. Austin only.",
s=fig.transFigure,
transform='left',
ha=20,
fontsize=.8)
alpha
# Set the logo
= plt.imread('images/datafantic.png')
logo = OffsetImage(logo, zoom=.25)
imagebox = AnnotationBbox(imagebox, xy=(.98,1.06), xycoords='axes fraction', box_alignment=(1,1), frameon = False)
ab ax.add_artist(ab)