# Buyer-country distribution figures.
#
# Downloads the buyer-country counts, keeps rows whose timestamp is not later
# than 2016-10-01, and writes two PDF bar charts into `out_dir`:
#   * country_dist.pdf       - each country group's share of the total count
#   * countryXoccupation.pdf - occupation shares within each country group
#
# The script no longer clears the workspace or changes the working directory;
# output paths are built from `out_dir` instead.

library(data.table)
library(ggplot2)
library(scales)
library(car)

# Directory the PDFs are written to (previously reached through setwd()).
out_dir <- "/Users/otto/Documents/OLI_paper/Analyses/BuyerCountryDist"
stopifnot(dir.exists(out_dir))

# Render plot object `p` into a PDF at `file`.
#
# The plot is printed explicitly because top-level auto-printing does not
# happen when a script is run through source(), which would leave the PDF
# empty. The device is closed via on.exit() so it is released even if
# rendering fails. Returns `file` invisibly.
save_plot_pdf <- function(file, p) {
  pdf(file)
  on.exit(dev.off(), add = TRUE)
  print(p)
  invisible(file)
}

# Load data ----
bcd <- fread("http://linux.oii.ox.ac.uk/~otto.kassi/OLI/bcountrydata.txt",
             header = TRUE)
bcd <- bcd[bcd$timestamp <= "2016-10-01", ]

# Collapse to one row per occupation x country group (summing `count`).
bcd <- aggregate(count ~ occupation + country_group, data = bcd, FUN = sum)

# Fig 4: country-group share of the overall count ----
bcd.agg <- aggregate(count ~ country_group, data = bcd, FUN = sum)
names(bcd.agg)[2] <- "cnt.sum"
denom <- sum(bcd.agg$cnt.sum)
bcd.agg$share <- bcd.agg$cnt.sum / denom
bcd.agg <- bcd.agg[order(bcd.agg$share, decreasing = TRUE), ]
# Levels are reversed so that, after coord_flip(), the largest share is drawn
# at the top of the chart.
bcd.agg$country_group <- ordered(bcd.agg$country_group,
                                 levels = rev(bcd.agg$country_group))

fig4 <- ggplot(bcd.agg, aes(country_group, y = share)) +
  geom_bar(stat = "identity") +
  labs(y = "Share", x = "") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 1), labels = percent) +
  guides(fill = "none")
save_plot_pdf(file.path(out_dir, "country_dist.pdf"), fig4)

# Fig 5: occupation shares within each country group ----
bcd.country.agg <- aggregate(count ~ country_group, data = bcd, FUN = sum)
names(bcd.country.agg)[2] <- "c.count"
bcd <- merge(bcd, bcd.country.agg, by = "country_group")
bcd$share <- bcd$count / bcd$c.count

# Shorten the occupation labels. The pieces are joined with "; " into the
# recode specification understood by car::recode().
occupation_recodes <- paste(
  c(
    "'Clerical and data entry'='Clerical'",
    "'Software development and technology' = 'Software'",
    "'Writing and translation' = 'Writing'",
    "'Professional services' = 'Professional'",
    "'Creative and multimedia' = 'Creative'",
    "'Sales and marketing support' = 'Sales'"
  ),
  collapse = "; "
)
bcd$occupation <- as.character(bcd$occupation)
bcd$occupation <- as.character(car::recode(bcd$occupation, occupation_recodes))

# Fixed level order controls the fill/legend order; labels not listed here
# become NA.
bcd$occupation <- factor(
  bcd$occupation,
  levels = c("Software", "Creative", "Clerical",
             "Writing", "Sales", "Professional"),
  ordered = TRUE
)
names(bcd)[names(bcd) == "occupation"] <- "Occupation"

# Explicit country ordering, reversed because coord_flip() draws the first
# level at the bottom.
bcd$country_group <- ordered(
  bcd$country_group,
  levels = rev(c("United States", "Canada", "other Americas",
                 "United Kingdom", "other Europe", "Australia", "India",
                 "other Asia and Oceania", "all Africa"))
)

# The former `order = Occupation` aesthetic is dropped: ggplot2 >= 2.0.0
# (already required by `direction` below) ignores it.
fig5 <- ggplot(bcd[order(bcd$country_group, bcd$Occupation), ],
               aes(country_group, y = share, fill = Occupation)) +
  coord_flip() +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_brewer(palette = "Greys", name = "Occupation",
                    guide = guide_legend(reverse = FALSE), direction = -1) +
  theme(axis.text.x = element_text(angle = 0)) +
  scale_y_continuous(labels = percent) +
  labs(x = " ", y = "Occupation share within a country")
save_plot_pdf(file.path(out_dir, "countryXoccupation.pdf"), fig5)

# Disabled alternative version of the occupation figure, kept commented out
# as in the original script ----
# merge(bcd, bcd.agg, by='country_group') -> bcd
# bcd$share <- bcd$count / bcd$cnt.sum
# bcd$occupation <- as.character(bcd$occupation)
# bcd$occupation = as.character(recode(bcd$occupation,
# "'Clerical and data entry'='Clerical';
# 'Software development and technology' = 'Software';
# 'Writing and translation' = 'Writing';
# 'Professional services' = 'Professional';
# 'Creative and multimedia' = 'Creative';
# 'Sales and marketing support' = 'Sales'"))
# bcd$occupation <- factor(bcd$occupation, levels=c('Software',
# 'Creative','Clerical',
# 'Writing',
# 'Sales',
# 'Professional'))
# bcd$occupation <- as.ordered(bcd$occupation)
# bcd$bcountry <- factor(as.character(bcd$country_group), levels=c('United States',
# 'Canada','other Americas','United Kingdom','other Europe','Australia','
# India','other Asia and Oceania','all Africa') )
# bcd$bcountry <- ordered(bcd$bcountry)
# names(bcd)[names(bcd) == 'occupation'] <- 'Occupation'
# pdf('BuyerCountryDist.pdf')
# ggplot(bcd[order(bcd$Occupation),], aes(bcountry, y=share, fill=Occupation, order=Occupation )) +
#   coord_flip() + geom_bar(stat='identity', position='stack') +
#   scale_fill_brewer(palette="Paired" , name="Employer home country", guide = guide_legend(reverse=FALSE)) +
#   theme(axis.text.x=element_text(angle=0)) + scale_y_continuous(labels=percent) +
#   labs(x = " ", y = 'Buyer country market share')
# dev.off()