Browse Source

xlsx,csv: trim whitespace, yield to optional block

Closes #108 and #79
dev
Andy Meneely 10 years ago
parent
commit
c8e6f9803c
  1. 8
      CHANGELOG.md
  2. 23
      lib/squib/api/data.rb
  3. 34
      lib/squib/args/import.rb
  4. 33
      samples/excel.rb
  5. BIN
      samples/sample.xlsx
  6. 43
      spec/api/api_data_spec.rb
  7. 2
      spec/data/csv/with_spaces.csv
  8. 181
      spec/data/samples/excel.rb.txt
  9. BIN
      spec/data/xlsx/whitespace.xlsx

8
CHANGELOG.md

@ -1,6 +1,14 @@
# Squib CHANGELOG
Squib follows [semantic versioning](http://semver.org).
## v0.8.0 / Unreleased
Features
* The `xlsx` and `csv` methods now strip leading and trailing whitespace from headers and values where applicable. This is on by default and can be turned off with `strip: false`.
* The `xlsx` and `csv` methods will now yield to a block (if given) for each cell so you can do some extra processing if you like. See samples/excel.rb for an example.
Compatibility change:
* Stripping leading and trailing whitespace of xlsx and csv values by default might change how your data gets parsed.
## v0.7.0 / 2015-09-11
Features

23
lib/squib/api/data.rb

@ -1,6 +1,7 @@
require 'roo'
require 'csv'
require 'squib/args/input_file'
require 'squib/args/import'
module Squib
@ -16,24 +17,30 @@ module Squib
# # | 1 | 2 |
# # | 3 | 4 |
# data = xlsx file: 'data.xlsx', sheet: 0
# {'h1' => [1,3], 'h2' => [2,4]}
# => {'h1' => [1,3], 'h2' => [2,4]}
#
# @option opts file [String] the file to open. Must end in `.xlsx`. Opens relative to the current directory.
# @option opts sheet [Integer] (0) The zero-based index of the sheet from which to read.
# @option opts strip [Boolean] (true) When true, strips leading and trailing whitespace on values and headers
# @option opts qty_header [String] ('qty') Quantity explosion will be applied to the column with this name
# @return [Hash] a hash of arrays based on columns in the spreadsheet
# @api public
def xlsx(opts = {})
input = Args::InputFile.new(file: 'deck.xlsx').load!(opts)
import = Args::Import.new.load!(opts)
s = Roo::Excelx.new(input.file[0])
s.default_sheet = s.sheets[input.sheet[0]]
data = {}
s.first_column.upto(s.last_column) do |col|
header = s.cell(s.first_row,col).to_s
header.strip! if import.strip?
data[header] = []
(s.first_row + 1).upto(s.last_row) do |row|
cell = s.cell(row,col)
# Roo hack for avoiding unnecessary .0's on whole integers (https://github.com/roo-rb/roo/issues/139)
cell = s.excelx_value(row,col) if s.excelx_type(row,col) == [:numeric_or_formula, 'General']
cell.strip! if cell.respond_to?(:strip) && import.strip?
cell = yield(header, cell) if block_given?
data[header] << cell
end#row
end#col
@ -58,17 +65,25 @@ module Squib
# http://www.ruby-doc.org/stdlib-2.0/libdoc/csv/rdoc/CSV.html
#
# @option opts file [String] the CSV-formatted file to open. Opens relative to the current directory.
# @option opts strip [Boolean] (true) When true, strips leading and trailing whitespace on values and headers
# @option opts qty_header [String] ('qty') Quantity explosion will be applied to the column with this name
# @return [Hash] a hash of arrays based on columns in the table
# @api public
def csv(opts = {})
  file = Args::InputFile.new(file: 'deck.csv').load!(opts).file[0]
  # NOTE(review): the SYSTEM_DEFAULTS merge is kept as it appears in the
  # listing; confirm it is still needed now that Args::Import loads `strip`.
  opts = Squib::SYSTEM_DEFAULTS.merge(opts)
  import = Args::Import.new.load!(opts)
  table = CSV.read(file, headers: true, converters: :numeric)
  check_duplicate_csv_headers(table)
  hash = {}
  table.headers.each do |header|
    # Insert each column exactly once, under the (optionally stripped) header.
    # A non-mutating `strip` is used on purpose: `to_s` on a String returns the
    # very same object, so `strip!` would edit the parsed table's header in
    # place, and it raises on frozen strings.
    key = import.strip? ? header.to_s.strip : header.to_s
    # First column wins when two headers collapse to the same key.
    next if hash.key?(key)

    column = table[header]
    if import.strip?
      # Only string-like cells are stripped; numerics and nil pass through.
      column = column.map { |value| value.respond_to?(:strip) ? value.strip : value }
    end
    hash[key] = column
  end
  hash
end

34
lib/squib/args/import.rb

@ -0,0 +1,34 @@
require 'squib/args/arg_loader'
module Squib
  # @api private
  module Args
    # Argument holder for the data-import methods. Declares a single
    # `strip` parameter that defaults to true and must be a strict boolean.
    class Import
      include ArgLoader

      class << self
        # @return [Hash] parameter names mapped to their default values
        def parameters
          { strip: true }
        end

        # @return [Array] parameters that expand; there are none here
        def expanding_parameters
          [] # none of them
        end

        # @return [Array] parameters that carry units; there are none here
        def params_with_units
          [] # none of them
        end
      end

      # Accepts only the literal values true and false for `strip`.
      #
      # @param arg [Object] the candidate value
      # @return [Boolean] arg, unchanged, when it is true or false
      # @raise [RuntimeError] for any other value
      def validate_strip(arg)
        return arg if arg == true || arg == false

        raise 'Strip must be true or false'
      end

      # Predicate-style reader for the `strip` value.
      def strip?
        strip
      end
    end
  end
end

33
samples/excel.rb

@ -3,7 +3,7 @@ require 'squib'
Squib::Deck.new(cards: 3) do
background color: :white
# Takes the first sheet by default
# Reads the first sheet by default (sheet 0)
# Outputs a hash of arrays with the header names as keys
data = xlsx file: 'sample.xlsx'
@ -11,8 +11,33 @@ Squib::Deck.new(cards: 3) do
text str: data['Level'], x: 65, y: 65, font: 'Arial 72'
text str: data['Description'], x: 65, y: 600, font: 'Arial 36'
# You can also specify the sheet, starting at 0
data = xlsx file: 'sample.xlsx', sheet: 2
save format: :png, prefix: 'sample_excel_' #save to individual pngs
end
# Here's another example, a bit more realistic. Here's what's going on:
# * We call xlsx from Squib directly - BEFORE Squib::Deck creation. This
# allows us to infer the number of cards based on the size of the "Name"
# field
# * We make use of quantity explosion. Columns named "Qty" or "Quantity"
# (any capitalization), or any other column named by the "qty_header"
# option, get expanded by the number given
# * We also make sure that trailing and leading whitespace is stripped
# from each value. This is the default behavior in Squib, but the option
# is passed explicitly here to make that clear.
save format: :png, prefix: 'sample_excel_'
# Load sheet 2 of sample.xlsx. The block receives the column header and the
# cell value; whatever the block returns is what gets stored for that cell.
resource_data = Squib.xlsx(file: 'sample.xlsx', sheet: 2, strip: true) do |header, value|
case header
when 'Cost'
"$#{value}k" # e.g. "3" becomes "$3k"
else
value # always return the original value if you didn't do anything to it
end
end
# The deck size is taken from the number of entries in the "Name" column.
Squib::Deck.new(cards: resource_data['Name'].size) do
background color: :white
rect width: :deck, height: :deck
# Each text call is given a whole column of values.
text str: resource_data['Name'], align: :center, width: :deck, hint: 'red'
text str: resource_data['Cost'], align: :right, width: :deck, hint: 'red'
save_sheet prefix: 'sample_excel_resources_' #save to a whole sheet
end

BIN
samples/sample.xlsx

Binary file not shown.

43
spec/api/api_data_spec.rb

@ -20,13 +20,22 @@ describe Squib::Deck do
})
end
it 'handles spaces properly' do
it 'strips spaces by default' do
expect(Squib.csv(file: csv_file('with_spaces.csv'))).to eq({
'With Spaces' => ['a b c ', 3],
'With Spaces' => ['a b c', 3],
'h2' => [2, 4],
'h3' => [3, nil]
})
end
it 'skips space stripping if told to' do
expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv'))).to eq({
' With Spaces ' => ['a b c ', 3],
'h2' => [2, 4],
'h3' => [3, nil]
})
end
end
context '#xlsx' do
@ -53,5 +62,35 @@ describe Squib::Deck do
})
end
it 'strips whitespace by default' do
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'))).to eq({
'With Whitespace' => ['foo', 'bar', 'baz'],
})
end
it 'does not strip whitespace when specified' do
expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false)).to eq({
' With Whitespace ' => ['foo ', ' bar', ' baz '],
})
end
it 'yields to block when given' do
data = Squib.xlsx(file: xlsx_file('basic.xlsx')) do |header, value|
case header
when 'Name'
'he'
when 'Actual Number'
value * 2
else
'ha'
end
end
expect(data).to eq({
'Name' => %w(he he he),
'General Number' => %w(ha ha ha),
'Actual Number' => [8.0, 10.0, 12.0],
})
end
end
end

2
spec/data/csv/with_spaces.csv

@ -1,3 +1,3 @@
With Spaces,h2,h3
With Spaces ,h2,h3
a b c , 2,3
3 ,4
1 With Spaces,h2,h3 With Spaces ,h2,h3
2 a b c , 2,3 a b c , 2,3
3 3 ,4 3 ,4

181
spec/data/samples/excel.rb.txt

@ -160,3 +160,184 @@ cairo: restore([])
surface: write_to_png(["_output/sample_excel_00.png"])
surface: write_to_png(["_output/sample_excel_01.png"])
surface: write_to_png(["_output/sample_excel_02.png"])
cairo: antialias=(["subpixel"])
cairo: antialias=(["subpixel"])
cairo: antialias=(["subpixel"])
cairo: save([])
cairo: set_source_color(["white"])
cairo: paint([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["white"])
cairo: paint([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["white"])
cairo: paint([])
cairo: restore([])
cairo: save([])
cairo: rounded_rectangle([0, 0, 825, 1125, 0, 0])
cairo: set_source_color(["#0000"])
cairo: fill_preserve([])
cairo: set_source_color(["black"])
cairo: set_line_width([2.0])
cairo: set_line_join([0])
cairo: set_line_cap([0])
cairo: set_dash([[]])
cairo: stroke([])
cairo: restore([])
cairo: save([])
cairo: rounded_rectangle([0, 0, 825, 1125, 0, 0])
cairo: set_source_color(["#0000"])
cairo: fill_preserve([])
cairo: set_source_color(["black"])
cairo: set_line_width([2.0])
cairo: set_line_join([0])
cairo: set_line_cap([0])
cairo: set_dash([[]])
cairo: stroke([])
cairo: restore([])
cairo: save([])
cairo: rounded_rectangle([0, 0, 825, 1125, 0, 0])
cairo: set_source_color(["#0000"])
cairo: fill_preserve([])
cairo: set_source_color(["black"])
cairo: set_line_width([2.0])
cairo: set_line_join([0])
cairo: set_line_cap([0])
cairo: set_dash([[]])
cairo: stroke([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["Wood"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment center>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["Metal"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment center>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["Stone"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment center>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["$2k"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment right>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["$3k"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment right>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: save([])
cairo: set_source_color(["black"])
cairo: translate([0, 0])
cairo: rotate([0])
cairo: move_to([0, 0])
pango: font_description=([MockDouble])
pango: text=(["$5k"])
pango: width=([844800])
pango: wrap=([#<Pango::Layout::WrapMode word-char>])
pango: ellipsize=([#<Pango::Layout::EllipsizeMode end>])
pango: alignment=([#<Pango::Layout::Alignment right>])
pango: justify=([false])
cairo: move_to([0, 0])
cairo: move_to([0, 0])
cairo: show_pango_layout([MockDouble])
cairo: rounded_rectangle([0, 0, 0, 0, 0, 0])
cairo: set_source_color(["red"])
cairo: set_line_width([2.0])
cairo: stroke([])
pango: ellipsized?([])
cairo: restore([])
cairo: set_source([MockDouble, 0, 0])
cairo: paint([])
cairo: set_source([MockDouble, 100, 0])
cairo: paint([])
cairo: set_source([MockDouble, 200, 0])
cairo: paint([])
surface: write_to_png(["_output/sample_excel_resources_00.png"])

BIN
spec/data/xlsx/whitespace.xlsx

Binary file not shown.
Loading…
Cancel
Save