From 9b0602a50ca650a05744dd0df0a5aacee172207c Mon Sep 17 00:00:00 2001 From: Andy Meneely Date: Fri, 2 Oct 2020 13:44:42 -0400 Subject: [PATCH] ported xlsx over --- docs/dsl/csv.rst | 3 + docs/dsl/xlsx.rst | 3 + docs/dsl/yaml.rst | 2 + lib/squib/api/data.rb | 39 +-------- lib/squib/args/arg_loader.rb | 2 +- lib/squib/args/import.rb | 34 ++++++-- lib/squib/args/input_file.rb | 4 +- lib/squib/deck.rb | 1 + lib/squib/dsl/xlsx.rb | 41 ++++++++++ lib/squib/import/quantity_exploder.rb | 18 +++++ lib/squib/import/xlsx_importer.rb | 27 +++++++ spec/import/csv_spec.rb | 109 ++++++++++++++++++++++++++ spec/import/xlsx_spec.rb | 66 ++++++++++++++++ spec/import/yaml_spec.rb | 43 ++++++++++ 14 files changed, 344 insertions(+), 48 deletions(-) create mode 100644 lib/squib/dsl/xlsx.rb create mode 100644 lib/squib/import/quantity_exploder.rb create mode 100644 lib/squib/import/xlsx_importer.rb create mode 100644 spec/import/csv_spec.rb create mode 100644 spec/import/xlsx_spec.rb create mode 100644 spec/import/yaml_spec.rb diff --git a/docs/dsl/csv.rst b/docs/dsl/csv.rst index fc9f055..b9bd236 100644 --- a/docs/dsl/csv.rst +++ b/docs/dsl/csv.rst @@ -52,6 +52,9 @@ col_sep CSV custom options in Ruby standard lib. All of the options in Ruby's std lib version of CSV are supported **except** ``headers`` is always ``true`` and ``converters`` is always set to ``:numeric``. See the `Ruby Docs `_ for information on the options. +.. warning:: + Data import methods such as ``xlsx`` and ``csv`` will not consult your layout file or follow the :doc:`/arrays` feature. + Individual Pre-processing ------------------------- diff --git a/docs/dsl/xlsx.rst b/docs/dsl/xlsx.rst index 2af1d50..f05ed0e 100644 --- a/docs/dsl/xlsx.rst +++ b/docs/dsl/xlsx.rst @@ -32,6 +32,9 @@ explode Quantity explosion will be applied to the column this name. For example, rows in the csv with a ``'qty'`` of 3 will be duplicated 3 times. +.. warning:: + Data import methods such as ``xlsx`` and ``csv`` will not consult your layout file or follow the :doc:`/arrays` feature. + Individual Pre-processing ------------------------- diff --git a/docs/dsl/yaml.rst b/docs/dsl/yaml.rst index 77de216..636b570 100644 --- a/docs/dsl/yaml.rst +++ b/docs/dsl/yaml.rst @@ -35,6 +35,8 @@ explode Quantity explosion will be applied to the column this name. For example, rows in the csv with a ``'qty'`` of 3 will be duplicated 3 times. +.. warning:: + Data import methods such as ``xlsx`` and ``csv`` will not consult your layout file or follow the :doc:`/arrays` feature. Individual Pre-processing ------------------------- diff --git a/lib/squib/api/data.rb b/lib/squib/api/data.rb index 682eb2c..73fa96a 100644 --- a/lib/squib/api/data.rb +++ b/lib/squib/api/data.rb @@ -8,30 +8,6 @@ require_relative '../import/data_frame' module Squib - # DSL method. See http://squib.readthedocs.io - def xlsx(opts = {}) - input = Args::InputFile.new(file: 'deck.xlsx').load!(opts) - import = Args::Import.new.load!(opts) - s = Roo::Excelx.new(input.file[0]) - s.default_sheet = s.sheets[input.sheet[0]] - data = Squib::DataFrame.new - s.first_column.upto(s.last_column) do |col| - header = s.cell(s.first_row, col).to_s - header.strip! if import.strip? - data[header] = [] - (s.first_row + 1).upto(s.last_row) do |row| - cell = s.cell(row, col) - # Roo hack for avoiding unnecessary .0's on whole integers (https://github.com/roo-rb/roo/issues/139) - cell = s.excelx_value(row, col) if s.excelx_type(row, col) == [:numeric_or_formula, 'General'] - cell.strip! if cell.respond_to?(:strip) && import.strip? - cell = yield(header, cell) if block_given? - data[header] << cell - end# row - end# col - explode_quantities(data, import.explode) - end# xlsx - module_function :xlsx - # DSL method. See http://squib.readthedocs.io def csv(opts = {}) # TODO refactor all this out to separate methods, and its own class @@ -102,20 +78,7 @@ module Squib end module_function :check_duplicate_csv_headers - # @api private - def explode_quantities(data, qty) - return data unless data.col? qty.to_s.strip - qtys = data[qty] - new_data = Squib::DataFrame.new - data.each do |col, arr| - new_data[col] = [] - qtys.each_with_index do |qty, index| - qty.to_i.times { new_data[col] << arr[index] } - end - end - return new_data - end - module_function :explode_quantities + class Deck diff --git a/lib/squib/args/arg_loader.rb b/lib/squib/args/arg_loader.rb index 5dbb4d9..bacca36 100644 --- a/lib/squib/args/arg_loader.rb +++ b/lib/squib/args/arg_loader.rb @@ -12,7 +12,7 @@ module Squib::Args::ArgLoader load!(args, expand_by: deck.size, layout: deck.layout, dpi: deck.dpi) end - # Main class invoked by the client (i.e. api/ methods) + # Main class invoked by the client (i.e. dsl/ methods) def load!(args, expand_by: 1, layout: {}, dpi: 300) @dpi = dpi args[:layout] = prep_layout_args(args[:layout], expand_by: expand_by) diff --git a/lib/squib/args/import.rb b/lib/squib/args/import.rb index a8cc333..10c61de 100644 --- a/lib/squib/args/import.rb +++ b/lib/squib/args/import.rb @@ -1,16 +1,24 @@ -require_relative 'arg_loader' - module Squib::Args + module_function def extract_import(opts) + # note how we don't use ArgLoader here because it's way more complex than + # what we need here. Don't need layouts or singleton expansion, so... + # ...let's just do it ourselves. + Import.parameters.each { |p, value| opts[p] = value unless opts.key? p } + return Import.new.load! opts + end class Import - include ArgLoader - + def self.parameters { strip: true, - explode: 'qty' + explode: 'qty', + file: nil, + sheet: 0 } end + attr_accessor *(self.parameters.keys) + def self.expanding_parameters [] # none of them end @@ -19,13 +27,27 @@ module Squib::Args [] # none of them end + def load!(opts) + @strip = validate_strip opts[:strip] + @explode = validate_explode opts[:explode] + @file = validate_file opts[:file] + @sheet = opts[:sheet] + return self + end + def validate_strip(arg) raise 'Strip must be true or false' unless arg == true || arg == false arg end def validate_explode(arg) - arg + arg.to_s + end + + def validate_file(arg) + raise 'file argument not provided.' if arg.nil? + raise "File #{File.expand_path(arg)} does not exist!" unless File.exists?(arg) + File.expand_path(arg) end def strip? diff --git a/lib/squib/args/input_file.rb b/lib/squib/args/input_file.rb index a84b6e7..17bbdd1 100644 --- a/lib/squib/args/input_file.rb +++ b/lib/squib/args/input_file.rb @@ -13,9 +13,7 @@ module Squib::Args end def self.parameters - { file: nil, - sheet: 0, - } + { file: nil } end def self.expanding_parameters diff --git a/lib/squib/deck.rb b/lib/squib/deck.rb index 2387ef0..5a2e03d 100644 --- a/lib/squib/deck.rb +++ b/lib/squib/deck.rb @@ -131,5 +131,6 @@ module Squib require_relative 'dsl/text' require_relative 'dsl/triangle' require_relative 'dsl/units' + require_relative 'dsl/xlsx' end end diff --git a/lib/squib/dsl/xlsx.rb b/lib/squib/dsl/xlsx.rb new file mode 100644 index 0000000..dd54f59 --- /dev/null +++ b/lib/squib/dsl/xlsx.rb @@ -0,0 +1,41 @@ +require_relative '../args/input_file' +require_relative '../args/import' +require_relative '../import/xlsx_importer' +require_relative '../errors_warnings/warn_unexpected_params' + +module Squib + # DSL method. See http://squib.readthedocs.io + def xlsx(opts = {}, &block) + DSL::Xlsx.new(__callee__).run(opts, &block) + end + module_function :xlsx + + class Deck + # DSL method. See http://squib.readthedocs.io + def xlsx(opts = {}, &block) + DSL::Xlsx.new(__callee__).run(opts, &block) + end + end + + module DSL + class Xlsx + include WarnUnexpectedParams + attr_reader :dsl_method, :block + + def initialize(dsl_method) + @dsl_method = dsl_method + end + + def self.accepted_params + %i( file sheet strip explode ) + end + + def run(opts,&block) + warn_if_unexpected opts + import_args = Args.extract_import opts + importer = Squib::Import::XlsxImporter.new + importer.import_to_dataframe(import_args, &block) + end + end + end +end diff --git a/lib/squib/import/quantity_exploder.rb b/lib/squib/import/quantity_exploder.rb new file mode 100644 index 0000000..68e24d7 --- /dev/null +++ b/lib/squib/import/quantity_exploder.rb @@ -0,0 +1,18 @@ +module Squib + module Import + module QuantityExploder + def explode_quantities(data, qty) + return data unless data.col? qty.to_s.strip + qtys = data[qty] + new_data = Squib::DataFrame.new + data.each do |col, arr| + new_data[col] = [] + qtys.each_with_index do |qty, index| + qty.to_i.times { new_data[col] << arr[index] } + end + end + return new_data + end + end + end +end \ No newline at end of file diff --git a/lib/squib/import/xlsx_importer.rb b/lib/squib/import/xlsx_importer.rb new file mode 100644 index 0000000..ba6bee4 --- /dev/null +++ b/lib/squib/import/xlsx_importer.rb @@ -0,0 +1,27 @@ +require_relative 'quantity_exploder' + +module Squib::Import + class XlsxImporter + include Squib::Import::QuantityExploder + def import_to_dataframe(import, &block) + s = Roo::Excelx.new(import.file) + s.default_sheet = s.sheets[import.sheet] + data = Squib::DataFrame.new + s.first_column.upto(s.last_column) do |col| + header = s.cell(s.first_row, col).to_s + header.strip! if import.strip? + data[header] = [] + (s.first_row + 1).upto(s.last_row) do |row| + cell = s.cell(row, col) + # Roo hack for avoiding unnecessary .0's on whole integers (https://github.com/roo-rb/roo/issues/139) + cell = s.excelx_value(row, col) if s.excelx_type(row, col) == [:numeric_or_formula, 'General'] + cell.strip! if cell.respond_to?(:strip) && import.strip? + cell = block.yield(header, cell) unless block.nil? + data[header] << cell + end# row + end# col + explode_quantities(data, import.explode) + end + end +end + diff --git a/spec/import/csv_spec.rb b/spec/import/csv_spec.rb new file mode 100644 index 0000000..88a7692 --- /dev/null +++ b/spec/import/csv_spec.rb @@ -0,0 +1,109 @@ +require 'spec_helper' + +describe Squib::Deck do + context '#csv' do + it 'loads basic csv data' do + expect(Squib.csv(file: csv_file('basic.csv')).to_h.to_h).to eq({ + 'h1' => [1, 3], + 'h2' => [2, 4] + }) + end + + it 'collapses duplicate columns and warns' do + expect(Squib.logger).to receive(:warn) + .with('CSV duplicated the following column keys: h1,h1') + expect(Squib.csv(file: csv_file('dup_cols.csv')).to_h.to_h).to eq({ + 'h1' => [1, 3], + 'h2' => [5, 7], + 'H2' => [6, 8], + 'h3' => [9, 10], + }) + end + + it 'strips spaces by default' do + expect(Squib.csv(file: csv_file('with_spaces.csv')).to_h).to eq({ + 'With Spaces' => ['a b c', 3], + 'h2' => [2, 4], + 'h3' => [3, nil] + }) + end + + it 'skips space stripping if told to' do + expect(Squib.csv(strip: false, file: csv_file('with_spaces.csv')).to_h).to eq({ + ' With Spaces ' => ['a b c ', 3], + 'h2' => [2, 4], + 'h3' => [3, nil] + }) + end + + it 'explodes quantities' do + expect(Squib.csv(file: csv_file('qty.csv')).to_h).to eq({ + 'Name' => %w(Ha Ha Ha Ho), + 'qty' => [3, 3, 3, 1], + }) + end + + it 'explodes quantities on specified header' do + expect(Squib.csv(explode: 'Quantity', file: csv_file('qty_named.csv')).to_h).to eq({ + 'Name' => %w(Ha Ha Ha Ho), + 'Quantity' => [3, 3, 3, 1], + }) + end + + it 'loads inline data' do + hash = Squib.csv(data: "h1,h2\n1,2\n3,4") + expect(hash.to_h).to eq({ + 'h1' => [1, 3], + 'h2' => [2, 4] + }) + end + + it 'loads csv with newlines' do + hash = Squib.csv(file: csv_file('newline.csv')) + expect(hash.to_h).to eq({ + 'title' => ['Foo'], + 'level' => [1], + 'notes' => ["a\nb"] + }) + end + + it 'loads custom CSV options' do + hash = Squib.csv(file: csv_file('custom_opts.csv'), + col_sep: '-', quote_char: '|') + expect(hash.to_h).to eq({ + 'x' => ['p'], + 'y' => ['q-r'] + }) + end + + it 'yields to block when given' do + data = Squib.csv(file: csv_file('basic.csv')) do |header, value| + case header + when 'h1' + value * 2 + else + 'ha' + end + end + expect(data.to_h).to eq({ + 'h1' => [2, 6], + 'h2' => %w(ha ha), + }) + end + + it 'replaces newlines whenever its a string' do + data = Squib.csv(file: csv_file('yield.csv')) do |header, value| + if value.respond_to? :gsub + value.gsub '%n', "\n" + else + value + end + end + expect(data.to_h).to eq({ + 'a' => ["foo\nbar", 1], + 'b' => [1, "blah\n"], + }) + end + + end +end diff --git a/spec/import/xlsx_spec.rb b/spec/import/xlsx_spec.rb new file mode 100644 index 0000000..f7a5b97 --- /dev/null +++ b/spec/import/xlsx_spec.rb @@ -0,0 +1,66 @@ +require 'spec_helper' + +describe Squib::Deck do + context '#xlsx' do + it 'loads basic xlsx data' do + expect(Squib.xlsx(file: xlsx_file('basic.xlsx')).to_h).to eq({ + 'Name' => %w(Larry Curly Mo), + 'General Number' => %w(1 2 3), # general types always get loaded as strings with no conversion + 'Actual Number' => [4.0, 5.0, 6.0], # numbers get auto-converted to integers + }) + end + + it 'loads xlsx with formulas' do + expect(Squib.xlsx(file: xlsx_file('formulas.xlsx')).to_h).to eq({ + 'A' => %w(1 2), + 'B' => %w(3 4), + 'Sum' => %w(4 6), + }) + end + + it 'loads xlsm files with macros' do + expect(Squib.xlsx(file: xlsx_file('with_macros.xlsm')).to_h).to eq({ + 'foo' => %w(8 10), + 'bar' => %w(9 11), + }) + end + + it 'strips whitespace by default' do + expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx')).to_h).to eq({ + 'With Whitespace' => ['foo', 'bar', 'baz'], + }) + end + + it 'does not strip whitespace when specified' do + expect(Squib.xlsx(file: xlsx_file('whitespace.xlsx'), strip: false).to_h).to eq({ + ' With Whitespace ' => ['foo ', ' bar', ' baz '], + }) + end + + it 'yields to block when given' do + data = Squib.xlsx(file: xlsx_file('basic.xlsx')) do |header, value| + case header + when 'Name' + 'he' + when 'Actual Number' + value * 2 + else + 'ha' + end + end + expect(data.to_h).to eq({ + 'Name' => %w(he he he), + 'General Number' => %w(ha ha ha), + 'Actual Number' => [8.0, 10.0, 12.0], + }) + end + + it 'explodes quantities' do + expect(Squib.xlsx(explode: 'Quantity', file: xlsx_file('explode_quantities.xlsx')).to_h).to eq({ + 'Name' => ['Zergling', 'Zergling', 'Zergling', 'High Templar'], + 'Quantity' => %w(3 3 3 1), + }) + end + + end +end diff --git a/spec/import/yaml_spec.rb b/spec/import/yaml_spec.rb new file mode 100644 index 0000000..1d4751c --- /dev/null +++ b/spec/import/yaml_spec.rb @@ -0,0 +1,43 @@ +require 'spec_helper' + +describe Squib::Deck do + context '#yaml' do + it 'loads basic data' do + expect(Squib.yaml(file: yaml_file('basic.yml')).to_h).to eq({ + 'Name' => %w(Larry Curly Mo), + 'Number' => [4.0, 5.0, 6.0], # numbers get auto-converted to integers + }) + end + + it 'explodes quantities' do + expect(Squib.yaml(explode: 'qty', file: yaml_file('qty.yml')).to_h).to eq({ + 'name' => %w(ha ha he), + 'qty' => [2, 2, 1], + }) + end + + it 'handles silence' do + expect(Squib.yaml(file: yaml_file('nilly.yml')).to_h).to eq({ + 'name' => %w(foo bar), + 'desc' => [nil, 'Hello'], + }) + end + + it 'yields to block when given' do + data = Squib.yaml(file: yaml_file('basic.yml')) do |header, value| + case header + when 'Name' + 'he' + when 'Number' + value * 2 + else + 'ha' + end + end + expect(data.to_h).to eq({ + 'Name' => %w(he he he), + 'Number' => [8.0, 10.0, 12.0], + }) + end + end +end