#!/usr/bin/env ruby

require 'yaml'
require 'fileutils'
require 'set'

# One imported post: the path it was read from, its parsed YAML front matter,
# and the text that follows the front matter.
Post = Struct.new(:filename, :frontmatter, :body) do
  # Serializes the post back to text.
  #
  # YAML.dump emits its own leading "---" line, so the result is the front
  # matter block, a closing "---" line, then the body.
  #
  # @return [String]
  def to_s
    "#{YAML.dump(frontmatter)}---\n#{body}"
  end

  # Reads a post from disk and splits it into front matter and body.
  #
  # @param filename [String] path of the file to read
  # @return [Post]
  def self.load(filename)
    # Limit the split to three fields (text before the first "---", the front
    # matter, everything else) so a "---" line inside the body stays part of
    # the body instead of truncating it.
    _preamble, raw_frontmatter, text = File.read(filename).split("---\n", 3)

    # NOTE(review): which non-basic YAML types YAML.load accepts depends on the
    # installed Psych version — confirm the front matter loads on the target Ruby.
    frontmatter = YAML.load(raw_frontmatter)

    # A file with nothing after the front matter yields no third field; fall
    # back to an empty, mutable string so later in-place edits still work.
    new(filename, frontmatter, text || +"")
  end

  # Writes the serialized post to content/post/<basename of filename>,
  # creating the target directory first if it does not exist yet.
  def dump_hugo
    FileUtils.mkdir_p("content/post")
    File.write("content/post/#{File.basename(filename)}", to_s)
  end
end

# Filters a post's "anexos" (attachments) front matter down to the entries with
# "anexos_display" == 1, rewrites their "public://" URIs to "/attachments/"
# paths, and records the underlying file paths in +files_to_copy+.
# The "anexos" key is removed from the metadata when no entry survives.
#
# @param post [Post] post whose front matter has a "metadata" => "anexos" list
# @param files_to_copy [Set<String>] collector, mutated in place
def rewrite_attachments(post, files_to_copy)
  metadata = post.frontmatter["metadata"]
  anexos = metadata["anexos"]

  anexos.select! { |anexo| anexo["anexos_display"] == 1 }
  metadata.delete("anexos") if anexos.empty?

  anexos.each do |anexo|
    # Record the relative path before the URI is rewritten in place.
    files_to_copy.add(anexo["anexos_uri"].gsub("public://", ""))
    anexo["anexos_uri"].gsub!("public://", "/attachments/")
  end
end

# Rewrites every href="..." in +body+ in place:
# - a leading http://ansol.org is upgraded to https://ansol.org;
# - links under https://ansol.org/sites/ansol.org/files/ are pointed at
#   /attachments/ and the file path (with "%20" turned into spaces) is
#   recorded in +files_to_copy+;
# - any other https://ansol.org link is printed with pp and kept as is;
# - everything else is left untouched.
#
# @param body [String] post body, mutated in place
# @param files_to_copy [Set<String>] collector, mutated in place
def rewrite_links(body, files_to_copy)
  files_prefix = "https://ansol.org/sites/ansol.org/files/"

  body.gsub!(/href="(.*?)"/) do
    url = Regexp.last_match(1).sub(%r{\Ahttp://ansol\.org}, "https://ansol.org")

    if url.start_with?(files_prefix)
      filename = url.delete_prefix(files_prefix)
      # The file on disk uses literal spaces; the link keeps its encoded form.
      files_to_copy.add(filename.gsub("%20", " "))
      url = "/attachments/#{filename}"
    elsif url.start_with?("https://ansol.org")
      # Surface remaining site-internal links for manual inspection.
      pp url
    end

    "href=\"#{url}\""
  end
end

posts = Dir["imported-content/_posts/*.md"].map { |filename| Post.load(filename) }

# Idempotency check
# just checking if the ruby serialization isn't changing files unnecessarily
posts.each do |post|
  raise "not idempotent!" if File.read(post.filename) != post.to_s
end

# Handle attachments
FileUtils.mkdir_p("static/attachments")
files_to_copy = Set.new

posts
  .select { |post| post.frontmatter.dig("metadata", "anexos")&.any? }
  .each { |post| rewrite_attachments(post, files_to_copy) }

# Handle links, then write every post out once with both rewrites applied.
posts.each do |post|
  rewrite_links(post.body, files_to_copy)
  post.dump_hugo
end

files_to_copy.each do |file|
  FileUtils.mkdir_p("static/attachments/#{File.dirname(file)}")
  FileUtils.copy_file("imported-files/#{file}", "static/attachments/#{file}")
end