web-ansol.org/import.rb

#!/usr/bin/env ruby
require 'yaml'
require 'fileutils'
require 'set'
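
# A Post is a Markdown file with YAML front matter delimited by "---" lines;
# it round-trips back to the same text via to_s.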
Post = Struct.new(:filename, :frontmatter, :body) do
  def to_s
    "#{YAML.dump(frontmatter)}---\n#{body}"
  end

  def self.load(filename)
    parts = File.read(filename).split("---\n")
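    # NOTE: on Ruby 3.1+ (Psych 4), YAML.load no longer deserialises Time/Date
    # values by default; this may need permitted_classes: [Time, Date] or
    # YAML.unsafe_load there, since the timezone fix below expects Time objects.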
    frontmatter = YAML.load(parts[1])
    self.new(filename, frontmatter, parts[2])
  end

  def dump_hugo
    File.write("content/post/#{File.basename(filename)}", to_s)
  end
end
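
# Load every post from the exported _posts directory (Jekyll-style layout).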
posts = Dir["imported-content/_posts/*.md"].map do |filename|
  Post.load(filename)
end
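
# Pages whose front matter carries "refresh_to_post_id" are redirect stubs;
# they are folded into Hugo aliases on the matching post further below.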
redirects = Dir["imported-content/**/*.md"]
  .map { |filename| Post.load(filename) }
  .select { |post| post.frontmatter.include?("refresh_to_post_id") }

# Idempotency check:
# make sure parsing and re-serialising each file reproduces it byte-for-byte,
# so the Ruby round-trip isn't changing files unnecessarily.
posts.each do |post|
  if File.read(post.filename) != post.to_s
    raise "not idempotent!"
  end
end

# Handle attachments
FileUtils.mkdir_p("static/attachments")
files_to_copy = Set.new
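
# "anexos" is Portuguese for attachments. Keep only the ones flagged for
# display, register their files for copying, and rewrite their "public://"
# URIs (Drupal's public-files scheme) to the Hugo /attachments/ path.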
posts.select { |p| p.frontmatter.dig("metadata", "anexos")&.any? }.each do |post|
  anexos = post.frontmatter.dig("metadata", "anexos")
  anexos.filter! { |anexo| anexo["anexos_display"] == 1 }
  if anexos.empty?
    post.frontmatter["metadata"].delete("anexos")
  end
  anexos.each do |anexo|
    files_to_copy.add(anexo["anexos_uri"].gsub("public://", ""))
    anexo["anexos_uri"].gsub!("public://", "/attachments/")
  end
end

# Handle links
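# Rewrite hrefs in each post body: force https on ansol.org links and point
# links into the old sites/ansol.org/files/ directory at /attachments/,
# registering the referenced file for copying. Everything else is left as-is.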
posts.each do |post|
  post.body.gsub!(/href="(.*?)"/) do |_match|
    url = $1.sub(/^http:\/\/ansol\.org/, "https://ansol.org")
    url =
      if url.start_with?("https://ansol.org/sites/ansol.org/files/")
        filename = url.gsub("https://ansol.org/sites/ansol.org/files/", "")
        files_to_copy.add(filename.gsub("%20", " "))
        "/attachments/#{filename}"
      elsif url.start_with?("https://ansol.org")
        url
      else
        url
      end
    "href=\"#{url}\""
  end
end
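
# Copy every referenced attachment out of the raw export into Hugo's static
# directory, preserving any subdirectory structure.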
files_to_copy.each do |file|
  FileUtils.mkdir_p("static/attachments/#{File.dirname(file)}")
  FileUtils.copy_file("imported-files/#{file}", "static/attachments/#{file}")
end

# Handle event timezone derp
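# Each event start/end time is shifted forward by its own UTC offset,
# apparently to undo a timezone mix-up in the exported data.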
posts.each do |post|
  post.frontmatter.dig("metadata", "event_start")&.each do |event|
    event["event_start_value"] += event["event_start_value"].utc_offset
    event["event_start_value2"] += event["event_start_value2"].utc_offset
  end
end

# Convert event metadata to a simpler format
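# The flat event_* fields from the export are collapsed into one nested
# "event" hash and the original keys are dropped; "map" is left empty, so the
# old "event_mapa" data is effectively discarded.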
posts.filter do |post|
  post.frontmatter["metadata"].keys.any? { |key| key.start_with?("event_") }
end.each do |post|
  metadata = post.frontmatter["metadata"]
  metadata["event"] = {
    "location" => metadata.dig("event_location", 0, "event_location_value"),
    "site" => {
      "title" => metadata.dig("event_site", 0, "event_site_title"),
      "url" => metadata.dig("event_site", 0, "event_site_url"),
    },
    "date" => {
      "start" => metadata.dig("event_start", 0, "event_start_value"),
      "finish" => metadata.dig("event_start", 0, "event_start_value2"),
    },
    "map" => {},
  }
  metadata.delete("event_location")
  metadata.delete("event_start")
  metadata.delete("event_site")
  metadata.delete("event_mapa")
end
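
# Turn each redirect stub into a Hugo alias on the post it points at, matching
# on the original node id. Fail loudly if the target post can't be found.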
redirects.each do |redir|
  nid = redir.frontmatter["source_nid"]
  post = posts.find { |p| p.frontmatter["metadata"]["node_id"] == nid }
  if post.nil?
    raise "couldn't find post #{nid} (#{redir.inspect})"
  end
  post.frontmatter["aliases"] ||= []
  post.frontmatter["aliases"] << "/" + redir.frontmatter["permalink"]
end
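
# Finally, write every rewritten post out under content/post/ for Hugo.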
posts.each(&:dump_hugo)