setup
i have 30-odd author affiliations that look like the left panel and want to re-format to what's shown in the right panel. and i'm too lazy to do that manually ...

solution
the plan
- copy the selected texts. read texts from the clipboard.
- using regular expression (regex), replace '[affiliation number]. ' with '[affiliation number]'.
- write the resulting string in a new document.
- in the new doc, format the affiliation numbers.
- copy all contents of the new document.
programming language & module(s)
- VBA
file preps
select the texts to turn affiliation numbers to superscripts, like this:

variables to customize
none.
the script
fmtAffilNoPoster
| Sub fmtAffilNoPoster() | |
| Dim ret, clipboard As Object | |
| Dim cbTxt As String | |
| Dim newDoc As Document | |
| Dim retPattern As String | |
| Dim newDocEnd As Long | |
| retPattern = "(\d+)\.?\s+(\w+)" | |
| clearFind | |
| Selection.Copy | |
| Set DataObj = New MSForms.DataObject | |
| DataObj.GetFromClipboard | |
| txt = DataObj.GetText(1) | |
| ' Debug.Print txt | |
| splitted = Split(txt, Chr(13)) | |
| Set ret = CreateObject("VBScript.RegExp") | |
| ret.pattern = retPattern | |
| ret.ignorecase = True ' if needed | |
| cbTxt = "" | |
| For i = LBound(splitted) To UBound(splitted) | |
| txt = splitted(i) | |
| If ret.test(txt) Then | |
| ' replaces "1. Affiliation A" with "1Affiliation A". | |
| ' "$1" stands for submatches(0). | |
| replacedTxt = ret.Replace(txt, "$1$2") | |
| replacedTxt = Replace(replacedTxt, Chr(13), "") | |
| replacedTxt = Trim(replacedTxt) | |
| If Len(cbTxt) = 0 Then | |
| cbTxt = replacedTxt | |
| Else | |
| cbTxt = cbTxt & Chr(13) & replacedTxt | |
| End If | |
| End If | |
| Next | |
| ' Debug.Print "cbTxt: ", cbTxt | |
| Set newDoc = Documents.Add | |
| Selection.TypeText cbTxt | |
| For Each p In newDoc.Paragraphs | |
| Set rng = newDoc.Range(p.Range.Start, p.Range.Start + 1) | |
| rng.MoveEndWhile "0123456789" | |
| rng.Font.superScript = True | |
| Next | |
| With newDoc.Range.Find | |
| ' collapse all paragraphs into 1. | |
| .Text = "^p" | |
| .Replacement.Text = "; " | |
| .Execute Replace:=wdReplaceAll | |
| With Selection | |
| .EndKey wdStory | |
| ' delete the last "; " | |
| With .Find | |
| .Text = "; " | |
| .Replacement.Text = "" | |
| .Forward = False | |
| .Execute Replace:=wdReplaceOne | |
| End With | |
| .EndKey wdStory | |
| .MoveStartWhile Chr(13) | |
| newDoc.Range(0, .Start).Copy | |
| End With | |
| End With | |
| newDoc.Close savechanges:=wdDoNotSaveChanges | |
| clearFind | |
| End Sub | |
| Sub clearFind() | |
| With Selection.Find | |
| .Text = "" | |
| .Replacement.Text = "" | |
| .Forward = True | |
| .Wrap = wdFindContinue | |
| .Format = False | |
| .MatchCase = False | |
| .MatchWholeWord = False | |
| .MatchByte = True | |
| .MatchWildcards = False | |
| .MatchSoundsLike = False | |
| .MatchAllWordForms = False | |
| End With | |
| End Sub |
output
the result is now in the clipboard, ready to be pasted.
note to self
- a good tutorial (in Chinese) of working with regex in VBA: https://geek-docs.com/regexp/regexp-tutorials/51_vba_regular_expression.html